vhost, pc: fixes for 2.5
Fixes all over the place. This also re-enables a test we disabled in 2.5 cycle now that there's a way not to get a warning from it. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJWTc5PAAoJECgfDbjSjVRpCJEH/jh1CeWCG7zRBXelWK2F5Cgr ls+V1sCX7NvkfCa3cDZI8imGjYQUr6EiXtqxPArEVMjmUOUzEHPkOx3ICPpfMU7o RCVNPELav6VBhGDf3mcIVjlDDN9Syhd90xdgaD8dbeSA0UJFHRTdobNlYpYwiRmp OAASUawEWLGA5cG+W6MBFWiPQWChpNRK3yK3RVduL71TIe4heuHBez4qTB2QKYvF KM2nRvpkBY21frXJQqWPlCJ6jsdjI/Fl2xR1t4C9qv0TKcB9FESMmH3Jff2bwMQM 8OSnTIRqYaqT6kJkk3Kns8a+porJMnn69OwBRehLmLW/rmx9HQrR2Ey7bYtgd0Y= =CrrY -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging vhost, pc: fixes for 2.5 Fixes all over the place. This also re-enables a test we disabled in 2.5 cycle now that there's a way not to get a warning from it. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Thu 19 Nov 2015 13:27:43 GMT using RSA key ID D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" * remotes/mst/tags/for_upstream: exec: silence hugetlbfs warning under qtest tests: re-enable vhost-user-test acpi: fix buffer overrun on migration vhost-user: fix log size vhost-user: ignore qemu-only features specs/vhost-user: fix spec to match reality tests/vhost-user-bridge: implement logging of dirty pages i440fx: print an error message if user tries to enable iommu q35: Check propery to determine if iommu is set vhost-user: start/stop all rings vhost-user: print original request on error vhost-user-test: support VHOST_USER_SET_VRING_ENABLE vhost-user: update spec description vhost: don't send RESET_OWNER at stop vhost: let SET_VRING_ENABLE message depends on protocol feature Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
348c32709f
1
configure
vendored
1
configure
vendored
@ -5681,6 +5681,7 @@ case "$target_name" in
|
||||
echo "CONFIG_KVM=y" >> $config_target_mak
|
||||
if test "$vhost_net" = "yes" ; then
|
||||
echo "CONFIG_VHOST_NET=y" >> $config_target_mak
|
||||
echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak
|
||||
fi
|
||||
fi
|
||||
esac
|
||||
|
@ -87,6 +87,14 @@ Depending on the request type, payload can be:
|
||||
User address: a 64-bit user address
|
||||
mmap offset: 64-bit offset where region starts in the mapped memory
|
||||
|
||||
* Log description
|
||||
---------------------------
|
||||
| log size | log offset |
|
||||
---------------------------
|
||||
log size: size of area used for logging
|
||||
log offset: offset from start of supplied file descriptor
|
||||
where logging starts (i.e. where guest address 0 would be logged)
|
||||
|
||||
In QEMU the vhost-user message is implemented with the following struct:
|
||||
|
||||
typedef struct VhostUserMsg {
|
||||
@ -138,6 +146,29 @@ As older slaves don't support negotiating protocol features,
|
||||
a feature bit was dedicated for this purpose:
|
||||
#define VHOST_USER_F_PROTOCOL_FEATURES 30
|
||||
|
||||
Starting and stopping rings
|
||||
----------------------
|
||||
Client must only process each ring when it is both started and enabled.
|
||||
|
||||
If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized
|
||||
in an enabled state.
|
||||
|
||||
If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized
|
||||
in a disabled state. Client must not process it until ring is enabled by
|
||||
VHOST_USER_SET_VRING_ENABLE with parameter 1, or after it has been disabled by
|
||||
VHOST_USER_SET_VRING_ENABLE with parameter 0.
|
||||
|
||||
Each ring is initialized in a stopped state, client must not process it until
|
||||
ring is started, or after it has been stopped.
|
||||
|
||||
Client must start ring upon receiving a kick (that is, detecting that file
|
||||
descriptor is readable) on the descriptor specified by
|
||||
VHOST_USER_SET_VRING_KICK, and stop ring upon receiving
|
||||
VHOST_USER_GET_VRING_BASE.
|
||||
|
||||
While processing the rings (when they are started and enabled), client must
|
||||
support changing some configuration aspects on the fly.
|
||||
|
||||
Multiple queue support
|
||||
----------------------
|
||||
|
||||
@ -162,9 +193,13 @@ the slave makes to the memory mapped regions. The client should mark
|
||||
the dirty pages in a log. Once it complies with this logging, it may
|
||||
declare the VHOST_F_LOG_ALL vhost feature.
|
||||
|
||||
To start/stop logging of data/used ring writes, server may send messages
|
||||
VHOST_USER_SET_FEATURES with VHOST_F_LOG_ALL and VHOST_USER_SET_VRING_ADDR with
|
||||
VHOST_VRING_F_LOG in ring's flags set to 1/0, respectively.
|
||||
|
||||
All the modifications to memory pointed by vring "descriptor" should
|
||||
be marked. Modifications to "used" vring should be marked if
|
||||
VHOST_VRING_F_LOG is part of ring's features.
|
||||
VHOST_VRING_F_LOG is part of ring's flags.
|
||||
|
||||
Dirty pages are of size:
|
||||
#define VHOST_LOG_PAGE 0x1000
|
||||
@ -173,22 +208,35 @@ The log memory fd is provided in the ancillary data of
|
||||
VHOST_USER_SET_LOG_BASE message when the slave has
|
||||
VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature.
|
||||
|
||||
The size of the log may be computed by using all the known guest
|
||||
addresses. The log covers from address 0 to the maximum of guest
|
||||
The size of the log is supplied as part of VhostUserMsg
|
||||
which should be large enough to cover all known guest
|
||||
addresses. Log starts at the supplied offset in the
|
||||
supplied file descriptor.
|
||||
The log covers from address 0 to the maximum of guest
|
||||
regions. In pseudo-code, to mark page at "addr" as dirty:
|
||||
|
||||
page = addr / VHOST_LOG_PAGE
|
||||
log[page / 8] |= 1 << page % 8
|
||||
|
||||
Where addr is the guest physical address.
|
||||
|
||||
Use atomic operations, as the log may be concurrently manipulated.
|
||||
|
||||
Note that when logging modifications to the used ring (when VHOST_VRING_F_LOG
|
||||
is set for this ring), log_guest_addr should be used to calculate the log
|
||||
offset: the write to first byte of the used ring is logged at this offset from
|
||||
log start. Also note that this value might be outside the legal guest physical
|
||||
address range (i.e. does not have to be covered by the VhostUserMemory table),
|
||||
but the bit offset of the last byte of the ring must fall within
|
||||
the size supplied by VhostUserLog.
|
||||
|
||||
VHOST_USER_SET_LOG_FD is an optional message with an eventfd in
|
||||
ancillary data, it may be used to inform the master that the log has
|
||||
been modified.
|
||||
|
||||
Once the source has finished migration, VHOST_USER_RESET_OWNER message
|
||||
will be sent by the source. No further update must be done before the
|
||||
destination takes over with new regions & rings.
|
||||
Once the source has finished migration, rings will be stopped by
|
||||
the source. No further update must be done before rings are
|
||||
restarted.
|
||||
|
||||
Protocol features
|
||||
-----------------
|
||||
@ -259,11 +307,13 @@ Message types
|
||||
* VHOST_USER_RESET_OWNER
|
||||
|
||||
Id: 4
|
||||
Equivalent ioctl: VHOST_RESET_OWNER
|
||||
Master payload: N/A
|
||||
|
||||
Issued when a new connection is about to be closed. The Master will no
|
||||
longer own this connection (and will usually close it).
|
||||
This is no longer used. Used to be sent to request stopping
|
||||
all rings, but some clients interpreted it to also discard
|
||||
connection state (this interpretation would lead to bugs).
|
||||
It is recommended that clients either ignore this message,
|
||||
or use it to stop all rings.
|
||||
|
||||
* VHOST_USER_SET_MEM_TABLE
|
||||
|
||||
@ -388,6 +438,8 @@ Message types
|
||||
Master payload: vring state description
|
||||
|
||||
Signal slave to enable or disable corresponding vring.
|
||||
This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES
|
||||
has been negotiated.
|
||||
|
||||
* VHOST_USER_SEND_RARP
|
||||
|
||||
|
5
exec.c
5
exec.c
@ -51,6 +51,7 @@
|
||||
#include "qemu/main-loop.h"
|
||||
#include "translate-all.h"
|
||||
#include "sysemu/replay.h"
|
||||
#include "sysemu/qtest.h"
|
||||
|
||||
#include "exec/memory-internal.h"
|
||||
#include "exec/ram_addr.h"
|
||||
@ -1196,8 +1197,10 @@ static long gethugepagesize(const char *path, Error **errp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (fs.f_type != HUGETLBFS_MAGIC)
|
||||
if (!qtest_driver() &&
|
||||
fs.f_type != HUGETLBFS_MAGIC) {
|
||||
fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
|
||||
}
|
||||
|
||||
return fs.f_bsize;
|
||||
}
|
||||
|
@ -625,8 +625,12 @@ void acpi_pm1_cnt_reset(ACPIREGS *ar)
|
||||
void acpi_gpe_init(ACPIREGS *ar, uint8_t len)
|
||||
{
|
||||
ar->gpe.len = len;
|
||||
ar->gpe.sts = g_malloc0(len / 2);
|
||||
ar->gpe.en = g_malloc0(len / 2);
|
||||
/* Only first len / 2 bytes are ever used,
|
||||
* but the caller in ich9.c migrates full len bytes.
|
||||
* TODO: fix ich9.c and drop the extra allocation.
|
||||
*/
|
||||
ar->gpe.sts = g_malloc0(len);
|
||||
ar->gpe.en = g_malloc0(len);
|
||||
}
|
||||
|
||||
void acpi_gpe_reset(ACPIREGS *ar)
|
||||
|
@ -462,11 +462,6 @@ bool machine_usb(MachineState *machine)
|
||||
return machine->usb;
|
||||
}
|
||||
|
||||
bool machine_iommu(MachineState *machine)
|
||||
{
|
||||
return machine->iommu;
|
||||
}
|
||||
|
||||
bool machine_kernel_irqchip_allowed(MachineState *machine)
|
||||
{
|
||||
return machine->kernel_irqchip_allowed;
|
||||
|
@ -77,14 +77,8 @@ static const int user_feature_bits[] = {
|
||||
VIRTIO_NET_F_HOST_ECN,
|
||||
VIRTIO_NET_F_HOST_UFO,
|
||||
VIRTIO_NET_F_MRG_RXBUF,
|
||||
VIRTIO_NET_F_STATUS,
|
||||
VIRTIO_NET_F_CTRL_VQ,
|
||||
VIRTIO_NET_F_CTRL_RX,
|
||||
VIRTIO_NET_F_CTRL_VLAN,
|
||||
VIRTIO_NET_F_CTRL_RX_EXTRA,
|
||||
VIRTIO_NET_F_CTRL_MAC_ADDR,
|
||||
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
|
||||
|
||||
/* This bit implies RARP isn't sent by QEMU out of band */
|
||||
VIRTIO_NET_F_GUEST_ANNOUNCE,
|
||||
|
||||
VIRTIO_NET_F_MQ,
|
||||
@ -292,12 +286,6 @@ static void vhost_net_stop_one(struct vhost_net *net,
|
||||
int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
|
||||
assert(r >= 0);
|
||||
}
|
||||
} else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) {
|
||||
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
|
||||
const VhostOps *vhost_ops = net->dev.vhost_ops;
|
||||
int r = vhost_ops->vhost_reset_device(&net->dev);
|
||||
assert(r >= 0);
|
||||
}
|
||||
}
|
||||
if (net->nc->info->poll) {
|
||||
net->nc->info->poll(net->nc, true);
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "sysemu/sysemu.h"
|
||||
#include "hw/i386/ioapic.h"
|
||||
#include "qapi/visitor.h"
|
||||
#include "qemu/error-report.h"
|
||||
|
||||
/*
|
||||
* I440FX chipset data sheet.
|
||||
@ -301,6 +302,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp)
|
||||
static void i440fx_realize(PCIDevice *dev, Error **errp)
|
||||
{
|
||||
dev->config[I440FX_SMRAM] = 0x02;
|
||||
|
||||
if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) {
|
||||
error_report("warning: i440fx doesn't support emulated iommu");
|
||||
}
|
||||
}
|
||||
|
||||
PCIBus *i440fx_init(const char *host_type, const char *pci_type,
|
||||
|
@ -506,7 +506,7 @@ static void mch_realize(PCIDevice *d, Error **errp)
|
||||
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
|
||||
}
|
||||
/* Intel IOMMU (VT-d) */
|
||||
if (machine_iommu(current_machine)) {
|
||||
if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) {
|
||||
mch_init_dmar(mch);
|
||||
}
|
||||
}
|
||||
|
@ -121,8 +121,8 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
|
||||
|
||||
r = qemu_chr_fe_read_all(chr, p, size);
|
||||
if (r != size) {
|
||||
error_report("Failed to read msg header. Read %d instead of %d.", r,
|
||||
size);
|
||||
error_report("Failed to read msg header. Read %d instead of %d."
|
||||
" Original request %d.", r, size, msg->request);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@ -206,7 +206,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
|
||||
VhostUserMsg msg = {
|
||||
.request = VHOST_USER_SET_LOG_BASE,
|
||||
.flags = VHOST_USER_VERSION,
|
||||
.payload.log.mmap_size = log->size,
|
||||
.payload.log.mmap_size = log->size * sizeof(*(log->log)),
|
||||
.payload.log.mmap_offset = 0,
|
||||
.size = sizeof(msg.payload.log),
|
||||
};
|
||||
@ -333,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev *dev,
|
||||
|
||||
static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
|
||||
{
|
||||
struct vhost_vring_state state = {
|
||||
.index = dev->vq_index,
|
||||
.num = enable,
|
||||
};
|
||||
int i;
|
||||
|
||||
if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) {
|
||||
if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
|
||||
}
|
||||
for (i = 0; i < dev->nvqs; ++i) {
|
||||
struct vhost_vring_state state = {
|
||||
.index = dev->vq_index + i,
|
||||
.num = enable,
|
||||
};
|
||||
|
||||
vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vhost_user_get_vring_base(struct vhost_dev *dev,
|
||||
struct vhost_vring_state *ring)
|
||||
|
@ -33,7 +33,6 @@ MachineClass *find_default_machine(void);
|
||||
extern MachineState *current_machine;
|
||||
|
||||
bool machine_usb(MachineState *machine);
|
||||
bool machine_iommu(MachineState *machine);
|
||||
bool machine_kernel_irqchip_allowed(MachineState *machine);
|
||||
bool machine_kernel_irqchip_required(MachineState *machine);
|
||||
int machine_kvm_shadow_mem(MachineState *machine);
|
||||
|
@ -197,8 +197,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c
|
||||
check-qtest-i386-y += tests/pc-cpu-test$(EXESUF)
|
||||
check-qtest-i386-y += tests/q35-test$(EXESUF)
|
||||
gcov-files-i386-y += hw/pci-host/q35.c
|
||||
ifeq ($(CONFIG_VHOST_NET),y)
|
||||
check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF)
|
||||
check-qtest-i386-$(CONFIG_VHOST_NET_TEST_i386) += tests/vhost-user-test$(EXESUF)
|
||||
ifeq ($(CONFIG_VHOST_NET_TEST_i386),)
|
||||
check-qtest-x86_64-$(CONFIG_VHOST_NET_TEST_x86_64) += tests/vhost-user-test$(EXESUF)
|
||||
endif
|
||||
check-qtest-i386-y += tests/test-netfilter$(EXESUF)
|
||||
check-qtest-x86_64-y = $(check-qtest-i386-y)
|
||||
|
@ -13,16 +13,22 @@
|
||||
/*
|
||||
* TODO:
|
||||
* - main should get parameters from the command line.
|
||||
* - implement all request handlers.
|
||||
* - implement all request handlers. Still not implemented:
|
||||
* vubr_get_queue_num_exec()
|
||||
* vubr_send_rarp_exec()
|
||||
* - test for broken requests and virtqueue.
|
||||
* - implement features defined by Virtio 1.0 spec.
|
||||
* - support mergeable buffers and indirect descriptors.
|
||||
* - implement RESET_DEVICE request.
|
||||
* - implement clean shutdown.
|
||||
* - implement non-blocking writes to UDP backend.
|
||||
* - implement polling strategy.
|
||||
* - implement clean starting/stopping of vq processing
|
||||
* - implement clean starting/stopping of used and buffers
|
||||
* dirty page logging.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
@ -166,6 +172,8 @@ typedef struct VubrVirtq {
|
||||
struct vring_desc *desc;
|
||||
struct vring_avail *avail;
|
||||
struct vring_used *used;
|
||||
uint64_t log_guest_addr;
|
||||
int enable;
|
||||
} VubrVirtq;
|
||||
|
||||
/* Based on qemu/hw/virtio/vhost-user.c */
|
||||
@ -173,6 +181,8 @@ typedef struct VubrVirtq {
|
||||
#define VHOST_MEMORY_MAX_NREGIONS 8
|
||||
#define VHOST_USER_F_PROTOCOL_FEATURES 30
|
||||
|
||||
#define VHOST_LOG_PAGE 4096
|
||||
|
||||
enum VhostUserProtocolFeature {
|
||||
VHOST_USER_PROTOCOL_F_MQ = 0,
|
||||
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
|
||||
@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
|
||||
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
|
||||
} VhostUserMemory;
|
||||
|
||||
/* Payload describing the dirty-page log area: carried in
 * VhostUserMsg.payload.log and read by the VHOST_USER_SET_LOG_BASE
 * handler, which passes both fields straight to mmap(). */
typedef struct VhostUserLog {
|
||||
/* Size of the area used for logging (used as the mmap() length). */
uint64_t mmap_size;
|
||||
/* Offset from the start of the supplied file descriptor where logging
 * starts (used as the mmap() offset). */
uint64_t mmap_offset;
|
||||
} VhostUserLog;
|
||||
|
||||
typedef struct VhostUserMsg {
|
||||
VhostUserRequest request;
|
||||
|
||||
@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
|
||||
struct vhost_vring_state state;
|
||||
struct vhost_vring_addr addr;
|
||||
VhostUserMemory memory;
|
||||
VhostUserLog log;
|
||||
} payload;
|
||||
int fds[VHOST_MEMORY_MAX_NREGIONS];
|
||||
int fd_num;
|
||||
@ -265,8 +281,13 @@ typedef struct VubrDev {
|
||||
uint32_t nregions;
|
||||
VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
|
||||
VubrVirtq vq[MAX_NR_VIRTQUEUE];
|
||||
int log_call_fd;
|
||||
uint64_t log_size;
|
||||
uint8_t *log_table;
|
||||
int backend_udp_sock;
|
||||
struct sockaddr_in backend_udp_dest;
|
||||
int ready;
|
||||
uint64_t features;
|
||||
} VubrDev;
|
||||
|
||||
static const char *vubr_request_str[] = {
|
||||
@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
|
||||
|
||||
rc = recvmsg(conn_fd, &msg, 0);
|
||||
|
||||
if (rc <= 0) {
|
||||
if (rc == 0) {
|
||||
vubr_die("recvmsg");
|
||||
fprintf(stderr, "Peer disconnected.\n");
|
||||
exit(1);
|
||||
}
|
||||
if (rc < 0) {
|
||||
vubr_die("recvmsg");
|
||||
}
|
||||
|
||||
@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
|
||||
|
||||
if (vmsg->size) {
|
||||
rc = read(conn_fd, &vmsg->payload, vmsg->size);
|
||||
if (rc <= 0) {
|
||||
if (rc == 0) {
|
||||
vubr_die("recvmsg");
|
||||
fprintf(stderr, "Peer disconnected.\n");
|
||||
exit(1);
|
||||
}
|
||||
if (rc < 0) {
|
||||
vubr_die("recvmsg");
|
||||
}
|
||||
|
||||
@ -455,6 +486,16 @@ vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
|
||||
vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
|
||||
}
|
||||
|
||||
/* Kick the log_call_fd if required. */
|
||||
static void
|
||||
vubr_log_kick(VubrDev *dev)
|
||||
{
|
||||
if (dev->log_call_fd != -1) {
|
||||
DPRINT("Kicking the QEMU's log...\n");
|
||||
eventfd_write(dev->log_call_fd, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Kick the guest if necessary. */
|
||||
static void
|
||||
vubr_virtqueue_kick(VubrVirtq *vq)
|
||||
@ -465,12 +506,40 @@ vubr_virtqueue_kick(VubrVirtq *vq)
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark one guest page as dirty in the log bitmap.
 *
 * log_table: start of the bitmap, one bit per page.
 * page:      page number; bit (page % 8) of byte (page / 8) is set.
 *
 * The bit is set with an atomic OR. */
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    uint8_t *slot = &log_table[page / 8];

    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(slot, 1 << (page % 8));
}
|
||||
|
||||
static void
|
||||
vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
|
||||
{
|
||||
uint64_t page;
|
||||
|
||||
if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
|
||||
!dev->log_table || !length) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
|
||||
|
||||
page = address / VHOST_LOG_PAGE;
|
||||
while (page * VHOST_LOG_PAGE < address + length) {
|
||||
vubr_log_page(dev->log_table, page);
|
||||
page += VHOST_LOG_PAGE;
|
||||
}
|
||||
vubr_log_kick(dev);
|
||||
}
|
||||
|
||||
static void
|
||||
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
|
||||
{
|
||||
struct vring_desc *desc = vq->desc;
|
||||
struct vring_desc *desc = vq->desc;
|
||||
struct vring_avail *avail = vq->avail;
|
||||
struct vring_used *used = vq->used;
|
||||
struct vring_used *used = vq->used;
|
||||
uint64_t log_guest_addr = vq->log_guest_addr;
|
||||
|
||||
unsigned int size = vq->size;
|
||||
|
||||
@ -510,6 +579,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
|
||||
|
||||
if (len <= chunk_len) {
|
||||
memcpy(chunk_start, buf, len);
|
||||
vubr_log_write(dev, desc[i].addr, len);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Received too long packet from the backend. Dropping...\n");
|
||||
@ -519,11 +589,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
|
||||
/* Add descriptor to the used ring. */
|
||||
used->ring[u_index].id = d_index;
|
||||
used->ring[u_index].len = len;
|
||||
vubr_log_write(dev,
|
||||
log_guest_addr + offsetof(struct vring_used, ring[u_index]),
|
||||
sizeof(used->ring[u_index]));
|
||||
|
||||
vq->last_avail_index++;
|
||||
vq->last_used_index++;
|
||||
|
||||
atomic_mb_set(&used->idx, vq->last_used_index);
|
||||
vubr_log_write(dev,
|
||||
log_guest_addr + offsetof(struct vring_used, idx),
|
||||
sizeof(used->idx));
|
||||
|
||||
/* Kick the guest if necessary. */
|
||||
vubr_virtqueue_kick(vq);
|
||||
@ -532,9 +608,10 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
|
||||
static int
|
||||
vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
|
||||
{
|
||||
struct vring_desc *desc = vq->desc;
|
||||
struct vring_desc *desc = vq->desc;
|
||||
struct vring_avail *avail = vq->avail;
|
||||
struct vring_used *used = vq->used;
|
||||
struct vring_used *used = vq->used;
|
||||
uint64_t log_guest_addr = vq->log_guest_addr;
|
||||
|
||||
unsigned int size = vq->size;
|
||||
|
||||
@ -552,6 +629,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
|
||||
void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
|
||||
uint32_t chunk_len = desc[i].len;
|
||||
|
||||
assert(!(desc[i].flags & VRING_DESC_F_WRITE));
|
||||
|
||||
if (len + chunk_len < buf_size) {
|
||||
memcpy(buf + len, chunk_start, chunk_len);
|
||||
DPRINT("%d ", chunk_len);
|
||||
@ -577,6 +656,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
|
||||
/* Add descriptor to the used ring. */
|
||||
used->ring[u_index].id = d_index;
|
||||
used->ring[u_index].len = len;
|
||||
vubr_log_write(dev,
|
||||
log_guest_addr + offsetof(struct vring_used, ring[u_index]),
|
||||
sizeof(used->ring[u_index]));
|
||||
|
||||
vubr_consume_raw_packet(dev, buf, len);
|
||||
|
||||
@ -588,6 +670,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
|
||||
{
|
||||
struct vring_avail *avail = vq->avail;
|
||||
struct vring_used *used = vq->used;
|
||||
uint64_t log_guest_addr = vq->log_guest_addr;
|
||||
|
||||
while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
|
||||
vubr_process_desc(dev, vq);
|
||||
@ -596,6 +679,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
|
||||
}
|
||||
|
||||
atomic_mb_set(&used->idx, vq->last_used_index);
|
||||
vubr_log_write(dev,
|
||||
log_guest_addr + offsetof(struct vring_used, idx),
|
||||
sizeof(used->idx));
|
||||
}
|
||||
|
||||
static void
|
||||
@ -609,6 +695,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
|
||||
int buflen = sizeof(buf);
|
||||
int len;
|
||||
|
||||
if (!dev->ready) {
|
||||
return;
|
||||
}
|
||||
|
||||
DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
|
||||
|
||||
uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
|
||||
@ -656,14 +746,14 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
vmsg->payload.u64 =
|
||||
((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
|
||||
(1ULL << VIRTIO_NET_F_CTRL_VQ) |
|
||||
(1ULL << VIRTIO_NET_F_CTRL_RX) |
|
||||
(1ULL << VHOST_F_LOG_ALL));
|
||||
(1ULL << VHOST_F_LOG_ALL) |
|
||||
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
|
||||
|
||||
vmsg->size = sizeof(vmsg->payload.u64);
|
||||
|
||||
DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
|
||||
|
||||
/* reply */
|
||||
/* Reply */
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -671,6 +761,7 @@ static int
|
||||
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
|
||||
dev->features = vmsg->payload.u64;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -680,10 +771,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
vubr_close_log(VubrDev *dev)
|
||||
{
|
||||
if (dev->log_table) {
|
||||
if (munmap(dev->log_table, dev->log_size) != 0) {
|
||||
vubr_die("munmap()");
|
||||
}
|
||||
|
||||
dev->log_table = 0;
|
||||
}
|
||||
if (dev->log_call_fd != -1) {
|
||||
close(dev->log_call_fd);
|
||||
dev->log_call_fd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
DPRINT("Function %s() not implemented yet.\n", __func__);
|
||||
vubr_close_log(dev);
|
||||
dev->ready = 0;
|
||||
dev->features = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -710,9 +819,9 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
DPRINT(" mmap_offset 0x%016"PRIx64"\n",
|
||||
msg_region->mmap_offset);
|
||||
|
||||
dev_region->gpa = msg_region->guest_phys_addr;
|
||||
dev_region->size = msg_region->memory_size;
|
||||
dev_region->qva = msg_region->userspace_addr;
|
||||
dev_region->gpa = msg_region->guest_phys_addr;
|
||||
dev_region->size = msg_region->memory_size;
|
||||
dev_region->qva = msg_region->userspace_addr;
|
||||
dev_region->mmap_offset = msg_region->mmap_offset;
|
||||
|
||||
/* We don't use offset argument of mmap() since the
|
||||
@ -736,14 +845,38 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
static int
|
||||
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
DPRINT("Function %s() not implemented yet.\n", __func__);
|
||||
return 0;
|
||||
int fd;
|
||||
uint64_t log_mmap_size, log_mmap_offset;
|
||||
void *rc;
|
||||
|
||||
assert(vmsg->fd_num == 1);
|
||||
fd = vmsg->fds[0];
|
||||
|
||||
assert(vmsg->size == sizeof(vmsg->payload.log));
|
||||
log_mmap_offset = vmsg->payload.log.mmap_offset;
|
||||
log_mmap_size = vmsg->payload.log.mmap_size;
|
||||
DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
|
||||
DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
|
||||
|
||||
rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
|
||||
log_mmap_offset);
|
||||
if (rc == MAP_FAILED) {
|
||||
vubr_die("mmap");
|
||||
}
|
||||
dev->log_table = rc;
|
||||
dev->log_size = log_mmap_size;
|
||||
|
||||
vmsg->size = sizeof(vmsg->payload.u64);
|
||||
/* Reply */
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
DPRINT("Function %s() not implemented yet.\n", __func__);
|
||||
assert(vmsg->fd_num == 1);
|
||||
dev->log_call_fd = vmsg->fds[0];
|
||||
DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -777,6 +910,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
|
||||
vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
|
||||
vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
|
||||
vq->log_guest_addr = vra->log_guest_addr;
|
||||
|
||||
DPRINT("Setting virtq addresses:\n");
|
||||
DPRINT(" vring_desc at %p\n", vq->desc);
|
||||
@ -803,8 +937,18 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
static int
|
||||
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
DPRINT("Function %s() not implemented yet.\n", __func__);
|
||||
return 0;
|
||||
unsigned int index = vmsg->payload.state.index;
|
||||
|
||||
DPRINT("State.index: %d\n", index);
|
||||
vmsg->payload.state.num = dev->vq[index].last_avail_index;
|
||||
vmsg->size = sizeof(vmsg->payload.state);
|
||||
/* FIXME: this is a work-around for a bug in QEMU enabling
|
||||
* too early vrings. When protocol features are enabled,
|
||||
* we have to respect VHOST_USER_SET_VRING_ENABLE request. */
|
||||
dev->ready = 0;
|
||||
|
||||
/* Reply */
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -829,7 +973,17 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
|
||||
dev->vq[index].kick_fd, index);
|
||||
}
|
||||
/* We temporarily use this hack to determine that both TX and RX
|
||||
* queues are set up and ready for processing.
|
||||
* FIXME: we need to rely on VHOST_USER_SET_VRING_ENABLE and
|
||||
* actual kicks. */
|
||||
if (dev->vq[0].kick_fd != -1 &&
|
||||
dev->vq[1].kick_fd != -1) {
|
||||
dev->ready = 1;
|
||||
DPRINT("vhost-user-bridge is ready for processing queues.\n");
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
@ -858,9 +1012,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
static int
|
||||
vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
/* FIXME: unimplented */
|
||||
vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
|
||||
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
|
||||
return 0;
|
||||
vmsg->size = sizeof(vmsg->payload.u64);
|
||||
|
||||
/* Reply */
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -881,7 +1038,12 @@ vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
static int
|
||||
vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
DPRINT("Function %s() not implemented yet.\n", __func__);
|
||||
unsigned int index = vmsg->payload.state.index;
|
||||
unsigned int enable = vmsg->payload.state.num;
|
||||
|
||||
DPRINT("State.index: %d\n", index);
|
||||
DPRINT("State.enable: %d\n", enable);
|
||||
dev->vq[index].enable = enable;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -987,7 +1149,7 @@ vubr_accept_cb(int sock, void *ctx)
|
||||
socklen_t len = sizeof(un);
|
||||
|
||||
conn_fd = accept(sock, (struct sockaddr *) &un, &len);
|
||||
if (conn_fd == -1) {
|
||||
if (conn_fd == -1) {
|
||||
vubr_die("accept()");
|
||||
}
|
||||
DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
|
||||
@ -1009,9 +1171,17 @@ vubr_new(const char *path)
|
||||
.size = 0,
|
||||
.last_avail_index = 0, .last_used_index = 0,
|
||||
.desc = 0, .avail = 0, .used = 0,
|
||||
.enable = 0,
|
||||
};
|
||||
}
|
||||
|
||||
/* Init log */
|
||||
dev->log_call_fd = -1;
|
||||
dev->log_size = 0;
|
||||
dev->log_table = 0;
|
||||
dev->ready = 0;
|
||||
dev->features = 0;
|
||||
|
||||
/* Get a UNIX socket. */
|
||||
dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
if (dev->sock == -1) {
|
||||
|
@ -70,6 +70,7 @@ typedef enum VhostUserRequest {
|
||||
VHOST_USER_SET_VRING_ERR = 14,
|
||||
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
|
||||
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
|
||||
VHOST_USER_SET_VRING_ENABLE = 18,
|
||||
VHOST_USER_MAX
|
||||
} VhostUserRequest;
|
||||
|
||||
@ -315,8 +316,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
|
||||
g_cond_signal(&s->data_cond);
|
||||
break;
|
||||
|
||||
case VHOST_USER_RESET_OWNER:
|
||||
s->fds_num = 0;
|
||||
case VHOST_USER_SET_VRING_ENABLE:
|
||||
if (!msg.payload.state.num) {
|
||||
s->fds_num = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
|
28
vl.c
28
vl.c
@ -4288,14 +4288,23 @@ int main(int argc, char **argv, char **envp)
|
||||
page_size_init();
|
||||
socket_init();
|
||||
|
||||
if (qemu_opts_foreach(qemu_find_opts("object"),
|
||||
object_create,
|
||||
object_create_initial, NULL)) {
|
||||
if (qemu_opts_foreach(qemu_find_opts("chardev"),
|
||||
chardev_init_func, NULL, NULL)) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (qemu_opts_foreach(qemu_find_opts("chardev"),
|
||||
chardev_init_func, NULL, NULL)) {
|
||||
if (qtest_chrdev) {
|
||||
Error *local_err = NULL;
|
||||
qtest_init(qtest_chrdev, qtest_log, &local_err);
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (qemu_opts_foreach(qemu_find_opts("object"),
|
||||
object_create,
|
||||
object_create_initial, NULL)) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -4325,15 +4334,6 @@ int main(int argc, char **argv, char **envp)
|
||||
|
||||
configure_accelerator(current_machine);
|
||||
|
||||
if (qtest_chrdev) {
|
||||
Error *local_err = NULL;
|
||||
qtest_init(qtest_chrdev, qtest_log, &local_err);
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
machine_opts = qemu_get_machine_opts();
|
||||
kernel_filename = qemu_opt_get(machine_opts, "kernel");
|
||||
initrd_filename = qemu_opt_get(machine_opts, "initrd");
|
||||
|
Loading…
x
Reference in New Issue
Block a user