Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

vhost, pc: fixes for 2.5

Fixes all over the place.

This also re-enables a test we disabled in 2.5 cycle
now that there's a way not to get a warning from it.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 19 Nov 2015 13:27:43 GMT using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream:
  exec: silence hugetlbfs warning under qtest
  tests: re-enable vhost-user-test
  acpi: fix buffer overrun on migration
  vhost-user: fix log size
  vhost-user: ignore qemu-only features
  specs/vhost-user: fix spec to match reality
  tests/vhost-user-bridge: implement logging of dirty pages
  i440fx: print an error message if user tries to enable iommu
  q35: Check property to determine if iommu is set
  vhost-user: start/stop all rings
  vhost-user: print original request on error
  vhost-user-test: support VHOST_USER_SET_VRING_ENABLE
  vhost-user: update spec description
  vhost: don't send RESET_OWNER at stop
  vhost: let SET_VRING_ENABLE message depend on protocol feature

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 2015-11-19 16:26:08 +00:00
commit 348c32709f
14 changed files with 311 additions and 85 deletions

configure
@@ -5681,6 +5681,7 @@ case "$target_name" in
     echo "CONFIG_KVM=y" >> $config_target_mak
     if test "$vhost_net" = "yes" ; then
       echo "CONFIG_VHOST_NET=y" >> $config_target_mak
+      echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak
     fi
   fi
 esac

docs/specs/vhost-user.txt

@@ -87,6 +87,14 @@ Depending on the request type, payload can be:
    User address: a 64-bit user address
    mmap offset: 64-bit offset where region starts in the mapped memory
 
+ * Log description
+   ---------------------------
+   | log size | log offset |
+   ---------------------------
+
+   log size: size of area used for logging
+   log offset: offset from start of supplied file descriptor
+       where logging starts (i.e. where guest address 0 would be logged)
+
 In QEMU the vhost-user message is implemented with the following struct:
 
 typedef struct VhostUserMsg {
@@ -138,6 +146,29 @@ As older slaves don't support negotiating protocol features,
 a feature bit was dedicated for this purpose:
 
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+Starting and stopping rings
+----------------------
+Client must only process each ring when it is both started and enabled.
+
+If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized
+in an enabled state.
+
+If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized
+in a disabled state. Client must not process it until ring is enabled by
+VHOST_USER_SET_VRING_ENABLE with parameter 1, or after it has been disabled by
+VHOST_USER_SET_VRING_ENABLE with parameter 0.
+
+Each ring is initialized in a stopped state, client must not process it until
+ring is started, or after it has been stopped.
+
+Client must start ring upon receiving a kick (that is, detecting that file
+descriptor is readable) on the descriptor specified by
+VHOST_USER_SET_VRING_KICK, and stop ring upon receiving
+VHOST_USER_GET_VRING_BASE.
+
+While processing the rings (when they are started and enabled), client must
+support changing some configuration aspects on the fly.
+
 Multiple queue support
 ----------------------
@@ -162,9 +193,13 @@ the slave makes to the memory mapped regions. The client should mark
 the dirty pages in a log. Once it complies to this logging, it may
 declare the VHOST_F_LOG_ALL vhost feature.
 
+To start/stop logging of data/used ring writes, server may send messages
+VHOST_USER_SET_FEATURES with VHOST_F_LOG_ALL and VHOST_USER_SET_VRING_ADDR with
+VHOST_VRING_F_LOG in ring's flags set to 1/0, respectively.
+
 All the modifications to memory pointed by vring "descriptor" should
 be marked. Modifications to "used" vring should be marked if
-VHOST_VRING_F_LOG is part of ring's features.
+VHOST_VRING_F_LOG is part of ring's flags.
 
 Dirty pages are of size:
 #define VHOST_LOG_PAGE 0x1000
@@ -173,22 +208,35 @@ The log memory fd is provided in the ancillary data of
 VHOST_USER_SET_LOG_BASE message when the slave has
 VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature.
 
-The size of the log may be computed by using all the known guest
-addresses. The log covers from address 0 to the maximum of guest
+The size of the log is supplied as part of VhostUserMsg
+which should be large enough to cover all known guest
+addresses. Log starts at the supplied offset in the
+supplied file descriptor.
+The log covers from address 0 to the maximum of guest
 regions. In pseudo-code, to mark page at "addr" as dirty:
 
 page = addr / VHOST_LOG_PAGE
 log[page / 8] |= 1 << page % 8
 
+Where addr is the guest physical address.
+
 Use atomic operations, as the log may be concurrently manipulated.
 
+Note that when logging modifications to the used ring (when VHOST_VRING_F_LOG
+is set for this ring), log_guest_addr should be used to calculate the log
+offset: the write to first byte of the used ring is logged at this offset from
+log start. Also note that this value might be outside the legal guest physical
+address range (i.e. does not have to be covered by the VhostUserMemory table),
+but the bit offset of the last byte of the ring must fall within
+the size supplied by VhostUserLog.
+
 VHOST_USER_SET_LOG_FD is an optional message with an eventfd in
 ancillary data, it may be used to inform the master that the log has
 been modified.
 
-Once the source has finished migration, VHOST_USER_RESET_OWNER message
-will be sent by the source. No further update must be done before the
-destination takes over with new regions & rings.
+Once the source has finished migration, rings will be stopped by
+the source. No further update must be done before rings are
+restarted.
 
 Protocol features
 -----------------
@@ -259,11 +307,13 @@ Message types
 * VHOST_USER_RESET_OWNER
 
       Id: 4
       Equivalent ioctl: VHOST_RESET_OWNER
       Master payload: N/A
 
-      Issued when a new connection is about to be closed. The Master will no
-      longer own this connection (and will usually close it).
+      This is no longer used. Used to be sent to request stopping
+      all rings, but some clients interpreted it to also discard
+      connection state (this interpretation would lead to bugs).
+      It is recommended that clients either ignore this message,
+      or use it to stop all rings.
 
 * VHOST_USER_SET_MEM_TABLE
@@ -388,6 +438,8 @@ Message types
       Master payload: vring state description
 
       Signal slave to enable or disable corresponding vring.
+      This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES
+      has been negotiated.
 
 * VHOST_USER_SEND_RARP
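
A note on the logging scheme the spec text above describes: marking a page
dirty is just an atomic bit-set per VHOST_LOG_PAGE-sized page of guest
memory. A minimal standalone sketch in C (the helper name and the use of the
GCC __sync builtin are illustrative choices, not part of this series):

    #include <stdint.h>

    #define VHOST_LOG_PAGE 0x1000

    /* Mark the pages covering [addr, addr + len) dirty, following the
     * spec's pseudo-code:
     *     page = addr / VHOST_LOG_PAGE
     *     log[page / 8] |= 1 << page % 8
     * addr is a guest physical address; the OR must be atomic because
     * master and slave may update the log concurrently. Assumes len > 0
     * and a log mapping large enough to hold the last page's bit. */
    static void mark_dirty(volatile uint8_t *log, uint64_t addr, uint64_t len)
    {
        uint64_t page = addr / VHOST_LOG_PAGE;
        uint64_t last = (addr + len - 1) / VHOST_LOG_PAGE;

        for (; page <= last; page++) {
            __sync_fetch_and_or(&log[page / 8], (uint8_t)(1 << (page % 8)));
        }
    }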

exec.c

@@ -51,6 +51,7 @@
 #include "qemu/main-loop.h"
 #include "translate-all.h"
 #include "sysemu/replay.h"
+#include "sysemu/qtest.h"
 
 #include "exec/memory-internal.h"
 #include "exec/ram_addr.h"
@@ -1196,8 +1197,10 @@ static long gethugepagesize(const char *path, Error **errp)
         return 0;
     }
 
-    if (fs.f_type != HUGETLBFS_MAGIC)
+    if (!qtest_driver() &&
+        fs.f_type != HUGETLBFS_MAGIC) {
         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+    }
 
     return fs.f_bsize;
 }

hw/acpi/core.c

@@ -625,8 +625,12 @@ void acpi_pm1_cnt_reset(ACPIREGS *ar)
 void acpi_gpe_init(ACPIREGS *ar, uint8_t len)
 {
     ar->gpe.len = len;
-    ar->gpe.sts = g_malloc0(len / 2);
-    ar->gpe.en = g_malloc0(len / 2);
+    /* Only first len / 2 bytes are ever used,
+     * but the caller in ich9.c migrates full len bytes.
+     * TODO: fix ich9.c and drop the extra allocation.
+     */
+    ar->gpe.sts = g_malloc0(len);
+    ar->gpe.en = g_malloc0(len);
 }
 
 void acpi_gpe_reset(ACPIREGS *ar)

hw/core/machine.c

@@ -462,11 +462,6 @@ bool machine_usb(MachineState *machine)
     return machine->usb;
 }
 
-bool machine_iommu(MachineState *machine)
-{
-    return machine->iommu;
-}
-
 bool machine_kernel_irqchip_allowed(MachineState *machine)
 {
     return machine->kernel_irqchip_allowed;

hw/net/vhost_net.c

@@ -77,14 +77,8 @@ static const int user_feature_bits[] = {
     VIRTIO_NET_F_HOST_ECN,
     VIRTIO_NET_F_HOST_UFO,
     VIRTIO_NET_F_MRG_RXBUF,
-    VIRTIO_NET_F_STATUS,
-    VIRTIO_NET_F_CTRL_VQ,
-    VIRTIO_NET_F_CTRL_RX,
-    VIRTIO_NET_F_CTRL_VLAN,
-    VIRTIO_NET_F_CTRL_RX_EXTRA,
-    VIRTIO_NET_F_CTRL_MAC_ADDR,
-    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
+    /* This bit implies RARP isn't sent by QEMU out of band */
     VIRTIO_NET_F_GUEST_ANNOUNCE,
     VIRTIO_NET_F_MQ,
@@ -292,12 +286,6 @@ static void vhost_net_stop_one(struct vhost_net *net,
             int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
             assert(r >= 0);
         }
-    } else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) {
-        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
-            const VhostOps *vhost_ops = net->dev.vhost_ops;
-            int r = vhost_ops->vhost_reset_device(&net->dev);
-            assert(r >= 0);
-        }
     }
     if (net->nc->info->poll) {
         net->nc->info->poll(net->nc, true);

hw/pci-host/piix.c

@@ -34,6 +34,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/i386/ioapic.h"
 #include "qapi/visitor.h"
+#include "qemu/error-report.h"
 
 /*
  * I440FX chipset data sheet.
@@ -301,6 +302,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp)
 static void i440fx_realize(PCIDevice *dev, Error **errp)
 {
     dev->config[I440FX_SMRAM] = 0x02;
+
+    if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) {
+        error_report("warning: i440fx doesn't support emulated iommu");
+    }
 }
 
 PCIBus *i440fx_init(const char *host_type, const char *pci_type,

hw/pci-host/q35.c

@@ -506,7 +506,7 @@ static void mch_realize(PCIDevice *d, Error **errp)
                                  PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
     }
     /* Intel IOMMU (VT-d) */
-    if (machine_iommu(current_machine)) {
+    if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) {
         mch_init_dmar(mch);
     }
 }

hw/virtio/vhost-user.c

@@ -121,8 +121,8 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
 
     r = qemu_chr_fe_read_all(chr, p, size);
     if (r != size) {
-        error_report("Failed to read msg header. Read %d instead of %d.", r,
-                     size);
+        error_report("Failed to read msg header. Read %d instead of %d."
+                     " Original request %d.", r, size, msg->request);
         goto fail;
     }
@@ -206,7 +206,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_LOG_BASE,
         .flags = VHOST_USER_VERSION,
-        .payload.log.mmap_size = log->size,
+        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
         .payload.log.mmap_offset = 0,
         .size = sizeof(msg.payload.log),
     };
@@ -333,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev *dev,
 
 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
 {
-    struct vhost_vring_state state = {
-        .index = dev->vq_index,
-        .num = enable,
-    };
+    int i;
 
-    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) {
+    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
         return -1;
     }
 
-    return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
+    for (i = 0; i < dev->nvqs; ++i) {
+        struct vhost_vring_state state = {
+            .index = dev->vq_index + i,
+            .num = enable,
+        };
+
+        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
+    }
+
+    return 0;
 }
 
 static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
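
A note on the vhost_user_set_vring_enable() rewrite above: the request is now
gated on the negotiated VHOST_USER_F_PROTOCOL_FEATURES bit (bit 30, per the
spec hunk earlier) rather than on VHOST_USER_PROTOCOL_F_MQ, and is fanned out
to every ring of the device. The gate itself reduces to a plain feature-bit
test; a minimal sketch (the helper name is illustrative, not QEMU API):

    #include <stdbool.h>
    #include <stdint.h>

    #define VHOST_USER_F_PROTOCOL_FEATURES 30

    /* SET_VRING_ENABLE may only be sent once this feature bit has been
     * negotiated; per the spec above, rings then start out disabled. */
    static bool can_send_vring_enable(uint64_t negotiated_features)
    {
        return negotiated_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
    }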

include/hw/boards.h

@@ -33,7 +33,6 @@ MachineClass *find_default_machine(void);
 extern MachineState *current_machine;
 
 bool machine_usb(MachineState *machine);
-bool machine_iommu(MachineState *machine);
 bool machine_kernel_irqchip_allowed(MachineState *machine);
 bool machine_kernel_irqchip_required(MachineState *machine);
 int machine_kvm_shadow_mem(MachineState *machine);

tests/Makefile

@@ -197,8 +197,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c
 check-qtest-i386-y += tests/pc-cpu-test$(EXESUF)
 check-qtest-i386-y += tests/q35-test$(EXESUF)
 gcov-files-i386-y += hw/pci-host/q35.c
-ifeq ($(CONFIG_VHOST_NET),y)
-check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF)
+check-qtest-i386-$(CONFIG_VHOST_NET_TEST_i386) += tests/vhost-user-test$(EXESUF)
+ifeq ($(CONFIG_VHOST_NET_TEST_i386),)
+check-qtest-x86_64-$(CONFIG_VHOST_NET_TEST_x86_64) += tests/vhost-user-test$(EXESUF)
 endif
 check-qtest-i386-y += tests/test-netfilter$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)

tests/vhost-user-bridge.c

@@ -13,16 +13,22 @@
 /*
  * TODO:
  * - main should get parameters from the command line.
- * - implement all request handlers.
+ * - implement all request handlers. Still not implemented:
+ *          vubr_get_queue_num_exec()
+ *          vubr_send_rarp_exec()
  * - test for broken requests and virtqueue.
  * - implement features defined by Virtio 1.0 spec.
  * - support mergeable buffers and indirect descriptors.
+ * - implement RESET_DEVICE request.
  * - implement clean shutdown.
  * - implement non-blocking writes to UDP backend.
  * - implement polling strategy.
+ * - implement clean starting/stopping of vq processing
+ * - implement clean starting/stopping of used and buffers
+ *   dirty page logging.
  */
 
+#define _FILE_OFFSET_BITS 64
+
 #include <stddef.h>
 #include <assert.h>
 #include <stdio.h>
@@ -166,6 +172,8 @@ typedef struct VubrVirtq {
     struct vring_desc *desc;
     struct vring_avail *avail;
     struct vring_used *used;
+    uint64_t log_guest_addr;
+    int enable;
 } VubrVirtq;
 
 /* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +181,8 @@ typedef struct VubrVirtq {
 #define VHOST_MEMORY_MAX_NREGIONS    8
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+#define VHOST_LOG_PAGE 4096
+
 enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_MQ = 0,
     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;
 
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
 typedef struct VhostUserMsg {
     VhostUserRequest request;
@@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
+        VhostUserLog log;
     } payload;
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     int fd_num;
@@ -265,8 +281,13 @@ typedef struct VubrDev {
     uint32_t nregions;
     VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
     VubrVirtq vq[MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    uint8_t *log_table;
     int backend_udp_sock;
     struct sockaddr_in backend_udp_dest;
+    int ready;
+    uint64_t features;
 } VubrDev;
 
 static const char *vubr_request_str[] = {
@@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
     rc = recvmsg(conn_fd, &msg, 0);
 
-    if (rc <= 0) {
-        vubr_die("recvmsg");
+    if (rc == 0) {
+        fprintf(stderr, "Peer disconnected.\n");
+        exit(1);
+    }
+    if (rc < 0) {
+        vubr_die("recvmsg");
     }
@@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
     if (vmsg->size) {
         rc = read(conn_fd, &vmsg->payload, vmsg->size);
-        if (rc <= 0) {
-            vubr_die("recvmsg");
+        if (rc == 0) {
+            fprintf(stderr, "Peer disconnected.\n");
+            exit(1);
+        }
+        if (rc < 0) {
+            vubr_die("recvmsg");
         }
@@ -455,6 +486,16 @@ vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
     vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
 }
 
+/* Kick the log_call_fd if required. */
+static void
+vubr_log_kick(VubrDev *dev)
+{
+    if (dev->log_call_fd != -1) {
+        DPRINT("Kicking the QEMU's log...\n");
+        eventfd_write(dev->log_call_fd, 1);
+    }
+}
+
 /* Kick the guest if necessary. */
 static void
 vubr_virtqueue_kick(VubrVirtq *vq)
@@ -465,12 +506,40 @@ vubr_virtqueue_kick(VubrVirtq *vq)
     }
 }
 
+static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+    atomic_or(&log_table[page / 8], 1 << (page % 8));
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
+        !dev->log_table || !length) {
+        return;
+    }
+
+    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vubr_log_page(dev->log_table, page);
+        page += VHOST_LOG_PAGE;
+    }
+    vubr_log_kick(dev);
+}
+
 static void
 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 {
-    struct vring_desc *desc = vq->desc;
+    struct vring_desc *desc  = vq->desc;
     struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
+    struct vring_used *used   = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
@@ -510,6 +579,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 
     if (len <= chunk_len) {
         memcpy(chunk_start, buf, len);
+        vubr_log_write(dev, desc[i].addr, len);
     } else {
         fprintf(stderr,
                 "Received too long packet from the backend. Dropping...\n");
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
vubr_log_write(dev,
log_guest_addr + offsetof(struct vring_used, ring[u_index]),
sizeof(used->ring[u_index]));
vq->last_avail_index++;
vq->last_used_index++;
atomic_mb_set(&used->idx, vq->last_used_index);
vubr_log_write(dev,
log_guest_addr + offsetof(struct vring_used, idx),
sizeof(used->idx));
/* Kick the guest if necessary. */
vubr_virtqueue_kick(vq);
@@ -532,9 +608,10 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 static int
 vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
 {
-    struct vring_desc *desc = vq->desc;
+    struct vring_desc *desc  = vq->desc;
     struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
+    struct vring_used *used   = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     unsigned int size = vq->size;
@@ -552,6 +629,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
         void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
         uint32_t chunk_len = desc[i].len;
 
+        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
         if (len + chunk_len < buf_size) {
             memcpy(buf + len, chunk_start, chunk_len);
             DPRINT("%d ", chunk_len);
@@ -577,6 +656,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vubr_consume_raw_packet(dev, buf, len);
@@ -588,6 +670,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
 {
     struct vring_avail *avail = vq->avail;
     struct vring_used *used = vq->used;
+    uint64_t log_guest_addr = vq->log_guest_addr;
 
     while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
         vubr_process_desc(dev, vq);
@@ -596,6 +679,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
     }
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev,
+                   log_guest_addr + offsetof(struct vring_used, idx),
+                   sizeof(used->idx));
 }
 
 static void
@@ -609,6 +695,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
     int buflen = sizeof(buf);
     int len;
 
+    if (!dev->ready) {
+        return;
+    }
+
     DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
 
     uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,14 +746,14 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     vmsg->payload.u64 =
         ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-         (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-         (1ULL << VIRTIO_NET_F_CTRL_RX) |
-         (1ULL << VHOST_F_LOG_ALL));
+         (1ULL << VHOST_F_LOG_ALL) |
+         (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
     vmsg->size = sizeof(vmsg->payload.u64);
 
     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
 
-    /* reply */
+    /* Reply */
     return 1;
 }
@@ -671,6 +761,7 @@ static int
 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    dev->features = vmsg->payload.u64;
     return 0;
 }
@@ -680,10 +771,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
     return 0;
 }
 
+static void
+vubr_close_log(VubrDev *dev)
+{
+    if (dev->log_table) {
+        if (munmap(dev->log_table, dev->log_size) != 0) {
+            vubr_die("munmap()");
+        }
+        dev->log_table = 0;
+    }
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+        dev->log_call_fd = -1;
+    }
+}
+
 static int
 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
+    vubr_close_log(dev);
+    dev->ready = 0;
+    dev->features = 0;
     return 0;
 }
@@ -710,9 +819,9 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
         DPRINT("    mmap_offset      0x%016"PRIx64"\n",
                msg_region->mmap_offset);
 
-        dev_region->gpa = msg_region->guest_phys_addr;
-        dev_region->size = msg_region->memory_size;
-        dev_region->qva = msg_region->userspace_addr;
+        dev_region->gpa         = msg_region->guest_phys_addr;
+        dev_region->size        = msg_region->memory_size;
+        dev_region->qva         = msg_region->userspace_addr;
         dev_region->mmap_offset = msg_region->mmap_offset;
 
         /* We don't use offset argument of mmap() since the
static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("Function %s() not implemented yet.\n", __func__);
return 0;
int fd;
uint64_t log_mmap_size, log_mmap_offset;
void *rc;
assert(vmsg->fd_num == 1);
fd = vmsg->fds[0];
assert(vmsg->size == sizeof(vmsg->payload.log));
log_mmap_offset = vmsg->payload.log.mmap_offset;
log_mmap_size = vmsg->payload.log.mmap_size;
DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
log_mmap_offset);
if (rc == MAP_FAILED) {
vubr_die("mmap");
}
dev->log_table = rc;
dev->log_size = log_mmap_size;
vmsg->size = sizeof(vmsg->payload.u64);
/* Reply */
return 1;
}
static int
vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("Function %s() not implemented yet.\n", __func__);
assert(vmsg->fd_num == 1);
dev->log_call_fd = vmsg->fds[0];
DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
return 0;
}
@@ -777,6 +910,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
     vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
     vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
     vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+    vq->log_guest_addr = vra->log_guest_addr;
 
     DPRINT("Setting virtq addresses:\n");
     DPRINT("    vring_desc  at %p\n", vq->desc);
@@ -803,8 +937,18 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    unsigned int index = vmsg->payload.state.index;
+
+    DPRINT("State.index: %d\n", index);
+    vmsg->payload.state.num = dev->vq[index].last_avail_index;
+    vmsg->size = sizeof(vmsg->payload.state);
+    /* FIXME: this is a work-around for a bug in QEMU enabling
+     * too early vrings. When protocol features are enabled,
+     * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
+    dev->ready = 0;
+
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -829,7 +973,17 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
         DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
                dev->vq[index].kick_fd, index);
     }
+
+    /* We temporarily use this hack to determine that both TX and RX
+     * queues are set up and ready for processing.
+     * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
+     * actual kicks. */
+    if (dev->vq[0].kick_fd != -1 &&
+        dev->vq[1].kick_fd != -1) {
+        dev->ready = 1;
+        DPRINT("vhost-user-bridge is ready for processing queues.\n");
+    }
+
     return 0;
 }
 
 static int
@@ -858,9 +1012,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    /* FIXME: unimplented */
-    return 0;
+    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -881,7 +1038,12 @@ vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int enable = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.enable: %d\n", enable);
+    dev->vq[index].enable = enable;
     return 0;
 }
@@ -987,7 +1149,7 @@ vubr_accept_cb(int sock, void *ctx)
     socklen_t len = sizeof(un);
 
     conn_fd = accept(sock, (struct sockaddr *) &un, &len);
-    if (conn_fd  == -1) {
+    if (conn_fd == -1) {
         vubr_die("accept()");
     }
     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
@@ -1009,9 +1171,17 @@ vubr_new(const char *path)
             .size = 0,
             .last_avail_index = 0, .last_used_index = 0,
             .desc = 0, .avail = 0, .used = 0,
+            .enable = 0,
         };
     }
 
+    /* Init log */
+    dev->log_call_fd = -1;
+    dev->log_size = 0;
+    dev->log_table = 0;
+    dev->ready = 0;
+    dev->features = 0;
+
     /* Get a UNIX socket. */
     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (dev->sock == -1) {

tests/vhost-user-test.c

@@ -70,6 +70,7 @@ typedef enum VhostUserRequest {
     VHOST_USER_SET_VRING_ERR = 14,
     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_SET_VRING_ENABLE = 18,
     VHOST_USER_MAX
 } VhostUserRequest;
@@ -315,8 +316,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
         g_cond_signal(&s->data_cond);
         break;
 
-    case VHOST_USER_RESET_OWNER:
-        s->fds_num = 0;
+    case VHOST_USER_SET_VRING_ENABLE:
+        if (!msg.payload.state.num) {
+            s->fds_num = 0;
+        }
         break;
 
     default:
vl.c

@@ -4288,14 +4288,23 @@ int main(int argc, char **argv, char **envp)
     page_size_init();
     socket_init();
 
-    if (qemu_opts_foreach(qemu_find_opts("object"),
-                          object_create,
-                          object_create_initial, NULL)) {
+    if (qemu_opts_foreach(qemu_find_opts("chardev"),
+                          chardev_init_func, NULL, NULL)) {
         exit(1);
     }
 
-    if (qemu_opts_foreach(qemu_find_opts("chardev"),
-                          chardev_init_func, NULL, NULL)) {
+    if (qtest_chrdev) {
+        Error *local_err = NULL;
+        qtest_init(qtest_chrdev, qtest_log, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            exit(1);
+        }
+    }
+
+    if (qemu_opts_foreach(qemu_find_opts("object"),
+                          object_create,
+                          object_create_initial, NULL)) {
         exit(1);
     }
@@ -4325,15 +4334,6 @@ int main(int argc, char **argv, char **envp)
 
     configure_accelerator(current_machine);
 
-    if (qtest_chrdev) {
-        Error *local_err = NULL;
-        qtest_init(qtest_chrdev, qtest_log, &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            exit(1);
-        }
-    }
-
     machine_opts = qemu_get_machine_opts();
     kernel_filename = qemu_opt_get(machine_opts, "kernel");
     initrd_filename = qemu_opt_get(machine_opts, "initrd");
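
The vl.c reordering above is what makes the exec.c hunk effective: qtest_init()
now runs before -object processing, so a memory-backend-file object created
early calls gethugepagesize() at a point where qtest_driver() already reports
that a qtest is in charge, and the hugetlbfs warning can be suppressed. A
sketch of the resulting call order (simplified and illustrative, not actual
vl.c code):

    /*
     * chardev options processed          -> qtest chardev is available
     * qtest_init(qtest_chrdev, ...)      -> qtest_driver() becomes true
     * object_create(...)                 -> may create memory-backend-file,
     *     which calls gethugepagesize()  -> warning skipped under qtest
     *
     * Before this patch, object creation ran first, so the qtest_driver()
     * check added in exec.c could not yet see the qtest driver.
     */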