2019-09-30 12:51:34 +02:00
|
|
|
/*
|
|
|
|
* Vhost-user filesystem virtio device
|
|
|
|
*
|
|
|
|
* Copyright 2018-2019 Red Hat, Inc.
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or
|
|
|
|
* (at your option) any later version. See the COPYING file in the
|
|
|
|
* top-level directory.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include "standard-headers/linux/virtio_fs.h"
|
|
|
|
#include "qapi/error.h"
|
|
|
|
#include "hw/qdev-properties.h"
|
2020-12-11 23:05:12 +01:00
|
|
|
#include "hw/qdev-properties-system.h"
|
2019-09-30 12:51:34 +02:00
|
|
|
#include "hw/virtio/virtio-bus.h"
|
|
|
|
#include "hw/virtio/virtio-access.h"
|
|
|
|
#include "qemu/error-report.h"
|
|
|
|
#include "hw/virtio/vhost-user-fs.h"
|
|
|
|
#include "monitor/monitor.h"
|
|
|
|
|
|
|
|
/*
 * Fill @config with this device's struct virtio_fs_config: the mount tag
 * and the number of request queues.
 */
static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    struct virtio_fs_config cfg = {};
    size_t tag_bytes = strlen(fs->conf.tag) + 1;

    /*
     * Copy the tag together with its NUL terminator when that fits;
     * otherwise copy exactly as many bytes as the tag field holds.
     */
    if (tag_bytes > sizeof(cfg.tag)) {
        tag_bytes = sizeof(cfg.tag);
    }
    memcpy((char *)cfg.tag, fs->conf.tag, tag_bytes);

    virtio_stl_p(vdev, &cfg.num_request_queues, fs->conf.num_request_queues);

    memcpy(config, &cfg, sizeof(cfg));
}
|
|
|
|
|
|
|
|
/*
 * Bring the vhost backend up: enable host notifiers, bind guest notifiers,
 * start the vhost device and unmask every virtqueue.  Each failure is
 * reported with error_report() and whatever was already set up is undone
 * in reverse order before returning.
 */
static void vuf_start(VirtIODevice *vdev)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    BusState *bus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
    int rc;
    int vq;

    if (bus_class->set_guest_notifiers == NULL) {
        error_report("binding does not support guest notifiers");
        return;
    }

    rc = vhost_dev_enable_notifiers(&fs->vhost_dev, vdev);
    if (rc < 0) {
        error_report("Error enabling host notifiers: %d", -rc);
        return;
    }

    rc = bus_class->set_guest_notifiers(bus->parent, fs->vhost_dev.nvqs, true);
    if (rc < 0) {
        error_report("Error binding guest notifier: %d", -rc);
        /* Only the host notifiers are set up at this point. */
        vhost_dev_disable_notifiers(&fs->vhost_dev, vdev);
        return;
    }

    fs->vhost_dev.acked_features = vdev->guest_features;
    rc = vhost_dev_start(&fs->vhost_dev, vdev);
    if (rc < 0) {
        error_report("Error starting vhost: %d", -rc);
        /* Unbind guest notifiers first, then drop the host notifiers. */
        bus_class->set_guest_notifiers(bus->parent, fs->vhost_dev.nvqs, false);
        vhost_dev_disable_notifiers(&fs->vhost_dev, vdev);
        return;
    }

    /*
     * guest_notifier_mask/pending not used yet, so just unmask
     * everything here.  virtio-pci will do the right thing by
     * enabling/disabling irqfd.
     */
    for (vq = 0; vq < fs->vhost_dev.nvqs; vq++) {
        vhost_virtqueue_mask(&fs->vhost_dev, vdev, vq, false);
    }
}
|
|
|
|
|
|
|
|
/*
 * Stop the vhost backend: stop the vhost device, unbind the guest
 * notifiers and, if that succeeded, disable the host notifiers.
 * Does nothing when the bus binding has no set_guest_notifiers hook.
 */
static void vuf_stop(VirtIODevice *vdev)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    BusState *bus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
    int rc;

    if (bus_class->set_guest_notifiers == NULL) {
        return;
    }

    vhost_dev_stop(&fs->vhost_dev, vdev);

    rc = bus_class->set_guest_notifiers(bus->parent, fs->vhost_dev.nvqs, false);
    if (rc >= 0) {
        vhost_dev_disable_notifiers(&fs->vhost_dev, vdev);
        return;
    }

    /* Host notifiers are left enabled when unbinding failed. */
    error_report("vhost guest notifier cleanup failed: %d", rc);
}
|
|
|
|
|
|
|
|
/*
 * React to a device status change.  The backend should run only while the
 * VM is running and the driver has set DRIVER_OK; start or stop it when
 * its current state differs from that.
 */
static void vuf_set_status(VirtIODevice *vdev, uint8_t status)
{
    VHostUserFS *fs = VHOST_USER_FS(vdev);
    bool want_started = vdev->vm_running &&
                        (status & VIRTIO_CONFIG_S_DRIVER_OK);

    /* Already in the desired state: nothing to do. */
    if (fs->vhost_dev.started == want_started) {
        return;
    }

    if (!want_started) {
        vuf_stop(vdev);
        return;
    }

    vuf_start(vdev);
}
|
|
|
|
|
|
|
|
static uint64_t vuf_get_features(VirtIODevice *vdev,
|
|
|
|
uint64_t requested_features,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
/* No feature bits used yet */
|
|
|
|
return requested_features;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Virtqueue output handler, intentionally empty.
 *
 * The vhost-user daemon services the queues, so this is not normally
 * called; it exists because virtio's cleanup path can still invoke it.
 */
static void vuf_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
}
|
|
|
|
|
|
|
|
/* Forward a guest notifier mask/unmask request for queue @idx to vhost. */
static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                    bool mask)
{
    VHostUserFS *vuf = VHOST_USER_FS(vdev);
    struct vhost_dev *hdev = &vuf->vhost_dev;

    vhost_virtqueue_mask(hdev, vdev, idx, mask);
}
|
|
|
|
|
|
|
|
/* Ask vhost whether a guest notification is pending for queue @idx. */
static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VHostUserFS *vuf = VHOST_USER_FS(vdev);
    struct vhost_dev *hdev = &vuf->vhost_dev;

    return vhost_virtqueue_pending(hdev, idx);
}
|
|
|
|
|
|
|
|
/*
 * Realize the vhost-user-fs device.
 *
 * Validates the user-supplied properties (chardev, tag,
 * num-request-queues, queue-size), initializes the vhost-user state,
 * creates the hiprio queue and the request queues, and initializes the
 * vhost device.
 *
 * On a validation or vhost_user_init() failure the function returns
 * before anything needs to be torn down.  If vhost_dev_init() fails,
 * @errp is set and everything set up so far is released again.
 */
static void vuf_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostUserFS *fs = VHOST_USER_FS(dev);
    struct vhost_virtqueue *vhost_vqs;
    unsigned int i;
    size_t len;
    int ret;

    if (!fs->conf.chardev.chr) {
        error_setg(errp, "missing chardev");
        return;
    }

    if (!fs->conf.tag) {
        error_setg(errp, "missing tag property");
        return;
    }
    len = strlen(fs->conf.tag);
    if (len == 0) {
        error_setg(errp, "tag property cannot be empty");
        return;
    }
    /* A tag that exactly fills the config field is accepted. */
    if (len > sizeof_field(struct virtio_fs_config, tag)) {
        error_setg(errp, "tag property must be %zu bytes or less",
                   sizeof_field(struct virtio_fs_config, tag));
        return;
    }

    if (fs->conf.num_request_queues == 0) {
        error_setg(errp, "num-request-queues property must be larger than 0");
        return;
    }

    if (!is_power_of_2(fs->conf.queue_size)) {
        error_setg(errp, "queue-size property must be a power of 2");
        return;
    }

    if (fs->conf.queue_size > VIRTQUEUE_MAX_SIZE) {
        error_setg(errp, "queue-size property must be %u or smaller",
                   VIRTQUEUE_MAX_SIZE);
        return;
    }

    if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) {
        return;
    }

    virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
                sizeof(struct virtio_fs_config));

    /* Hiprio queue */
    fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size,
                                     vuf_handle_output);

    /* Request queues */
    fs->req_vqs = g_new(VirtQueue *, fs->conf.num_request_queues);
    for (i = 0; i < fs->conf.num_request_queues; i++) {
        fs->req_vqs[i] = virtio_add_queue(vdev, fs->conf.queue_size,
                                          vuf_handle_output);
    }

    /* 1 high prio queue, plus the number configured */
    fs->vhost_dev.nvqs = 1 + fs->conf.num_request_queues;

    /*
     * Keep our own reference to the vhost virtqueue array so the error
     * path can free it even if fs->vhost_dev no longer points at it.
     */
    vhost_vqs = g_new0(struct vhost_virtqueue, fs->vhost_dev.nvqs);
    fs->vhost_dev.vqs = vhost_vqs;

    ret = vhost_dev_init(&fs->vhost_dev, &fs->vhost_user,
                         VHOST_BACKEND_TYPE_USER, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_dev_init failed");
        goto err_virtio;
    }

    return;

err_virtio:
    vhost_user_cleanup(&fs->vhost_user);
    virtio_delete_queue(fs->hiprio_vq);
    for (i = 0; i < fs->conf.num_request_queues; i++) {
        virtio_delete_queue(fs->req_vqs[i]);
    }
    g_free(fs->req_vqs);
    fs->req_vqs = NULL;
    virtio_cleanup(vdev);
    /*
     * Free via the saved pointer rather than fs->vhost_dev.vqs.
     * NOTE(review): upstream vhost_dev_init() runs vhost_dev_cleanup() on
     * failure, which zeroes the whole struct vhost_dev; freeing
     * fs->vhost_dev.vqs would then be g_free(NULL) and leak the array.
     * Confirm against hw/virtio/vhost.c.
     */
    g_free(vhost_vqs);
    fs->vhost_dev.vqs = NULL;
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize method no longer ignores such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 17:29:24 +02:00
|
|
|
/*
 * Unrealize the vhost-user-fs device: stop the backend if it is running,
 * clean up the vhost and vhost-user state, delete the virtqueues and free
 * the arrays allocated at realize time.
 */
static void vuf_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostUserFS *fs = VHOST_USER_FS(dev);
    /*
     * Remember the virtqueue array before vhost_dev_cleanup() runs.
     * NOTE(review): upstream vhost_dev_cleanup() zeroes struct vhost_dev,
     * so reading fs->vhost_dev.vqs afterwards yields NULL and the array
     * would leak.  Confirm against hw/virtio/vhost.c.
     */
    struct vhost_virtqueue *vhost_vqs = fs->vhost_dev.vqs;
    int i;

    /* This will stop vhost backend if appropriate. */
    vuf_set_status(vdev, 0);

    vhost_dev_cleanup(&fs->vhost_dev);

    vhost_user_cleanup(&fs->vhost_user);

    virtio_delete_queue(fs->hiprio_vq);
    for (i = 0; i < fs->conf.num_request_queues; i++) {
        virtio_delete_queue(fs->req_vqs[i]);
    }
    g_free(fs->req_vqs);
    virtio_cleanup(vdev);
    g_free(vhost_vqs);
    fs->vhost_dev.vqs = NULL;
}
|
|
|
|
|
|
|
|
static const VMStateDescription vuf_vmstate = {
|
|
|
|
.name = "vhost-user-fs",
|
|
|
|
.unmigratable = 1,
|
|
|
|
};
|
|
|
|
|
|
|
|
static Property vuf_properties[] = {
|
|
|
|
DEFINE_PROP_CHR("chardev", VHostUserFS, conf.chardev),
|
|
|
|
DEFINE_PROP_STRING("tag", VHostUserFS, conf.tag),
|
|
|
|
DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
|
|
|
|
conf.num_request_queues, 1),
|
|
|
|
DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
|
|
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Class initializer: registers the properties, vmstate and category on
 * the DeviceClass and installs the virtio device callbacks.
 */
static void vuf_class_init(ObjectClass *klass, void *data)
{
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *device_class = DEVICE_CLASS(klass);

    /* Generic qdev setup. */
    device_class_set_props(device_class, vuf_properties);
    device_class->vmsd = &vuf_vmstate;
    set_bit(DEVICE_CATEGORY_STORAGE, device_class->categories);

    /* Lifecycle hooks. */
    virtio_class->realize = vuf_device_realize;
    virtio_class->unrealize = vuf_device_unrealize;

    /* Feature, config space and status hooks. */
    virtio_class->get_features = vuf_get_features;
    virtio_class->get_config = vuf_get_config;
    virtio_class->set_status = vuf_set_status;

    /* Guest notifier hooks. */
    virtio_class->guest_notifier_mask = vuf_guest_notifier_mask;
    virtio_class->guest_notifier_pending = vuf_guest_notifier_pending;
}
|
|
|
|
|
|
|
|
static const TypeInfo vuf_info = {
|
|
|
|
.name = TYPE_VHOST_USER_FS,
|
|
|
|
.parent = TYPE_VIRTIO_DEVICE,
|
|
|
|
.instance_size = sizeof(VHostUserFS),
|
|
|
|
.class_init = vuf_class_init,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void vuf_register_types(void)
|
|
|
|
{
|
|
|
|
type_register_static(&vuf_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(vuf_register_types)
|