virtio, pc, pci: features, fixes, cleanups

virtio-pmem support.
 libvhost user mq support.
 A bunch of fixes all over the place.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQEbBAABAgAGBQJdHmkBAAoJECgfDbjSjVRpEAIH+Kmy8n5Et9NzsnmNqHAiC/pg
 3V5wGyp9M4ZJVPXC0z/Q1sYJ3YYP6dBd4tjj2/7LzYZSlqlQIs83UlQCo0XTiliH
 /jZD/IaAZABnfB7vAeZW67WNT2a20xG2Jr83083lSaDUI/pfIdvbMelIbBLmo/kd
 tWdAAWT0kcGYjyz4xQQgtAH6zAQUleKE7ECUJ2TpJQbSMLxdI/YTaoYqek471YdP
 ju5OLBO3WbNkSE9JYz4MJqTudYK0sKu568UqBVF8JdpFd5Cv+X/OI+bCsc4QK8KN
 DTtFVVvbm1KGPSceqc9rwsDjO4Wd8ThvuZxrB029AahD6vT82F13IHpi/S29Fw==
 =WAFb
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio, pc, pci: features, fixes, cleanups

virtio-pmem support.
libvhost user mq support.
A bunch of fixes all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 04 Jul 2019 22:00:49 BST
# gpg:                using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (22 commits)
  docs: avoid vhost-user-net specifics in multiqueue section
  libvhost-user: implement VHOST_USER_PROTOCOL_F_MQ
  libvhost-user: support many virtqueues
  libvhost-user: add vmsg_set_reply_u64() helper
  pc: Move compat_apic_id_mode variable to PCMachineClass
  virtio: Don't change "started" flag on virtio_vmstate_change()
  virtio: Make sure we get correct state of device on handle_aio_output()
  virtio: Set "start_on_kick" on virtio_set_features()
  virtio: Set "start_on_kick" for legacy devices
  virtio: add "use-started" property
  virtio-pci: fix missing device properties
  pc: Support for virtio-pmem-pci
  numa: Handle virtio-pmem in NUMA stats
  hmp: Handle virtio-pmem when printing memory device infos
  virtio-pci: Proxy for virtio-pmem
  virtio-pmem: sync linux headers
  virtio-pci: Allow to specify additional interfaces for the base type
  virtio-pmem: add virtio device
  pcie: minor cleanups for slot control/status
  pcie: work around for racy guest init
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2019-07-05 09:51:50 +01:00
commit c35d17cabc
36 changed files with 843 additions and 161 deletions

View File

@ -131,18 +131,24 @@ static void vug_watch(VuDev *dev, int condition, void *data)
}
}
void
vug_init(VugDev *dev, int socket,
bool
vug_init(VugDev *dev, uint16_t max_queues, int socket,
vu_panic_cb panic, const VuDevIface *iface)
{
g_assert(dev);
g_assert(iface);
vu_init(&dev->parent, socket, panic, set_watch, remove_watch, iface);
if (!vu_init(&dev->parent, max_queues, socket, panic, set_watch,
remove_watch, iface)) {
return false;
}
dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL,
(GDestroyNotify) g_source_destroy);
dev->src = vug_source_new(dev, socket, G_IO_IN, vug_watch, NULL);
return true;
}
void

View File

@ -25,7 +25,7 @@ typedef struct VugDev {
GSource *src;
} VugDev;
void vug_init(VugDev *dev, int socket,
bool vug_init(VugDev *dev, uint16_t max_queues, int socket,
vu_panic_cb panic, const VuDevIface *iface);
void vug_deinit(VugDev *dev);

View File

@ -216,6 +216,15 @@ vmsg_close_fds(VhostUserMsg *vmsg)
}
}
/* Set reply payload.u64 and clear request flags and fd_num */
static void vmsg_set_reply_u64(VhostUserMsg *vmsg, uint64_t val)
{
vmsg->flags = 0; /* defaults will be set by vu_send_reply() */
vmsg->size = sizeof(vmsg->payload.u64);
vmsg->payload.u64 = val;
vmsg->fd_num = 0;
}
/* A test to see if we have userfault available */
static bool
have_userfault(void)
@ -484,9 +493,9 @@ vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
static void
vu_set_enable_all_rings(VuDev *dev, bool enabled)
{
int i;
uint16_t i;
for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
for (i = 0; i < dev->max_queues; i++) {
dev->vq[i].enable = enabled;
}
}
@ -907,7 +916,7 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (index >= VHOST_MAX_NR_VIRTQUEUE) {
if (index >= dev->max_queues) {
vmsg_close_fds(vmsg);
vu_panic(dev, "Invalid queue index: %u", index);
return false;
@ -1151,7 +1160,8 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
static bool
vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ |
1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD;
@ -1168,10 +1178,7 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
features |= dev->iface->get_protocol_features(dev);
}
vmsg->payload.u64 = features;
vmsg->size = sizeof(vmsg->payload.u64);
vmsg->fd_num = 0;
vmsg_set_reply_u64(vmsg, features);
return true;
}
@ -1194,8 +1201,8 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
static bool
vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
{
DPRINT("Function %s() not implemented yet.\n", __func__);
return false;
vmsg_set_reply_u64(vmsg, dev->max_queues);
return true;
}
static bool
@ -1207,7 +1214,7 @@ vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
DPRINT("State.index: %d\n", index);
DPRINT("State.enable: %d\n", enable);
if (index >= VHOST_MAX_NR_VIRTQUEUE) {
if (index >= dev->max_queues) {
vu_panic(dev, "Invalid vring_enable index: %u", index);
return false;
}
@ -1307,17 +1314,14 @@ out:
static bool
vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg)
{
vmsg->payload.u64 = -1;
vmsg->size = sizeof(vmsg->payload.u64);
if (dev->nregions) {
vu_panic(dev, "Regions already registered at postcopy-listen");
vmsg_set_reply_u64(vmsg, -1);
return true;
}
dev->postcopy_listening = true;
vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
vmsg->payload.u64 = 0; /* Success */
vmsg_set_reply_u64(vmsg, 0);
return true;
}
@ -1332,10 +1336,7 @@ vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
DPRINT("%s: Done close\n", __func__);
}
vmsg->fd_num = 0;
vmsg->payload.u64 = 0;
vmsg->size = sizeof(vmsg->payload.u64);
vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
vmsg_set_reply_u64(vmsg, 0);
DPRINT("%s: exit\n", __func__);
return true;
}
@ -1582,7 +1583,7 @@ vu_deinit(VuDev *dev)
}
dev->nregions = 0;
for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
for (i = 0; i < dev->max_queues; i++) {
VuVirtq *vq = &dev->vq[i];
if (vq->call_fd != -1) {
@ -1627,18 +1628,23 @@ vu_deinit(VuDev *dev)
if (dev->sock != -1) {
close(dev->sock);
}
free(dev->vq);
dev->vq = NULL;
}
void
bool
vu_init(VuDev *dev,
uint16_t max_queues,
int socket,
vu_panic_cb panic,
vu_set_watch_cb set_watch,
vu_remove_watch_cb remove_watch,
const VuDevIface *iface)
{
int i;
uint16_t i;
assert(max_queues > 0);
assert(socket >= 0);
assert(set_watch);
assert(remove_watch);
@ -1654,18 +1660,28 @@ vu_init(VuDev *dev,
dev->iface = iface;
dev->log_call_fd = -1;
dev->slave_fd = -1;
for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
dev->max_queues = max_queues;
dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
if (!dev->vq) {
DPRINT("%s: failed to malloc virtqueues\n", __func__);
return false;
}
for (i = 0; i < max_queues; i++) {
dev->vq[i] = (VuVirtq) {
.call_fd = -1, .kick_fd = -1, .err_fd = -1,
.notification = true,
};
}
return true;
}
VuVirtq *
vu_get_queue(VuDev *dev, int qidx)
{
assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
assert(qidx < dev->max_queues);
return &dev->vq[qidx];
}

View File

@ -25,7 +25,6 @@
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_LOG_PAGE 4096
#define VHOST_MAX_NR_VIRTQUEUE 8
#define VIRTQUEUE_MAX_SIZE 1024
#define VHOST_MEMORY_MAX_NREGIONS 8
@ -353,7 +352,7 @@ struct VuDev {
int sock;
uint32_t nregions;
VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
VuVirtq *vq;
VuDevInflightInfo inflight_info;
int log_call_fd;
int slave_fd;
@ -362,6 +361,7 @@ struct VuDev {
uint64_t features;
uint64_t protocol_features;
bool broken;
uint16_t max_queues;
/* @set_watch: add or update the given fd to the watch set,
* call cb when condition is met */
@ -391,6 +391,7 @@ typedef struct VuVirtqElement {
/**
* vu_init:
* @dev: a VuDev context
* @max_queues: maximum number of virtqueues
* @socket: the socket connected to vhost-user master
* @panic: a panic callback
* @set_watch: a set_watch callback
@ -398,8 +399,11 @@ typedef struct VuVirtqElement {
* @iface: a VuDevIface structure with vhost-user device callbacks
*
* Intializes a VuDev vhost-user context.
*
* Returns: true on success, false on failure.
**/
void vu_init(VuDev *dev,
bool vu_init(VuDev *dev,
uint16_t max_queues,
int socket,
vu_panic_cb panic,
vu_set_watch_cb set_watch,

View File

@ -25,6 +25,10 @@
#include <sys/ioctl.h>
#endif
enum {
VHOST_USER_BLK_MAX_QUEUES = 8,
};
struct virtio_blk_inhdr {
unsigned char status;
};
@ -334,12 +338,6 @@ static void vub_process_vq(VuDev *vu_dev, int idx)
VuVirtq *vq;
int ret;
if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
fprintf(stderr, "VQ Index out of range: %d\n", idx);
vub_panic_cb(vu_dev, NULL);
return;
}
gdev = container_of(vu_dev, VugDev, parent);
vdev_blk = container_of(gdev, VubDev, parent);
assert(vdev_blk);
@ -631,7 +629,11 @@ int main(int argc, char **argv)
vdev_blk->enable_ro = true;
}
vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);
if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
vub_panic_cb, &vub_iface)) {
fprintf(stderr, "Failed to initialized libvhost-user-glib\n");
goto err;
}
g_main_loop_run(vdev_blk->loop);

View File

@ -25,6 +25,10 @@
#include "virgl.h"
#include "vugbm.h"
enum {
VHOST_USER_GPU_MAX_QUEUES = 2,
};
struct virtio_gpu_simple_resource {
uint32_t resource_id;
uint32_t width;
@ -1169,7 +1173,10 @@ main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
vug_init(&g.dev, fd, vg_panic, &vuiface);
if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) {
g_printerr("Failed to initialize libvhost-user-glib.\n");
exit(EXIT_FAILURE);
}
loop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(loop);

View File

@ -17,6 +17,10 @@
#include "standard-headers/linux/virtio_input.h"
#include "qapi/error.h"
enum {
VHOST_USER_INPUT_MAX_QUEUES = 2,
};
typedef struct virtio_input_event virtio_input_event;
typedef struct virtio_input_config virtio_input_config;
@ -384,7 +388,12 @@ main(int argc, char *argv[])
g_printerr("Invalid vhost-user socket.\n");
exit(EXIT_FAILURE);
}
vug_init(&vi.dev, fd, vi_panic, &vuiface);
if (!vug_init(&vi.dev, VHOST_USER_INPUT_MAX_QUEUES, fd, vi_panic,
&vuiface)) {
g_printerr("Failed to initialize libvhost-user-glib.\n");
exit(EXIT_FAILURE);
}
loop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(loop);

View File

@ -19,6 +19,10 @@
#define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi"
enum {
VHOST_USER_SCSI_MAX_QUEUES = 8,
};
typedef struct VusIscsiLun {
struct iscsi_context *iscsi_ctx;
int iscsi_lun;
@ -231,11 +235,6 @@ static void vus_proc_req(VuDev *vu_dev, int idx)
gdev = container_of(vu_dev, VugDev, parent);
vdev_scsi = container_of(gdev, VusDev, parent);
if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) {
g_warning("VQ Index out of range: %d", idx);
vus_panic_cb(vu_dev, NULL);
return;
}
vq = vu_get_queue(vu_dev, idx);
if (!vq) {
@ -295,12 +294,6 @@ static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started)
assert(vu_dev);
if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) {
g_warning("VQ Index out of range: %d", idx);
vus_panic_cb(vu_dev, NULL);
return;
}
vq = vu_get_queue(vu_dev, idx);
if (idx == 0 || idx == 1) {
@ -398,7 +391,11 @@ int main(int argc, char **argv)
goto err;
}
vug_init(&vdev_scsi->parent, csock, vus_panic_cb, &vus_iface);
if (!vug_init(&vdev_scsi->parent, VHOST_USER_SCSI_MAX_QUEUES, csock,
vus_panic_cb, &vus_iface)) {
g_printerr("Failed to initialize libvhost-user-glib\n");
goto err;
}
g_main_loop_run(vdev_scsi->loop);

View File

@ -324,19 +324,20 @@ must support changing some configuration aspects on the fly.
Multiple queue support
----------------------
Multiple queue is treated as a protocol extension, hence the slave has
to implement protocol features first. The multiple queues feature is
supported only when the protocol feature ``VHOST_USER_PROTOCOL_F_MQ``
(bit 0) is set.
Multiple queue support allows the slave to advertise the maximum number of
queues. This is treated as a protocol extension, hence the slave has to
implement protocol features first. The multiple queues feature is supported
only when the protocol feature ``VHOST_USER_PROTOCOL_F_MQ`` (bit 0) is set.
The max number of queue pairs the slave supports can be queried with
message ``VHOST_USER_GET_QUEUE_NUM``. Master should stop when the
number of requested queues is bigger than that.
The max number of queues the slave supports can be queried with message
``VHOST_USER_GET_QUEUE_NUM``. Master should stop when the number of requested
queues is bigger than that.
As all queues share one connection, the master uses a unique index for each
queue in the sent message to identify a specified queue. One queue pair
is enabled initially. More queues are enabled dynamically, by sending
message ``VHOST_USER_SET_VRING_ENABLE``.
queue in the sent message to identify a specified queue.
The master enables queues by sending message ``VHOST_USER_SET_VRING_ENABLE``.
vhost-user-net has historically automatically enabled the first queue pair.
Migration
---------

View File

@ -191,7 +191,7 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
bool should_start = vdev->started;
bool should_start = virtio_device_started(vdev, status);
int ret;
if (!vdev->vm_running) {
@ -317,7 +317,7 @@ static int vhost_user_blk_connect(DeviceState *dev)
}
/* restore vhost state */
if (vdev->started) {
if (virtio_device_started(vdev, vdev->status)) {
ret = vhost_user_blk_start(vdev);
if (ret < 0) {
error_report("vhost-user-blk: vhost start failed: %s",

View File

@ -30,6 +30,7 @@ GlobalProperty hw_compat_4_0[] = {
{ "bochs-display", "edid", "false" },
{ "virtio-vga", "edid", "false" },
{ "virtio-gpu-pci", "edid", "false" },
{ "virtio-device", "use-started", "false" },
};
const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);

View File

@ -533,6 +533,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[])
MemoryDeviceInfoList *info_list = qmp_memory_device_list();
MemoryDeviceInfoList *info;
PCDIMMDeviceInfo *pcdimm_info;
VirtioPMEMDeviceInfo *vpi;
for (info = info_list; info; info = info->next) {
MemoryDeviceInfo *value = info->value;
@ -540,22 +541,21 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[])
if (value) {
switch (value->type) {
case MEMORY_DEVICE_INFO_KIND_DIMM:
pcdimm_info = value->u.dimm.data;
break;
case MEMORY_DEVICE_INFO_KIND_NVDIMM:
pcdimm_info = value->u.nvdimm.data;
break;
default:
pcdimm_info = NULL;
break;
}
if (pcdimm_info) {
pcdimm_info = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ?
value->u.dimm.data : value->u.nvdimm.data;
node_mem[pcdimm_info->node].node_mem += pcdimm_info->size;
node_mem[pcdimm_info->node].node_plugged_mem +=
pcdimm_info->size;
break;
case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM:
vpi = value->u.virtio_pmem.data;
/* TODO: once we support numa, assign to right node */
node_mem[0].node_mem += vpi->size;
node_mem[0].node_plugged_mem += vpi->size;
break;
default:
g_assert_not_reached();
}
}
}

View File

@ -30,6 +30,7 @@ config PC
# For ACPI builder:
select SERIAL_ISA
select ACPI_VMGENID
select VIRTIO_PMEM_SUPPORTED
config PC_PCI
bool

View File

@ -79,6 +79,8 @@
#include "hw/i386/intel_iommu.h"
#include "hw/net/ne2000-isa.h"
#include "standard-headers/asm-x86/bootparam.h"
#include "hw/virtio/virtio-pmem-pci.h"
#include "hw/mem/memory-device.h"
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ
@ -913,14 +915,6 @@ bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length)
return false;
}
/* Enables contiguous-apic-ID mode, for compatibility */
static bool compat_apic_id_mode;
void enable_compat_apic_id_mode(void)
{
compat_apic_id_mode = true;
}
/* Calculates initial APIC ID for a specific CPU index
*
* Currently we need to be able to calculate the APIC ID from the CPU index
@ -928,13 +922,15 @@ void enable_compat_apic_id_mode(void)
* no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
* all CPUs up to max_cpus.
*/
static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index)
static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms,
unsigned int cpu_index)
{
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
uint32_t correct_id;
static bool warned;
correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index);
if (compat_apic_id_mode) {
if (pcmc->compat_apic_id_mode) {
if (cpu_index != correct_id && !warned && !qtest_enabled()) {
error_report("APIC IDs set in compatibility mode, "
"CPU topology won't match the configuration");
@ -1533,7 +1529,8 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp)
void pc_hot_add_cpu(const int64_t id, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
int64_t apic_id = x86_cpu_apic_id_from_index(id);
PCMachineState *pcms = PC_MACHINE(ms);
int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id);
Error *local_err = NULL;
if (id < 0) {
@ -1569,7 +1566,7 @@ void pc_cpus_init(PCMachineState *pcms)
*
* This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
*/
pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, max_cpus - 1) + 1;
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < smp_cpus; i++) {
pc_new_cpu(possible_cpus->cpus[i].type, possible_cpus->cpus[i].arch_id,
@ -2395,6 +2392,65 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
numa_cpu_pre_plug(cpu_slot, dev, errp);
}
static void pc_virtio_pmem_pci_pre_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
Error *local_err = NULL;
if (!hotplug_dev2) {
/*
* Without a bus hotplug handler, we cannot control the plug/unplug
* order. This should never be the case on x86, however better add
* a safety net.
*/
error_setg(errp, "virtio-pmem-pci not supported on this bus.");
return;
}
/*
* First, see if we can plug this memory device at all. If that
* succeeds, branch of to the actual hotplug handler.
*/
memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
&local_err);
if (!local_err) {
hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err);
}
error_propagate(errp, local_err);
}
static void pc_virtio_pmem_pci_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
Error *local_err = NULL;
/*
* Plug the memory device first and then branch off to the actual
* hotplug handler. If that one fails, we can easily undo the memory
* device bits.
*/
memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
hotplug_handler_plug(hotplug_dev2, dev, &local_err);
if (local_err) {
memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
}
error_propagate(errp, local_err);
}
static void pc_virtio_pmem_pci_unplug_request(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
/* We don't support virtio pmem hot unplug */
error_setg(errp, "virtio pmem device unplug not supported.");
}
static void pc_virtio_pmem_pci_unplug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
/* We don't support virtio pmem hot unplug */
}
static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@ -2402,6 +2458,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
pc_memory_pre_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_pre_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
pc_virtio_pmem_pci_pre_plug(hotplug_dev, dev, errp);
}
}
@ -2412,6 +2470,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
pc_memory_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
pc_virtio_pmem_pci_plug(hotplug_dev, dev, errp);
}
}
@ -2422,6 +2482,8 @@ static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
pc_memory_unplug_request(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_unplug_request_cb(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
pc_virtio_pmem_pci_unplug_request(hotplug_dev, dev, errp);
} else {
error_setg(errp, "acpi: device unplug request for not supported device"
" type: %s", object_get_typename(OBJECT(dev)));
@ -2435,6 +2497,8 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
pc_memory_unplug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_unplug_cb(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
pc_virtio_pmem_pci_unplug(hotplug_dev, dev, errp);
} else {
error_setg(errp, "acpi: device unplug for not supported device"
" type: %s", object_get_typename(OBJECT(dev)));
@ -2445,7 +2509,8 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine,
DeviceState *dev)
{
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
return HOTPLUG_HANDLER(machine);
}
@ -2660,6 +2725,7 @@ static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx)
static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
{
PCMachineState *pcms = PC_MACHINE(ms);
int i;
if (ms->possible_cpus) {
@ -2679,7 +2745,7 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
ms->possible_cpus->cpus[i].type = ms->cpu_type;
ms->possible_cpus->cpus[i].vcpus_count = 1;
ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i);
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,
smp_cores, smp_threads, &topo);
ms->possible_cpus->cpus[i].props.has_socket_id = true;

View File

@ -358,7 +358,6 @@ static void pc_compat_1_4_fn(MachineState *machine)
static void pc_compat_1_3(MachineState *machine)
{
pc_compat_1_4_fn(machine);
enable_compat_apic_id_mode();
}
/* PC compat function for pc-0.14 to pc-1.2 */
@ -708,6 +707,7 @@ DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4_fn,
static void pc_i440fx_1_3_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
static GlobalProperty compat[] = {
PC_CPU_MODEL_IDS("1.3.0")
{ "usb-tablet", "usb_version", "1" },
@ -718,6 +718,7 @@ static void pc_i440fx_1_3_machine_options(MachineClass *m)
pc_i440fx_1_4_machine_options(m);
m->hw_version = "1.3.0";
pcmc->compat_apic_id_mode = true;
compat_props_add(m->compat_props, compat, G_N_ELEMENTS(compat));
}

View File

@ -31,10 +31,13 @@ static void rp_write_config(PCIDevice *d, uint32_t address,
{
uint32_t root_cmd =
pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND);
uint16_t slt_ctl, slt_sta;
pcie_cap_slot_get(d, &slt_ctl, &slt_sta);
pci_bridge_write_config(d, address, val, len);
rp_aer_vector_update(d);
pcie_cap_slot_write_config(d, address, val, len);
pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len);
pcie_aer_write_config(d, address, val, len);
pcie_aer_root_write_config(d, address, val, len, root_cmd);
}

View File

@ -41,9 +41,12 @@
static void xio3130_downstream_write_config(PCIDevice *d, uint32_t address,
uint32_t val, int len)
{
uint16_t slt_ctl, slt_sta;
pcie_cap_slot_get(d, &slt_sta, &slt_ctl);
pci_bridge_write_config(d, address, val, len);
pcie_cap_flr_write_config(d, address, val, len);
pcie_cap_slot_write_config(d, address, val, len);
pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len);
pcie_aer_write_config(d, address, val, len);
}

View File

@ -383,7 +383,7 @@ static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event)
{
/* Minor optimization: if nothing changed - no event is needed. */
if (pci_word_test_and_set_mask(dev->config + dev->exp.exp_cap +
PCI_EXP_SLTSTA, event)) {
PCI_EXP_SLTSTA, event) == event) {
return;
}
hotplug_event_notify(dev);
@ -594,7 +594,16 @@ void pcie_cap_slot_reset(PCIDevice *dev)
hotplug_event_update_event_status(dev);
}
void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta)
{
uint32_t pos = dev->exp.exp_cap;
uint8_t *exp_cap = dev->config + pos;
*slt_ctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
*slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
}
void pcie_cap_slot_write_config(PCIDevice *dev,
uint16_t old_slt_ctl, uint16_t old_slt_sta,
uint32_t addr, uint32_t val, int len)
{
uint32_t pos = dev->exp.exp_cap;
@ -602,6 +611,25 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
/*
* Guests tend to clears all bits during init.
* If they clear bits that weren't set this is racy and will lose events:
* not a big problem for manual button presses, but a problem for us.
* As a work-around, detect this and revert status to what it was
* before the write.
*
* Note: in theory this can be detected as a duplicate button press
* which cancels the previous press. Does not seem to happen in
* practice as guests seem to only have this bug during init.
*/
#define PCIE_SLOT_EVENTS (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | \
PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC | \
PCI_EXP_SLTSTA_CC)
if (val & ~old_slt_sta & PCIE_SLOT_EVENTS) {
sltsta = (sltsta & ~PCIE_SLOT_EVENTS) | (old_slt_sta & PCIE_SLOT_EVENTS);
pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta);
}
hotplug_event_clear(dev);
}
@ -619,11 +647,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
}
/*
* If the slot is polulated, power indicator is off and power
* If the slot is populated, power indicator is off and power
* controller is off, it is safe to detach the devices.
*
* Note: don't detach if condition was already true:
* this is a work around for guests that overwrite
* control of powered off slots before powering them on.
*/
if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
((val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF)) {
(val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
(!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
(old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
pcie_unplug_device, NULL);

View File

@ -29,3 +29,13 @@ config VIRTIO_CRYPTO
bool
default y
depends on VIRTIO
config VIRTIO_PMEM_SUPPORTED
bool
config VIRTIO_PMEM
bool
default y
depends on VIRTIO
depends on VIRTIO_PMEM_SUPPORTED
select MEM_DEVICE

View File

@ -12,6 +12,8 @@ common-obj-$(CONFIG_VIRTIO_MMIO) += virtio-mmio.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio-balloon.o
obj-$(CONFIG_VIRTIO_CRYPTO) += virtio-crypto.o
obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-pci.o
obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o
common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o
obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o
ifeq ($(CONFIG_VIRTIO_PCI),y)

View File

@ -1913,13 +1913,6 @@ static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
dc->props = virtio_pci_generic_properties;
}
/* Used when the generic type and the base type is the same */
static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data)
{
virtio_pci_base_class_init(klass, data);
virtio_pci_generic_class_init(klass, NULL);
}
static void virtio_pci_transitional_instance_init(Object *obj)
{
VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
@ -1938,15 +1931,15 @@ static void virtio_pci_non_transitional_instance_init(Object *obj)
void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
{
char *base_name = NULL;
TypeInfo base_type_info = {
.name = t->base_name,
.parent = t->parent ? t->parent : TYPE_VIRTIO_PCI,
.instance_size = t->instance_size,
.instance_init = t->instance_init,
.class_size = t->class_size,
.class_init = virtio_pci_base_class_init,
.class_data = (void *)t,
.abstract = true,
.interfaces = t->interfaces,
};
TypeInfo generic_type_info = {
.name = t->generic_name,
@ -1961,13 +1954,20 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
if (!base_type_info.name) {
/* No base type -> register a single generic device type */
base_type_info.name = t->generic_name;
base_type_info.class_init = virtio_pci_generic_base_class_init;
base_type_info.interfaces = generic_type_info.interfaces;
base_type_info.abstract = false;
generic_type_info.name = NULL;
/* use intermediate %s-base-type to add generic device props */
base_name = g_strdup_printf("%s-base-type", t->generic_name);
base_type_info.name = base_name;
base_type_info.class_init = virtio_pci_generic_class_init;
generic_type_info.parent = base_name;
generic_type_info.class_init = virtio_pci_base_class_init;
generic_type_info.class_data = (void *)t;
assert(!t->non_transitional_name);
assert(!t->transitional_name);
} else {
base_type_info.class_init = virtio_pci_base_class_init;
base_type_info.class_data = (void *)t;
}
type_register(&base_type_info);
@ -2005,6 +2005,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
};
type_register(&transitional_type_info);
}
g_free(base_name);
}
/* virtio-pci-bus */

View File

@ -252,6 +252,7 @@ typedef struct VirtioPCIDeviceTypeInfo {
size_t class_size;
void (*instance_init)(Object *obj);
void (*class_init)(ObjectClass *klass, void *data);
InterfaceInfo *interfaces;
} VirtioPCIDeviceTypeInfo;
/* Register virtio-pci type(s). @t must be static. */

131
hw/virtio/virtio-pmem-pci.c Normal file
View File

@ -0,0 +1,131 @@
/*
* Virtio PMEM PCI device
*
* Copyright (C) 2018-2019 Red Hat, Inc.
*
* Authors:
* Pankaj Gupta <pagupta@redhat.com>
* David Hildenbrand <david@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "virtio-pmem-pci.h"
#include "hw/mem/memory-device.h"
#include "qapi/error.h"
static void virtio_pmem_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
VirtIOPMEMPCI *pmem_pci = VIRTIO_PMEM_PCI(vpci_dev);
DeviceState *vdev = DEVICE(&pmem_pci->vdev);
qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
object_property_set_bool(OBJECT(vdev), true, "realized", errp);
}
static void virtio_pmem_pci_set_addr(MemoryDeviceState *md, uint64_t addr,
Error **errp)
{
object_property_set_uint(OBJECT(md), addr, VIRTIO_PMEM_ADDR_PROP, errp);
}
static uint64_t virtio_pmem_pci_get_addr(const MemoryDeviceState *md)
{
return object_property_get_uint(OBJECT(md), VIRTIO_PMEM_ADDR_PROP,
&error_abort);
}
static MemoryRegion *virtio_pmem_pci_get_memory_region(MemoryDeviceState *md,
Error **errp)
{
VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md);
VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev);
VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem);
return vpc->get_memory_region(pmem, errp);
}
static uint64_t virtio_pmem_pci_get_plugged_size(const MemoryDeviceState *md,
Error **errp)
{
VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md);
VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev);
VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem);
MemoryRegion *mr = vpc->get_memory_region(pmem, errp);
/* the plugged size corresponds to the region size */
return mr ? 0 : memory_region_size(mr);
}
static void virtio_pmem_pci_fill_device_info(const MemoryDeviceState *md,
MemoryDeviceInfo *info)
{
VirtioPMEMDeviceInfo *vi = g_new0(VirtioPMEMDeviceInfo, 1);
VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md);
VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev);
VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem);
DeviceState *dev = DEVICE(md);
if (dev->id) {
vi->has_id = true;
vi->id = g_strdup(dev->id);
}
/* let the real device handle everything else */
vpc->fill_device_info(pmem, vi);
info->u.virtio_pmem.data = vi;
info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM;
}
static void virtio_pmem_pci_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);
k->realize = virtio_pmem_pci_realize;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_PMEM;
pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
pcidev_k->class_id = PCI_CLASS_OTHERS;
mdc->get_addr = virtio_pmem_pci_get_addr;
mdc->set_addr = virtio_pmem_pci_set_addr;
mdc->get_plugged_size = virtio_pmem_pci_get_plugged_size;
mdc->get_memory_region = virtio_pmem_pci_get_memory_region;
mdc->fill_device_info = virtio_pmem_pci_fill_device_info;
}
static void virtio_pmem_pci_instance_init(Object *obj)
{
VirtIOPMEMPCI *dev = VIRTIO_PMEM_PCI(obj);
virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
TYPE_VIRTIO_PMEM);
}
static const VirtioPCIDeviceTypeInfo virtio_pmem_pci_info = {
.base_name = TYPE_VIRTIO_PMEM_PCI,
.generic_name = "virtio-pmem-pci",
.transitional_name = "virtio-pmem-pci-transitional",
.non_transitional_name = "virtio-pmem-pci-non-transitional",
.instance_size = sizeof(VirtIOPMEMPCI),
.instance_init = virtio_pmem_pci_instance_init,
.class_init = virtio_pmem_pci_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_MEMORY_DEVICE },
{ }
},
};
static void virtio_pmem_pci_register_types(void)
{
virtio_pci_types_register(&virtio_pmem_pci_info);
}
type_init(virtio_pmem_pci_register_types)

View File

@ -0,0 +1,34 @@
/*
* Virtio PMEM PCI device
*
* Copyright (C) 2018-2019 Red Hat, Inc.
*
* Authors:
* Pankaj Gupta <pagupta@redhat.com>
* David Hildenbrand <david@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_VIRTIO_PMEM_PCI_H
#define QEMU_VIRTIO_PMEM_PCI_H
#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-pmem.h"
typedef struct VirtIOPMEMPCI VirtIOPMEMPCI;
/*
* virtio-pmem-pci: This extends VirtioPCIProxy.
*/
#define TYPE_VIRTIO_PMEM_PCI "virtio-pmem-pci-base"
#define VIRTIO_PMEM_PCI(obj) \
OBJECT_CHECK(VirtIOPMEMPCI, (obj), TYPE_VIRTIO_PMEM_PCI)
struct VirtIOPMEMPCI {
VirtIOPCIProxy parent_obj;
VirtIOPMEM vdev;
};
#endif /* QEMU_VIRTIO_PMEM_PCI_H */

189
hw/virtio/virtio-pmem.c Normal file
View File

@ -0,0 +1,189 @@
/*
* Virtio PMEM device
*
* Copyright (C) 2018-2019 Red Hat, Inc.
*
* Authors:
* Pankaj Gupta <pagupta@redhat.com>
* David Hildenbrand <david@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio-pmem.h"
#include "hw/virtio/virtio-access.h"
#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/virtio_pmem.h"
#include "block/aio.h"
#include "block/thread-pool.h"
typedef struct VirtIODeviceRequest {
VirtQueueElement elem;
int fd;
VirtIOPMEM *pmem;
VirtIODevice *vdev;
struct virtio_pmem_req req;
struct virtio_pmem_resp resp;
} VirtIODeviceRequest;
static int worker_cb(void *opaque)
{
VirtIODeviceRequest *req_data = opaque;
int err = 0;
/* flush raw backing image */
err = fsync(req_data->fd);
if (err != 0) {
err = 1;
}
virtio_stw_p(req_data->vdev, &req_data->resp.ret, err);
return 0;
}
static void done_cb(void *opaque, int ret)
{
VirtIODeviceRequest *req_data = opaque;
int len = iov_from_buf(req_data->elem.in_sg, req_data->elem.in_num, 0,
&req_data->resp, sizeof(struct virtio_pmem_resp));
/* Callbacks are serialized, so no need to use atomic ops. */
virtqueue_push(req_data->pmem->rq_vq, &req_data->elem, len);
virtio_notify((VirtIODevice *)req_data->pmem, req_data->pmem->rq_vq);
g_free(req_data);
}
static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIODeviceRequest *req_data;
VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev);
ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
req_data = virtqueue_pop(vq, sizeof(VirtIODeviceRequest));
if (!req_data) {
virtio_error(vdev, "virtio-pmem missing request data");
return;
}
if (req_data->elem.out_num < 1 || req_data->elem.in_num < 1) {
virtio_error(vdev, "virtio-pmem request not proper");
g_free(req_data);
return;
}
req_data->fd = memory_region_get_fd(&backend->mr);
req_data->pmem = pmem;
req_data->vdev = vdev;
thread_pool_submit_aio(pool, worker_cb, req_data, done_cb, req_data);
}
static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config)
{
VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
struct virtio_pmem_config *pmemcfg = (struct virtio_pmem_config *) config;
virtio_stq_p(vdev, &pmemcfg->start, pmem->start);
virtio_stq_p(vdev, &pmemcfg->size, memory_region_size(&pmem->memdev->mr));
}
static uint64_t virtio_pmem_get_features(VirtIODevice *vdev, uint64_t features,
Error **errp)
{
return features;
}
static void virtio_pmem_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOPMEM *pmem = VIRTIO_PMEM(dev);
if (!pmem->memdev) {
error_setg(errp, "virtio-pmem memdev not set");
return;
}
if (host_memory_backend_is_mapped(pmem->memdev)) {
char *path = object_get_canonical_path_component(OBJECT(pmem->memdev));
error_setg(errp, "can't use already busy memdev: %s", path);
g_free(path);
return;
}
host_memory_backend_set_mapped(pmem->memdev, true);
virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM,
sizeof(struct virtio_pmem_config));
pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush);
}
static void virtio_pmem_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOPMEM *pmem = VIRTIO_PMEM(dev);
host_memory_backend_set_mapped(pmem->memdev, false);
virtio_cleanup(vdev);
}
static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem,
VirtioPMEMDeviceInfo *vi)
{
vi->memaddr = pmem->start;
vi->size = pmem->memdev ? memory_region_size(&pmem->memdev->mr) : 0;
vi->memdev = object_get_canonical_path(OBJECT(pmem->memdev));
}
static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem,
Error **errp)
{
if (!pmem->memdev) {
error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP);
return NULL;
}
return &pmem->memdev->mr;
}
static Property virtio_pmem_properties[] = {
DEFINE_PROP_UINT64(VIRTIO_PMEM_ADDR_PROP, VirtIOPMEM, start, 0),
DEFINE_PROP_LINK(VIRTIO_PMEM_MEMDEV_PROP, VirtIOPMEM, memdev,
TYPE_MEMORY_BACKEND, HostMemoryBackend *),
DEFINE_PROP_END_OF_LIST(),
};
static void virtio_pmem_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
VirtIOPMEMClass *vpc = VIRTIO_PMEM_CLASS(klass);
dc->props = virtio_pmem_properties;
vdc->realize = virtio_pmem_realize;
vdc->unrealize = virtio_pmem_unrealize;
vdc->get_config = virtio_pmem_get_config;
vdc->get_features = virtio_pmem_get_features;
vpc->fill_device_info = virtio_pmem_fill_device_info;
vpc->get_memory_region = virtio_pmem_get_memory_region;
}
static TypeInfo virtio_pmem_info = {
.name = TYPE_VIRTIO_PMEM,
.parent = TYPE_VIRTIO_DEVICE,
.class_size = sizeof(VirtIOPMEMClass),
.class_init = virtio_pmem_class_init,
.instance_size = sizeof(VirtIOPMEM),
};
static void virtio_register_types(void)
{
type_register_static(&virtio_pmem_info);
}
type_init(virtio_register_types)

View File

@ -1162,9 +1162,10 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
}
}
}
vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
if (unlikely(vdev->start_on_kick && vdev->started)) {
vdev->start_on_kick = false;
if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
}
if (k->set_status) {
@ -1214,8 +1215,7 @@ void virtio_reset(void *opaque)
k->reset(vdev);
}
vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
vdev->start_on_kick = false;
vdev->started = false;
vdev->broken = false;
vdev->guest_features = 0;
@ -1536,8 +1536,7 @@ static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
ret = vq->handle_aio_output(vdev, vq);
if (unlikely(vdev->start_on_kick)) {
vdev->started = true;
vdev->start_on_kick = false;
virtio_set_started(vdev, true);
}
}
@ -1557,8 +1556,7 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
vq->handle_output(vdev, vq);
if (unlikely(vdev->start_on_kick)) {
vdev->started = true;
vdev->start_on_kick = false;
virtio_set_started(vdev, true);
}
}
}
@ -1576,11 +1574,10 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
event_notifier_set(&vq->host_notifier);
} else if (vq->handle_output) {
vq->handle_output(vdev, vq);
}
if (unlikely(vdev->start_on_kick)) {
vdev->started = true;
vdev->start_on_kick = false;
if (unlikely(vdev->start_on_kick)) {
virtio_set_started(vdev, true);
}
}
}
@ -2069,14 +2066,21 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
return -EINVAL;
}
ret = virtio_set_features_nocheck(vdev, val);
if (!ret && virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
/* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
int i;
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
if (vdev->vq[i].vring.num != 0) {
virtio_init_region_cache(vdev, i);
if (!ret) {
if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
/* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
int i;
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
if (vdev->vq[i].vring.num != 0) {
virtio_init_region_cache(vdev, i);
}
}
}
if (!virtio_device_started(vdev, vdev->status) &&
!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
vdev->start_on_kick = true;
}
}
return ret;
}
@ -2228,6 +2232,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
}
}
if (!virtio_device_started(vdev, vdev->status) &&
!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
vdev->start_on_kick = true;
}
rcu_read_lock();
for (i = 0; i < num; i++) {
if (vdev->vq[i].vring.desc) {
@ -2291,7 +2300,7 @@ static void virtio_vmstate_change(void *opaque, int running, RunState state)
VirtIODevice *vdev = opaque;
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
bool backend_run = running && vdev->started;
bool backend_run = running && virtio_device_started(vdev, vdev->status);
vdev->vm_running = running;
if (backend_run) {
@ -2330,8 +2339,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
}
vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
vdev->start_on_kick = false;
vdev->started = false;
vdev->device_id = device_id;
vdev->status = 0;
@ -2669,6 +2677,7 @@ static void virtio_device_instance_finalize(Object *obj)
static Property virtio_properties[] = {
DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
DEFINE_PROP_END_OF_LIST(),
};

View File

@ -134,6 +134,9 @@ typedef struct PCMachineClass {
/* use PVH to load kernels that support this feature */
bool pvh_enabled;
/* Enables contiguous-apic-ID mode */
bool compat_apic_id_mode;
} PCMachineClass;
#define TYPE_PC_MACHINE "generic-pc-machine"

View File

@ -85,6 +85,7 @@ extern bool pci_available;
#define PCI_DEVICE_ID_VIRTIO_RNG 0x1005
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
#define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012
#define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013
#define PCI_VENDOR_ID_REDHAT 0x1b36
#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001

View File

@ -107,7 +107,9 @@ void pcie_cap_lnkctl_reset(PCIDevice *dev);
void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot);
void pcie_cap_slot_reset(PCIDevice *dev);
void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slot_ctl, uint16_t *slt_sta);
void pcie_cap_slot_write_config(PCIDevice *dev,
uint16_t old_slot_ctl, uint16_t old_slt_sta,
uint32_t addr, uint32_t val, int len);
int pcie_cap_slot_post_load(void *opaque, int version_id);
void pcie_cap_slot_push_attention_button(PCIDevice *dev);

View File

@ -0,0 +1,49 @@
/*
* Virtio PMEM device
*
* Copyright (C) 2018-2019 Red Hat, Inc.
*
* Authors:
* Pankaj Gupta <pagupta@redhat.com>
* David Hildenbrand <david@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*/
#ifndef HW_VIRTIO_PMEM_H
#define HW_VIRTIO_PMEM_H
#include "hw/virtio/virtio.h"
#include "sysemu/hostmem.h"
#define TYPE_VIRTIO_PMEM "virtio-pmem"
#define VIRTIO_PMEM(obj) \
OBJECT_CHECK(VirtIOPMEM, (obj), TYPE_VIRTIO_PMEM)
#define VIRTIO_PMEM_CLASS(oc) \
OBJECT_CLASS_CHECK(VirtIOPMEMClass, (oc), TYPE_VIRTIO_PMEM)
#define VIRTIO_PMEM_GET_CLASS(obj) \
OBJECT_GET_CLASS(VirtIOPMEMClass, (obj), TYPE_VIRTIO_PMEM)
#define VIRTIO_PMEM_ADDR_PROP "memaddr"
#define VIRTIO_PMEM_MEMDEV_PROP "memdev"
typedef struct VirtIOPMEM {
VirtIODevice parent_obj;
VirtQueue *rq_vq;
uint64_t start;
HostMemoryBackend *memdev;
} VirtIOPMEM;
typedef struct VirtIOPMEMClass {
/* private */
VirtIODevice parent;
/* public */
void (*fill_device_info)(const VirtIOPMEM *pmem, VirtioPMEMDeviceInfo *vi);
MemoryRegion *(*get_memory_region)(VirtIOPMEM *pmem, Error **errp);
} VirtIOPMEMClass;
#endif

View File

@ -105,8 +105,9 @@ struct VirtIODevice
uint16_t device_id;
bool vm_running;
bool broken; /* device in invalid state, needs reset */
bool use_started;
bool started;
bool start_on_kick; /* virtio 1.0 transitional devices support that */
bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
VMChangeStateEntry *vmstate;
char *bus_name;
uint8_t device_endian;
@ -351,4 +352,24 @@ static inline bool virtio_is_big_endian(VirtIODevice *vdev)
/* Devices conforming to VIRTIO 1.0 or later are always LE. */
return false;
}
static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
{
if (vdev->use_started) {
return vdev->started;
}
return status & VIRTIO_CONFIG_S_DRIVER_OK;
}
static inline void virtio_set_started(VirtIODevice *vdev, bool started)
{
if (started) {
vdev->start_on_kick = false;
}
if (vdev->use_started) {
vdev->started = started;
}
}
#endif

View File

@ -43,5 +43,6 @@
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#endif /* _LINUX_VIRTIO_IDS_H */

View File

@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Definitions for virtio-pmem devices.
*
* Copyright (C) 2019 Red Hat, Inc.
*
* Author(s): Pankaj Gupta <pagupta@redhat.com>
*/
#ifndef _UAPI_LINUX_VIRTIO_PMEM_H
#define _UAPI_LINUX_VIRTIO_PMEM_H
#include "standard-headers/linux/types.h"
#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/virtio_config.h"
struct virtio_pmem_config {
uint64_t start;
uint64_t size;
};
#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0
struct virtio_pmem_resp {
/* Host return status corresponding to flush request */
uint32_t ret;
};
struct virtio_pmem_req {
/* command type */
uint32_t type;
};
#endif

View File

@ -2567,6 +2567,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
Error *err = NULL;
MemoryDeviceInfoList *info_list = qmp_query_memory_devices(&err);
MemoryDeviceInfoList *info;
VirtioPMEMDeviceInfo *vpi;
MemoryDeviceInfo *value;
PCDIMMDeviceInfo *di;
@ -2576,19 +2577,9 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
if (value) {
switch (value->type) {
case MEMORY_DEVICE_INFO_KIND_DIMM:
di = value->u.dimm.data;
break;
case MEMORY_DEVICE_INFO_KIND_NVDIMM:
di = value->u.nvdimm.data;
break;
default:
di = NULL;
break;
}
if (di) {
di = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ?
value->u.dimm.data : value->u.nvdimm.data;
monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
MemoryDeviceInfoKind_str(value->type),
di->id ? di->id : "");
@ -2601,6 +2592,18 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
di->hotplugged ? "true" : "false");
monitor_printf(mon, " hotpluggable: %s\n",
di->hotpluggable ? "true" : "false");
break;
case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM:
vpi = value->u.virtio_pmem.data;
monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
MemoryDeviceInfoKind_str(value->type),
vpi->id ? vpi->id : "");
monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", vpi->memaddr);
monitor_printf(mon, " size: %" PRIu64 "\n", vpi->size);
monitor_printf(mon, " memdev: %s\n", vpi->memdev);
break;
default:
g_assert_not_reached();
}
}
}

View File

@ -1550,16 +1550,42 @@
}
}
##
# @VirtioPMEMDeviceInfo:
#
# VirtioPMEM state information
#
# @id: device's ID
#
# @memaddr: physical address in memory, where device is mapped
#
# @size: size of memory that the device provides
#
# @memdev: memory backend linked with device
#
# Since: 4.1
##
{ 'struct': 'VirtioPMEMDeviceInfo',
'data': { '*id': 'str',
'memaddr': 'size',
'size': 'size',
'memdev': 'str'
}
}
##
# @MemoryDeviceInfo:
#
# Union containing information about a memory device
#
# nvdimm is included since 2.12. virtio-pmem is included since 4.1.
#
# Since: 2.1
##
{ 'union': 'MemoryDeviceInfo',
'data': { 'dimm': 'PCDIMMDeviceInfo',
'nvdimm': 'PCDIMMDeviceInfo'
'nvdimm': 'PCDIMMDeviceInfo',
'virtio-pmem': 'VirtioPMEMDeviceInfo'
}
}

View File

@ -45,6 +45,10 @@
} \
} while (0)
enum {
VHOST_USER_BRIDGE_MAX_QUEUES = 8,
};
typedef void (*CallbackFunc)(int sock, void *ctx);
typedef struct Event {
@ -512,12 +516,16 @@ vubr_accept_cb(int sock, void *ctx)
}
DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
vu_init(&dev->vudev,
conn_fd,
vubr_panic,
vubr_set_watch,
vubr_remove_watch,
&vuiface);
if (!vu_init(&dev->vudev,
VHOST_USER_BRIDGE_MAX_QUEUES,
conn_fd,
vubr_panic,
vubr_set_watch,
vubr_remove_watch,
&vuiface)) {
fprintf(stderr, "Failed to initialize libvhost-user\n");
exit(1);
}
dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
dispatcher_remove(&dev->dispatcher, sock);
@ -560,12 +568,18 @@ vubr_new(const char *path, bool client)
if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
vubr_die("connect");
}
vu_init(&dev->vudev,
dev->sock,
vubr_panic,
vubr_set_watch,
vubr_remove_watch,
&vuiface);
if (!vu_init(&dev->vudev,
VHOST_USER_BRIDGE_MAX_QUEUES,
dev->sock,
vubr_panic,
vubr_set_watch,
vubr_remove_watch,
&vuiface)) {
fprintf(stderr, "Failed to initialize libvhost-user\n");
exit(1);
}
cb = vubr_receive_cb;
}
@ -584,7 +598,7 @@ static void *notifier_thread(void *arg)
int qidx;
while (true) {
for (qidx = 0; qidx < VHOST_MAX_NR_VIRTQUEUE; qidx++) {
for (qidx = 0; qidx < VHOST_USER_BRIDGE_MAX_QUEUES; qidx++) {
uint16_t *n = vubr->notifier.addr + pagesize * qidx;
if (*n == qidx) {
@ -616,7 +630,7 @@ vubr_host_notifier_setup(VubrDev *dev)
void *addr;
int fd;
length = getpagesize() * VHOST_MAX_NR_VIRTQUEUE;
length = getpagesize() * VHOST_USER_BRIDGE_MAX_QUEUES;
fd = mkstemp(template);
if (fd < 0) {