s390x update:
- update Linux headers to 5.8-rc1 (for vfio-ccw path handling) - vfio-ccw: add support for path handling - documentation fix -----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEEw9DWbcNiT/aowBjO3s9rk8bwL68FAl7rieQSHGNvaHVja0By ZWRoYXQuY29tAAoJEN7Pa5PG8C+vOn0P/iOCCMhOTPNEfDF7JandmBV2sp+ZKbVq zrMJzybWSwu1YD8tILUiB/G8K9iVK/dpnccbiH/OfHHnW6x88Q8Ggrs4yxBDggOg 4v/CfoaPR9/0CxhE68OOVz+Wl+6nly1tJu7l8f/8zTkKZhb0WLrn2NypvTIH3n0Q cBMmNCoas15YYkKMCWb68McXWThB3BNAeo0gUZsNH+DayQbHna34zI274xQIXhhM pZynKyxOjYm1BTYqyIEGwXP+IGdJwC1SgknExE93NF/2QW/ZPkrruZuh7BKJQBm1 v2Zix0uR7tuXzuf1DNNLIPm+/sXcVUOq+h/GOtT+HpdrpNixW8qDOuOl9UAAhTSU Gb0EOHbh2X9ypopYswi4nVSuMVQwqXXyWTn/i2XfCQhoIQL/BQ750uacQPaO2W7u zaqEqUdezG6AyYACW2juhqs2jGGOL4/4Vlu7drQFNTm5lAOzfqtE5B7AJ6t71P8k xcKcgEzWL5qTB4kFyFDahKCH2BLluSOa+mshHaZmYZUvSnpFBKWsdEkuPTwXhnl6 FtHjFAfv2a6EsAKsa3rZBR43Kv3pHsSqhdyJczA7AlfL5abUxvU0H86JWVXQEl90 zVbSOqwd3uu2zGUqfVdvCT5+FT3SujpmKujZHXkJuZRcm5AKOXz97aihbJzjoIDf xa3T2/8xWLvJ =qr0y -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200618' into staging s390x update: - update Linux headers to 5.8-rc1 (for vfio-ccw path handling) - vfio-ccw: add support for path handling - documentation fix # gpg: Signature made Thu 18 Jun 2020 16:36:04 BST # gpg: using RSA key C3D0D66DC3624FF6A8C018CEDECF6B93C6F02FAF # gpg: issuer "cohuck@redhat.com" # gpg: Good signature from "Cornelia Huck <conny@cornelia-huck.de>" [marginal] # gpg: aka "Cornelia Huck <huckc@linux.vnet.ibm.com>" [full] # gpg: aka "Cornelia Huck <cornelia.huck@de.ibm.com>" [full] # gpg: aka "Cornelia Huck <cohuck@kernel.org>" [marginal] # gpg: aka "Cornelia Huck <cohuck@redhat.com>" [marginal] # Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0 18CE DECF 6B93 C6F0 2FAF * remotes/cohuck/tags/s390x-20200618: docs/s390x: fix vfio-ap device_del description vfio-ccw: Add support for the CRW region and IRQ s390x/css: Refactor the css_queue_crw() routine vfio-ccw: Refactor ccw irq handler vfio-ccw: Add support for the schib 
region vfio-ccw: Refactor cleanup of regions Linux headers: update Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
4d285821c5
@ -606,10 +606,11 @@ action.
|
||||
|
||||
To hot plug a vfio-ap device, use the QEMU ``device_add`` command::
|
||||
|
||||
(qemu) device_add vfio-ap,sysfsdev="$path-to-mdev"
|
||||
(qemu) device_add vfio-ap,sysfsdev="$path-to-mdev",id="$id"
|
||||
|
||||
Where the ``$path-to-mdev`` value specifies the absolute path to a mediated
|
||||
device to which AP resources to be used by the guest have been assigned.
|
||||
``$id`` is the name value for the optional id parameter.
|
||||
|
||||
Note that on Linux guests, the AP devices will be created in the
|
||||
``/sys/bus/ap/devices`` directory when the AP bus subsequently performs its periodic
|
||||
@ -632,10 +633,9 @@ or a prior hot plug action.
|
||||
|
||||
To hot unplug a vfio-ap device, use the QEMU ``device_del`` command::
|
||||
|
||||
(qemu) device_del vfio-ap,sysfsdev="$path-to-mdev"
|
||||
(qemu) device_del "$id"
|
||||
|
||||
Where ``$path-to-mdev`` is the same as the path specified when the vfio-ap
|
||||
device was attached to the virtual machine's ap-bus.
|
||||
Where ``$id`` is the same id that was specified at device creation.
|
||||
|
||||
On a Linux guest, the AP devices will be removed from the ``/sys/bus/ap/devices``
|
||||
directory on the guest when the AP bus subsequently performs its periodic scan,
|
||||
|
@ -1335,11 +1335,20 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src)
|
||||
}
|
||||
}
|
||||
|
||||
int css_do_stsch(SubchDev *sch, SCHIB *schib)
|
||||
IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* For some subchannels, we may want to update parts of
|
||||
* the schib (e.g., update path masks from the host device
|
||||
* for passthrough subchannels).
|
||||
*/
|
||||
ret = s390_ccw_store(sch);
|
||||
|
||||
/* Use current status. */
|
||||
copy_schib_to_guest(schib, &sch->curr_status);
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src)
|
||||
@ -2161,30 +2170,23 @@ void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid,
|
||||
}
|
||||
}
|
||||
|
||||
void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
|
||||
int chain, uint16_t rsid)
|
||||
void css_crw_add_to_queue(CRW crw)
|
||||
{
|
||||
CrwContainer *crw_cont;
|
||||
|
||||
trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : "");
|
||||
trace_css_crw((crw.flags & CRW_FLAGS_MASK_RSC) >> 8,
|
||||
crw.flags & CRW_FLAGS_MASK_ERC,
|
||||
crw.rsid,
|
||||
(crw.flags & CRW_FLAGS_MASK_C) ? "(chained)" : "");
|
||||
|
||||
/* TODO: Maybe use a static crw pool? */
|
||||
crw_cont = g_try_new0(CrwContainer, 1);
|
||||
if (!crw_cont) {
|
||||
channel_subsys.crws_lost = true;
|
||||
return;
|
||||
}
|
||||
crw_cont->crw.flags = (rsc << 8) | erc;
|
||||
if (solicited) {
|
||||
crw_cont->crw.flags |= CRW_FLAGS_MASK_S;
|
||||
}
|
||||
if (chain) {
|
||||
crw_cont->crw.flags |= CRW_FLAGS_MASK_C;
|
||||
}
|
||||
crw_cont->crw.rsid = rsid;
|
||||
if (channel_subsys.crws_lost) {
|
||||
crw_cont->crw.flags |= CRW_FLAGS_MASK_R;
|
||||
channel_subsys.crws_lost = false;
|
||||
}
|
||||
|
||||
crw_cont->crw = crw;
|
||||
|
||||
QTAILQ_INSERT_TAIL(&channel_subsys.pending_crws, crw_cont, sibling);
|
||||
|
||||
@ -2195,6 +2197,27 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
|
||||
}
|
||||
}
|
||||
|
||||
void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
|
||||
int chain, uint16_t rsid)
|
||||
{
|
||||
CRW crw;
|
||||
|
||||
crw.flags = (rsc << 8) | erc;
|
||||
if (solicited) {
|
||||
crw.flags |= CRW_FLAGS_MASK_S;
|
||||
}
|
||||
if (chain) {
|
||||
crw.flags |= CRW_FLAGS_MASK_C;
|
||||
}
|
||||
crw.rsid = rsid;
|
||||
if (channel_subsys.crws_lost) {
|
||||
crw.flags |= CRW_FLAGS_MASK_R;
|
||||
channel_subsys.crws_lost = false;
|
||||
}
|
||||
|
||||
css_crw_add_to_queue(crw);
|
||||
}
|
||||
|
||||
void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
|
||||
int hotplugged, int add)
|
||||
{
|
||||
|
@ -51,6 +51,27 @@ int s390_ccw_clear(SubchDev *sch)
|
||||
return cdc->handle_clear(sch);
|
||||
}
|
||||
|
||||
IOInstEnding s390_ccw_store(SubchDev *sch)
|
||||
{
|
||||
S390CCWDeviceClass *cdc = NULL;
|
||||
int ret = IOINST_CC_EXPECTED;
|
||||
|
||||
/*
|
||||
* This code is called for both virtual and passthrough devices,
|
||||
* but only applies to the latter. This ugly check makes that
|
||||
* distinction for us.
|
||||
*/
|
||||
if (object_dynamic_cast(OBJECT(sch->driver_data), TYPE_S390_CCW)) {
|
||||
cdc = S390_CCW_DEVICE_GET_CLASS(sch->driver_data);
|
||||
}
|
||||
|
||||
if (cdc && cdc->handle_store) {
|
||||
ret = cdc->handle_store(sch);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void s390_ccw_get_dev_info(S390CCWDevice *cdev,
|
||||
char *sysfsdev,
|
||||
Error **errp)
|
||||
|
208
hw/vfio/ccw.c
208
hw/vfio/ccw.c
@ -41,7 +41,14 @@ struct VFIOCCWDevice {
|
||||
uint64_t async_cmd_region_size;
|
||||
uint64_t async_cmd_region_offset;
|
||||
struct ccw_cmd_region *async_cmd_region;
|
||||
uint64_t schib_region_size;
|
||||
uint64_t schib_region_offset;
|
||||
struct ccw_schib_region *schib_region;
|
||||
uint64_t crw_region_size;
|
||||
uint64_t crw_region_offset;
|
||||
struct ccw_crw_region *crw_region;
|
||||
EventNotifier io_notifier;
|
||||
EventNotifier crw_notifier;
|
||||
bool force_orb_pfch;
|
||||
bool warned_orb_pfch;
|
||||
};
|
||||
@ -116,6 +123,51 @@ again:
|
||||
}
|
||||
}
|
||||
|
||||
static IOInstEnding vfio_ccw_handle_store(SubchDev *sch)
|
||||
{
|
||||
S390CCWDevice *cdev = sch->driver_data;
|
||||
VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
|
||||
SCHIB *schib = &sch->curr_status;
|
||||
struct ccw_schib_region *region = vcdev->schib_region;
|
||||
SCHIB *s;
|
||||
int ret;
|
||||
|
||||
/* schib region not available so nothing else to do */
|
||||
if (!region) {
|
||||
return IOINST_CC_EXPECTED;
|
||||
}
|
||||
|
||||
memset(region, 0, sizeof(*region));
|
||||
ret = pread(vcdev->vdev.fd, region, vcdev->schib_region_size,
|
||||
vcdev->schib_region_offset);
|
||||
|
||||
if (ret == -1) {
|
||||
/*
|
||||
* Device is probably damaged, but store subchannel does not
|
||||
* have a nonzero cc defined for this scenario. Log an error,
|
||||
* and presume things are otherwise fine.
|
||||
*/
|
||||
error_report("vfio-ccw: store region read failed with errno=%d", errno);
|
||||
return IOINST_CC_EXPECTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Selectively copy path-related bits of the SCHIB,
|
||||
* rather than copying the entire struct.
|
||||
*/
|
||||
s = (SCHIB *)region->schib_area;
|
||||
schib->pmcw.pnom = s->pmcw.pnom;
|
||||
schib->pmcw.lpum = s->pmcw.lpum;
|
||||
schib->pmcw.pam = s->pmcw.pam;
|
||||
schib->pmcw.pom = s->pmcw.pom;
|
||||
|
||||
if (s->scsw.flags & SCSW_FLAGS_MASK_PNO) {
|
||||
schib->scsw.flags |= SCSW_FLAGS_MASK_PNO;
|
||||
}
|
||||
|
||||
return IOINST_CC_EXPECTED;
|
||||
}
|
||||
|
||||
static int vfio_ccw_handle_clear(SubchDev *sch)
|
||||
{
|
||||
S390CCWDevice *cdev = sch->driver_data;
|
||||
@ -206,6 +258,44 @@ static void vfio_ccw_reset(DeviceState *dev)
|
||||
ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
|
||||
}
|
||||
|
||||
static void vfio_ccw_crw_read(VFIOCCWDevice *vcdev)
|
||||
{
|
||||
struct ccw_crw_region *region = vcdev->crw_region;
|
||||
CRW crw;
|
||||
int size;
|
||||
|
||||
/* Keep reading CRWs as long as data is returned */
|
||||
do {
|
||||
memset(region, 0, sizeof(*region));
|
||||
size = pread(vcdev->vdev.fd, region, vcdev->crw_region_size,
|
||||
vcdev->crw_region_offset);
|
||||
|
||||
if (size == -1) {
|
||||
error_report("vfio-ccw: Read crw region failed with errno=%d",
|
||||
errno);
|
||||
break;
|
||||
}
|
||||
|
||||
if (region->crw == 0) {
|
||||
/* No more CRWs to queue */
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(&crw, &region->crw, sizeof(CRW));
|
||||
|
||||
css_crw_add_to_queue(crw);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
static void vfio_ccw_crw_notifier_handler(void *opaque)
|
||||
{
|
||||
VFIOCCWDevice *vcdev = opaque;
|
||||
|
||||
while (event_notifier_test_and_clear(&vcdev->crw_notifier)) {
|
||||
vfio_ccw_crw_read(vcdev);
|
||||
}
|
||||
}
|
||||
|
||||
static void vfio_ccw_io_notifier_handler(void *opaque)
|
||||
{
|
||||
VFIOCCWDevice *vcdev = opaque;
|
||||
@ -276,22 +366,40 @@ read_err:
|
||||
css_inject_io_interrupt(sch);
|
||||
}
|
||||
|
||||
static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
|
||||
static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
|
||||
unsigned int irq,
|
||||
Error **errp)
|
||||
{
|
||||
VFIODevice *vdev = &vcdev->vdev;
|
||||
struct vfio_irq_info *irq_info;
|
||||
size_t argsz;
|
||||
int fd;
|
||||
EventNotifier *notifier;
|
||||
IOHandler *fd_read;
|
||||
|
||||
if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
|
||||
error_setg(errp, "vfio: unexpected number of io irqs %u",
|
||||
switch (irq) {
|
||||
case VFIO_CCW_IO_IRQ_INDEX:
|
||||
notifier = &vcdev->io_notifier;
|
||||
fd_read = vfio_ccw_io_notifier_handler;
|
||||
break;
|
||||
case VFIO_CCW_CRW_IRQ_INDEX:
|
||||
notifier = &vcdev->crw_notifier;
|
||||
fd_read = vfio_ccw_crw_notifier_handler;
|
||||
break;
|
||||
default:
|
||||
error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (vdev->num_irqs < irq + 1) {
|
||||
error_setg(errp, "vfio: unexpected number of irqs %u",
|
||||
vdev->num_irqs);
|
||||
return;
|
||||
}
|
||||
|
||||
argsz = sizeof(*irq_info);
|
||||
irq_info = g_malloc0(argsz);
|
||||
irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
|
||||
irq_info->index = irq;
|
||||
irq_info->argsz = argsz;
|
||||
if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
|
||||
irq_info) < 0 || irq_info->count < 1) {
|
||||
@ -299,37 +407,52 @@ static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
|
||||
goto out_free_info;
|
||||
}
|
||||
|
||||
if (event_notifier_init(&vcdev->io_notifier, 0)) {
|
||||
if (event_notifier_init(notifier, 0)) {
|
||||
error_setg_errno(errp, errno,
|
||||
"vfio: Unable to init event notifier for IO");
|
||||
"vfio: Unable to init event notifier for irq (%d)",
|
||||
irq);
|
||||
goto out_free_info;
|
||||
}
|
||||
|
||||
fd = event_notifier_get_fd(&vcdev->io_notifier);
|
||||
qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev);
|
||||
fd = event_notifier_get_fd(notifier);
|
||||
qemu_set_fd_handler(fd, fd_read, NULL, vcdev);
|
||||
|
||||
if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
|
||||
if (vfio_set_irq_signaling(vdev, irq, 0,
|
||||
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
|
||||
qemu_set_fd_handler(fd, NULL, NULL, vcdev);
|
||||
event_notifier_cleanup(&vcdev->io_notifier);
|
||||
event_notifier_cleanup(notifier);
|
||||
}
|
||||
|
||||
out_free_info:
|
||||
g_free(irq_info);
|
||||
}
|
||||
|
||||
static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
|
||||
static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev,
|
||||
unsigned int irq)
|
||||
{
|
||||
Error *err = NULL;
|
||||
EventNotifier *notifier;
|
||||
|
||||
if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
|
||||
switch (irq) {
|
||||
case VFIO_CCW_IO_IRQ_INDEX:
|
||||
notifier = &vcdev->io_notifier;
|
||||
break;
|
||||
case VFIO_CCW_CRW_IRQ_INDEX:
|
||||
notifier = &vcdev->crw_notifier;
|
||||
break;
|
||||
default:
|
||||
error_report("vfio: Unsupported device irq(%d)", irq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (vfio_set_irq_signaling(&vcdev->vdev, irq, 0,
|
||||
VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
|
||||
error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name);
|
||||
}
|
||||
|
||||
qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier),
|
||||
qemu_set_fd_handler(event_notifier_get_fd(notifier),
|
||||
NULL, NULL, vcdev);
|
||||
event_notifier_cleanup(&vcdev->io_notifier);
|
||||
event_notifier_cleanup(notifier);
|
||||
}
|
||||
|
||||
static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
|
||||
@ -363,8 +486,7 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
|
||||
vcdev->io_region_size = info->size;
|
||||
if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
|
||||
error_setg(errp, "vfio: Unexpected size of the I/O region");
|
||||
g_free(info);
|
||||
return;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
vcdev->io_region_offset = info->offset;
|
||||
@ -377,19 +499,53 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
|
||||
vcdev->async_cmd_region_size = info->size;
|
||||
if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) {
|
||||
error_setg(errp, "vfio: Unexpected size of the async cmd region");
|
||||
g_free(vcdev->io_region);
|
||||
g_free(info);
|
||||
return;
|
||||
goto out_err;
|
||||
}
|
||||
vcdev->async_cmd_region_offset = info->offset;
|
||||
vcdev->async_cmd_region = g_malloc0(info->size);
|
||||
}
|
||||
|
||||
ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
|
||||
VFIO_REGION_SUBTYPE_CCW_SCHIB, &info);
|
||||
if (!ret) {
|
||||
vcdev->schib_region_size = info->size;
|
||||
if (sizeof(*vcdev->schib_region) != vcdev->schib_region_size) {
|
||||
error_setg(errp, "vfio: Unexpected size of the schib region");
|
||||
goto out_err;
|
||||
}
|
||||
vcdev->schib_region_offset = info->offset;
|
||||
vcdev->schib_region = g_malloc(info->size);
|
||||
}
|
||||
|
||||
ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
|
||||
VFIO_REGION_SUBTYPE_CCW_CRW, &info);
|
||||
|
||||
if (!ret) {
|
||||
vcdev->crw_region_size = info->size;
|
||||
if (sizeof(*vcdev->crw_region) != vcdev->crw_region_size) {
|
||||
error_setg(errp, "vfio: Unexpected size of the CRW region");
|
||||
goto out_err;
|
||||
}
|
||||
vcdev->crw_region_offset = info->offset;
|
||||
vcdev->crw_region = g_malloc(info->size);
|
||||
}
|
||||
|
||||
g_free(info);
|
||||
return;
|
||||
|
||||
out_err:
|
||||
g_free(vcdev->crw_region);
|
||||
g_free(vcdev->schib_region);
|
||||
g_free(vcdev->async_cmd_region);
|
||||
g_free(vcdev->io_region);
|
||||
g_free(info);
|
||||
return;
|
||||
}
|
||||
|
||||
static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
|
||||
{
|
||||
g_free(vcdev->crw_region);
|
||||
g_free(vcdev->schib_region);
|
||||
g_free(vcdev->async_cmd_region);
|
||||
g_free(vcdev->io_region);
|
||||
}
|
||||
@ -499,11 +655,19 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
|
||||
goto out_region_err;
|
||||
}
|
||||
|
||||
vfio_ccw_register_io_notifier(vcdev, &err);
|
||||
vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err);
|
||||
if (err) {
|
||||
goto out_notifier_err;
|
||||
}
|
||||
|
||||
if (vcdev->crw_region) {
|
||||
vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err);
|
||||
if (err) {
|
||||
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
|
||||
goto out_notifier_err;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
out_notifier_err:
|
||||
@ -528,7 +692,8 @@ static void vfio_ccw_unrealize(DeviceState *dev)
|
||||
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
|
||||
VFIOGroup *group = vcdev->vdev.group;
|
||||
|
||||
vfio_ccw_unregister_io_notifier(vcdev);
|
||||
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX);
|
||||
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
|
||||
vfio_ccw_put_region(vcdev);
|
||||
vfio_ccw_put_device(vcdev);
|
||||
vfio_put_group(group);
|
||||
@ -565,6 +730,7 @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data)
|
||||
cdc->handle_request = vfio_ccw_handle_request;
|
||||
cdc->handle_halt = vfio_ccw_handle_halt;
|
||||
cdc->handle_clear = vfio_ccw_handle_clear;
|
||||
cdc->handle_store = vfio_ccw_handle_store;
|
||||
}
|
||||
|
||||
static const TypeInfo vfio_ccw_info = {
|
||||
|
@ -205,6 +205,7 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src);
|
||||
void css_inject_io_interrupt(SubchDev *sch);
|
||||
void css_reset(void);
|
||||
void css_reset_sch(SubchDev *sch);
|
||||
void css_crw_add_to_queue(CRW crw);
|
||||
void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
|
||||
int chain, uint16_t rsid);
|
||||
void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
|
||||
@ -218,6 +219,7 @@ IOInstEnding do_subchannel_work_passthrough(SubchDev *sub);
|
||||
|
||||
int s390_ccw_halt(SubchDev *sch);
|
||||
int s390_ccw_clear(SubchDev *sch);
|
||||
IOInstEnding s390_ccw_store(SubchDev *sch);
|
||||
|
||||
typedef enum {
|
||||
CSS_IO_ADAPTER_VIRTIO = 0,
|
||||
@ -242,7 +244,7 @@ SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
|
||||
uint16_t schid);
|
||||
bool css_subch_visible(SubchDev *sch);
|
||||
void css_conditional_io_interrupt(SubchDev *sch);
|
||||
int css_do_stsch(SubchDev *sch, SCHIB *schib);
|
||||
IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib);
|
||||
bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid);
|
||||
IOInstEnding css_do_msch(SubchDev *sch, const SCHIB *schib);
|
||||
IOInstEnding css_do_xsch(SubchDev *sch);
|
||||
|
@ -37,6 +37,7 @@ typedef struct S390CCWDeviceClass {
|
||||
IOInstEnding (*handle_request) (SubchDev *sch);
|
||||
int (*handle_halt) (SubchDev *sch);
|
||||
int (*handle_clear) (SubchDev *sch);
|
||||
IOInstEnding (*handle_store) (SubchDev *sch);
|
||||
} S390CCWDeviceClass;
|
||||
|
||||
#endif
|
||||
|
@ -31,6 +31,7 @@
|
||||
#define KVM_FEATURE_PV_SEND_IPI 11
|
||||
#define KVM_FEATURE_POLL_CONTROL 12
|
||||
#define KVM_FEATURE_PV_SCHED_YIELD 13
|
||||
#define KVM_FEATURE_ASYNC_PF_INT 14
|
||||
|
||||
#define KVM_HINTS_REALTIME 0
|
||||
|
||||
@ -50,6 +51,8 @@
|
||||
#define MSR_KVM_STEAL_TIME 0x4b564d03
|
||||
#define MSR_KVM_PV_EOI_EN 0x4b564d04
|
||||
#define MSR_KVM_POLL_CONTROL 0x4b564d05
|
||||
#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
|
||||
#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
|
||||
|
||||
struct kvm_steal_time {
|
||||
uint64_t steal;
|
||||
@ -81,6 +84,11 @@ struct kvm_clock_pairing {
|
||||
#define KVM_ASYNC_PF_ENABLED (1 << 0)
|
||||
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
|
||||
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
|
||||
#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
|
||||
|
||||
/* MSR_KVM_ASYNC_PF_INT */
|
||||
#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
|
||||
|
||||
|
||||
/* Operations for KVM_HC_MMU_OP */
|
||||
#define KVM_MMU_OP_WRITE_PTE 1
|
||||
@ -112,8 +120,13 @@ struct kvm_mmu_op_release_pt {
|
||||
#define KVM_PV_REASON_PAGE_READY 2
|
||||
|
||||
struct kvm_vcpu_pv_apf_data {
|
||||
uint32_t reason;
|
||||
uint8_t pad[60];
|
||||
/* Used for 'page not present' events delivered via #PF */
|
||||
uint32_t flags;
|
||||
|
||||
/* Used for 'page ready' events delivered via interrupt notification */
|
||||
uint32_t token;
|
||||
|
||||
uint8_t pad[56];
|
||||
uint32_t enabled;
|
||||
};
|
||||
|
||||
|
@ -353,9 +353,12 @@ extern "C" {
|
||||
* a platform-dependent stride. On top of that the memory can apply
|
||||
* platform-depending swizzling of some higher address bits into bit6.
|
||||
*
|
||||
* This format is highly platforms specific and not useful for cross-driver
|
||||
* sharing. It exists since on a given platform it does uniquely identify the
|
||||
* layout in a simple way for i915-specific userspace.
|
||||
* Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
|
||||
* On earlier platforms the layout is highly platform specific and not useful for
|
||||
* cross-driver sharing. It exists since on a given platform it does uniquely
|
||||
* identify the layout in a simple way for i915-specific userspace, which
|
||||
* facilitated conversion of userspace to modifiers. Additionally the exact
|
||||
* format on some really old platforms is not known.
|
||||
*/
|
||||
#define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1)
|
||||
|
||||
@ -368,9 +371,12 @@ extern "C" {
|
||||
* memory can apply platform-depending swizzling of some higher address bits
|
||||
* into bit6.
|
||||
*
|
||||
* This format is highly platforms specific and not useful for cross-driver
|
||||
* sharing. It exists since on a given platform it does uniquely identify the
|
||||
* layout in a simple way for i915-specific userspace.
|
||||
* Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
|
||||
* On earlier platforms the layout is highly platform specific and not useful for
|
||||
* cross-driver sharing. It exists since on a given platform it does uniquely
|
||||
* identify the layout in a simple way for i915-specific userspace, which
|
||||
* facilitated conversion of userspace to modifiers. Additionally the exact
|
||||
* format on some really old platforms is not known.
|
||||
*/
|
||||
#define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2)
|
||||
|
||||
@ -520,7 +526,113 @@ extern "C" {
|
||||
#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1)
|
||||
|
||||
/*
|
||||
* 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later
|
||||
* Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80,
|
||||
* and Tegra GPUs starting with Tegra K1.
|
||||
*
|
||||
* Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies
|
||||
* based on the architecture generation. GOBs themselves are then arranged in
|
||||
* 3D blocks, with the block dimensions (in terms of GOBs) always being a power
|
||||
* of two, and hence expressible as their log2 equivalent (E.g., "2" represents
|
||||
* a block depth or height of "4").
|
||||
*
|
||||
* Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format
|
||||
* in full detail.
|
||||
*
|
||||
* Macro
|
||||
* Bits Param Description
|
||||
* ---- ----- -----------------------------------------------------------------
|
||||
*
|
||||
* 3:0 h log2(height) of each block, in GOBs. Placed here for
|
||||
* compatibility with the existing
|
||||
* DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
|
||||
*
|
||||
* 4:4 - Must be 1, to indicate block-linear layout. Necessary for
|
||||
* compatibility with the existing
|
||||
* DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
|
||||
*
|
||||
* 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block
|
||||
* size). Must be zero.
|
||||
*
|
||||
* Note there is no log2(width) parameter. Some portions of the
|
||||
* hardware support a block width of two gobs, but it is impractical
|
||||
* to use due to lack of support elsewhere, and has no known
|
||||
* benefits.
|
||||
*
|
||||
* 11:9 - Reserved (To support 2D-array textures with variable array stride
|
||||
* in blocks, specified via log2(tile width in blocks)). Must be
|
||||
* zero.
|
||||
*
|
||||
* 19:12 k Page Kind. This value directly maps to a field in the page
|
||||
* tables of all GPUs >= NV50. It affects the exact layout of bits
|
||||
* in memory and can be derived from the tuple
|
||||
*
|
||||
* (format, GPU model, compression type, samples per pixel)
|
||||
*
|
||||
* Where compression type is defined below. If GPU model were
|
||||
* implied by the format modifier, format, or memory buffer, page
|
||||
* kind would not need to be included in the modifier itself, but
|
||||
* since the modifier should define the layout of the associated
|
||||
* memory buffer independent from any device or other context, it
|
||||
* must be included here.
|
||||
*
|
||||
* 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed
|
||||
* starting with Fermi GPUs. Additionally, the mapping between page
|
||||
* kind and bit layout has changed at various points.
|
||||
*
|
||||
* 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping
|
||||
* 1 = Gob Height 4, G80 - GT2XX Page Kind mapping
|
||||
* 2 = Gob Height 8, Turing+ Page Kind mapping
|
||||
* 3 = Reserved for future use.
|
||||
*
|
||||
* 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further
|
||||
* bit remapping step that occurs at an even lower level than the
|
||||
* page kind and block linear swizzles. This causes the layout of
|
||||
* surfaces mapped in those SOC's GPUs to be incompatible with the
|
||||
* equivalent mapping on other GPUs in the same system.
|
||||
*
|
||||
* 0 = Tegra K1 - Tegra Parker/TX2 Layout.
|
||||
* 1 = Desktop GPU and Tegra Xavier+ Layout
|
||||
*
|
||||
* 25:23 c Lossless Framebuffer Compression type.
|
||||
*
|
||||
* 0 = none
|
||||
* 1 = ROP/3D, layout 1, exact compression format implied by Page
|
||||
* Kind field
|
||||
* 2 = ROP/3D, layout 2, exact compression format implied by Page
|
||||
* Kind field
|
||||
* 3 = CDE horizontal
|
||||
* 4 = CDE vertical
|
||||
* 5 = Reserved for future use
|
||||
* 6 = Reserved for future use
|
||||
* 7 = Reserved for future use
|
||||
*
|
||||
* 55:26 - Reserved for future use. Must be zero.
|
||||
*/
|
||||
#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
|
||||
fourcc_mod_code(NVIDIA, (0x10 | \
|
||||
((h) & 0xf) | \
|
||||
(((k) & 0xff) << 12) | \
|
||||
(((g) & 0x3) << 20) | \
|
||||
(((s) & 0x1) << 22) | \
|
||||
(((c) & 0x7) << 23)))
|
||||
|
||||
/* To grandfather in prior block linear format modifiers to the above layout,
|
||||
* the page kind "0", which corresponds to "pitch/linear" and hence is unusable
|
||||
* with block-linear layouts, is remapped within drivers to the value 0xfe,
|
||||
* which corresponds to the "generic" kind used for simple single-sample
|
||||
* uncompressed color formats on Fermi - Volta GPUs.
|
||||
*/
|
||||
static inline uint64_t
|
||||
drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
|
||||
{
|
||||
if (!(modifier & 0x10) || (modifier & (0xff << 12)))
|
||||
return modifier;
|
||||
else
|
||||
return modifier | (0xfe << 12);
|
||||
}
|
||||
|
||||
/*
|
||||
* 16Bx2 Block Linear layout, used by Tegra K1 and later
|
||||
*
|
||||
* Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked
|
||||
* vertically by a power of 2 (1 to 32 GOBs) to form a block.
|
||||
@ -541,20 +653,20 @@ extern "C" {
|
||||
* in full detail.
|
||||
*/
|
||||
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \
|
||||
fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf))
|
||||
DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v))
|
||||
|
||||
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \
|
||||
fourcc_mod_code(NVIDIA, 0x10)
|
||||
DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0)
|
||||
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \
|
||||
fourcc_mod_code(NVIDIA, 0x11)
|
||||
DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1)
|
||||
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \
|
||||
fourcc_mod_code(NVIDIA, 0x12)
|
||||
DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2)
|
||||
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \
|
||||
fourcc_mod_code(NVIDIA, 0x13)
|
||||
DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3)
|
||||
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \
|
||||
fourcc_mod_code(NVIDIA, 0x14)
|
||||
DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4)
|
||||
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \
|
||||
fourcc_mod_code(NVIDIA, 0x15)
|
||||
DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5)
|
||||
|
||||
/*
|
||||
* Some Broadcom modifiers take parameters, for example the number of
|
||||
|
@ -1666,6 +1666,18 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MASTER_SLAVE_CFG_UNSUPPORTED 0
|
||||
#define MASTER_SLAVE_CFG_UNKNOWN 1
|
||||
#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2
|
||||
#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3
|
||||
#define MASTER_SLAVE_CFG_MASTER_FORCE 4
|
||||
#define MASTER_SLAVE_CFG_SLAVE_FORCE 5
|
||||
#define MASTER_SLAVE_STATE_UNSUPPORTED 0
|
||||
#define MASTER_SLAVE_STATE_UNKNOWN 1
|
||||
#define MASTER_SLAVE_STATE_MASTER 2
|
||||
#define MASTER_SLAVE_STATE_SLAVE 3
|
||||
#define MASTER_SLAVE_STATE_ERR 4
|
||||
|
||||
/* Which connector port. */
|
||||
#define PORT_TP 0x00
|
||||
#define PORT_AUI 0x01
|
||||
@ -1904,7 +1916,9 @@ struct ethtool_link_settings {
|
||||
uint8_t eth_tp_mdix_ctrl;
|
||||
int8_t link_mode_masks_nwords;
|
||||
uint8_t transceiver;
|
||||
uint8_t reserved1[3];
|
||||
uint8_t master_slave_cfg;
|
||||
uint8_t master_slave_state;
|
||||
uint8_t reserved1[1];
|
||||
uint32_t reserved[7];
|
||||
uint32_t link_mode_masks[0];
|
||||
/* layout of link_mode_masks fields:
|
||||
|
@ -44,6 +44,7 @@
|
||||
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
|
||||
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
|
||||
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
|
||||
#define VIRTIO_ID_MEM 24 /* virtio mem */
|
||||
#define VIRTIO_ID_FS 26 /* virtio filesystem */
|
||||
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
|
||||
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
|
||||
|
211
include/standard-headers/linux/virtio_mem.h
Normal file
211
include/standard-headers/linux/virtio_mem.h
Normal file
@ -0,0 +1,211 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause */
|
||||
/*
|
||||
* Virtio Mem Device
|
||||
*
|
||||
* Copyright Red Hat, Inc. 2020
|
||||
*
|
||||
* Authors:
|
||||
* David Hildenbrand <david@redhat.com>
|
||||
*
|
||||
* This header is BSD licensed so anyone can use the definitions
|
||||
* to implement compatible drivers/servers:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of IBM nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_VIRTIO_MEM_H
|
||||
#define _LINUX_VIRTIO_MEM_H
|
||||
|
||||
#include "standard-headers/linux/types.h"
|
||||
#include "standard-headers/linux/virtio_types.h"
|
||||
#include "standard-headers/linux/virtio_ids.h"
|
||||
#include "standard-headers/linux/virtio_config.h"
|
||||
|
||||
/*
|
||||
* Each virtio-mem device manages a dedicated region in physical address
|
||||
* space. Each device can belong to a single NUMA node, multiple devices
|
||||
* for a single NUMA node are possible. A virtio-mem device is like a
|
||||
* "resizable DIMM" consisting of small memory blocks that can be plugged
|
||||
* or unplugged. The device driver is responsible for (un)plugging memory
|
||||
* blocks on demand.
|
||||
*
|
||||
* Virtio-mem devices can only operate on their assigned memory region in
|
||||
* order to (un)plug memory. A device cannot (un)plug memory belonging to
|
||||
* other devices.
|
||||
*
|
||||
* The "region_size" corresponds to the maximum amount of memory that can
|
||||
* be provided by a device. The "plugged_size" corresponds to the amount of memory
|
||||
* that is currently plugged. "requested_size" corresponds to a request
|
||||
* from the device to the device driver to (un)plug blocks. The
|
||||
* device driver should try to (un)plug blocks in order to reach the
|
||||
* "requested_size". It is impossible to plug more memory than requested.
|
||||
*
|
||||
* The "usable_region_size" represents the memory region that can actually
|
||||
* be used to (un)plug memory. It is always at least as big as the
|
||||
* "requested_size" and will grow dynamically. It will only shrink when
|
||||
* explicitly triggered (VIRTIO_MEM_REQ_UNPLUG_ALL).
|
||||
*
|
||||
* There are no guarantees what will happen if unplugged memory is
|
||||
* read/written. Such memory should, in general, not be touched. E.g.,
|
||||
* even writing might succeed, but the values will simply be discarded at
|
||||
* random points in time.
|
||||
*
|
||||
* It can happen that the device cannot process a request, because it is
|
||||
* busy. The device driver has to retry later.
|
||||
*
|
||||
* Usually, during system resets all memory will get unplugged, so the
|
||||
* device driver can start with a clean state. However, in specific
|
||||
* scenarios (if the device is busy) it can happen that the device still
|
||||
* has memory plugged. The device driver can request to unplug all memory
|
||||
* (VIRTIO_MEM_REQ_UNPLUG_ALL) - which might take a while to succeed if the
|
||||
* device is busy.
|
||||
*/
|
||||
|
||||
/* --- virtio-mem: feature bits --- */
|
||||
|
||||
/* node_id is an ACPI PXM and is valid */
|
||||
#define VIRTIO_MEM_F_ACPI_PXM 0
|
||||
|
||||
|
||||
/* --- virtio-mem: guest -> host requests --- */
|
||||
|
||||
/* request to plug memory blocks */
|
||||
#define VIRTIO_MEM_REQ_PLUG 0
|
||||
/* request to unplug memory blocks */
|
||||
#define VIRTIO_MEM_REQ_UNPLUG 1
|
||||
/* request to unplug all blocks and shrink the usable size */
|
||||
#define VIRTIO_MEM_REQ_UNPLUG_ALL 2
|
||||
/* request information about the plugged state of memory blocks */
|
||||
#define VIRTIO_MEM_REQ_STATE 3
|
||||
|
||||
/*
 * Payload member "plug" of the union in struct virtio_mem_req.
 * NOTE(review): presumably used when the request type is
 * VIRTIO_MEM_REQ_PLUG, with addr the start address and nb_blocks the number
 * of memory blocks to plug -- confirm against the virtio-mem specification.
 * One 64-bit field plus four 16-bit fields (nb_blocks + padding[3]).
 */
struct virtio_mem_req_plug {
|
||||
__virtio64 addr;
|
||||
__virtio16 nb_blocks;
|
||||
__virtio16 padding[3];
|
||||
};
|
||||
|
||||
/*
 * Payload member "unplug" of the union in struct virtio_mem_req.
 * Field-for-field identical to struct virtio_mem_req_plug.
 * NOTE(review): presumably used when the request type is
 * VIRTIO_MEM_REQ_UNPLUG, with addr the start address and nb_blocks the
 * number of memory blocks to unplug -- confirm against the specification.
 */
struct virtio_mem_req_unplug {
|
||||
__virtio64 addr;
|
||||
__virtio16 nb_blocks;
|
||||
__virtio16 padding[3];
|
||||
};
|
||||
|
||||
/*
 * Payload member "state" of the union in struct virtio_mem_req.
 * Field-for-field identical to the plug/unplug payloads.
 * NOTE(review): presumably used when the request type is
 * VIRTIO_MEM_REQ_STATE, with addr/nb_blocks selecting the range of blocks
 * whose plugged state is queried -- confirm against the specification.
 */
struct virtio_mem_req_state {
|
||||
__virtio64 addr;
|
||||
__virtio16 nb_blocks;
|
||||
__virtio16 padding[3];
|
||||
};
|
||||
|
||||
/*
 * Guest -> host request: a 16-bit type, three 16-bit padding words, then a
 * union of per-request payloads.
 * NOTE(review): type presumably holds one of the VIRTIO_MEM_REQ_* values.
 * The union has members for plug, unplug and state only; there is no
 * dedicated payload member for VIRTIO_MEM_REQ_UNPLUG_ALL.
 */
struct virtio_mem_req {
|
||||
__virtio16 type;
|
||||
__virtio16 padding[3];
|
||||
|
||||
union {
|
||||
struct virtio_mem_req_plug plug;
|
||||
struct virtio_mem_req_unplug unplug;
|
||||
struct virtio_mem_req_state state;
|
||||
} u;
|
||||
};
|
||||
|
||||
|
||||
/* --- virtio-mem: host -> guest response --- */
|
||||
|
||||
/*
|
||||
* Request processed successfully, applicable for
|
||||
* - VIRTIO_MEM_REQ_PLUG
|
||||
* - VIRTIO_MEM_REQ_UNPLUG
|
||||
* - VIRTIO_MEM_REQ_UNPLUG_ALL
|
||||
* - VIRTIO_MEM_REQ_STATE
|
||||
*/
|
||||
#define VIRTIO_MEM_RESP_ACK 0
|
||||
/*
|
||||
* Request denied - e.g. trying to plug more than requested, applicable for
|
||||
* - VIRTIO_MEM_REQ_PLUG
|
||||
*/
|
||||
#define VIRTIO_MEM_RESP_NACK 1
|
||||
/*
|
||||
* Request cannot be processed right now, try again later, applicable for
|
||||
* - VIRTIO_MEM_REQ_PLUG
|
||||
* - VIRTIO_MEM_REQ_UNPLUG
|
||||
* - VIRTIO_MEM_REQ_UNPLUG_ALL
|
||||
*/
|
||||
#define VIRTIO_MEM_RESP_BUSY 2
|
||||
/*
|
||||
* Error in request (e.g. addresses/alignment), applicable for
|
||||
* - VIRTIO_MEM_REQ_PLUG
|
||||
* - VIRTIO_MEM_REQ_UNPLUG
|
||||
* - VIRTIO_MEM_REQ_STATE
|
||||
*/
|
||||
#define VIRTIO_MEM_RESP_ERROR 3
|
||||
|
||||
|
||||
/* State of memory blocks is "plugged" */
|
||||
#define VIRTIO_MEM_STATE_PLUGGED 0
|
||||
/* State of memory blocks is "unplugged" */
|
||||
#define VIRTIO_MEM_STATE_UNPLUGGED 1
|
||||
/* State of memory blocks is "mixed" */
|
||||
#define VIRTIO_MEM_STATE_MIXED 2
|
||||
|
||||
/*
 * Payload member "state" of the union in struct virtio_mem_resp.
 * NOTE(review): state presumably holds one of the VIRTIO_MEM_STATE_*
 * values (PLUGGED, UNPLUGGED, MIXED) -- confirm against the specification.
 */
struct virtio_mem_resp_state {
|
||||
__virtio16 state;
|
||||
};
|
||||
|
||||
/*
 * Host -> guest response: a 16-bit type, three 16-bit padding words, then a
 * union that currently has a single member (the state payload).
 * NOTE(review): type presumably holds one of the VIRTIO_MEM_RESP_* values.
 */
struct virtio_mem_resp {
|
||||
__virtio16 type;
|
||||
__virtio16 padding[3];
|
||||
|
||||
union {
|
||||
struct virtio_mem_resp_state state;
|
||||
} u;
|
||||
};
|
||||
|
||||
/* --- virtio-mem: configuration --- */
|
||||
|
||||
/*
 * virtio-mem device configuration layout.
 * Unlike the request/response structures, the fields here are declared with
 * plain uintN_t types rather than __virtioN types.
 * Per the field comments, block_size, node_id, addr and region_size never
 * change, while usable_region_size, plugged_size and requested_size can.
 */
struct virtio_mem_config {
|
||||
/* Block size and alignment. Cannot change. */
|
||||
uint64_t block_size;
|
||||
/* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
|
||||
uint16_t node_id;
|
||||
/* node_id (2 bytes) + padding (6 bytes) fill one 8-byte slot before addr. */
uint8_t padding[6];
|
||||
/* Start address of the memory region. Cannot change. */
|
||||
uint64_t addr;
|
||||
/* Region size (maximum). Cannot change. */
|
||||
uint64_t region_size;
|
||||
/*
|
||||
* Currently usable region size. Can grow up to region_size. Can
|
||||
* shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
|
||||
* update will be sent).
|
||||
*/
|
||||
uint64_t usable_region_size;
|
||||
/*
|
||||
* Currently used size. Changes due to plug/unplug requests, but no
|
||||
* config updates will be sent.
|
||||
*/
|
||||
uint64_t plugged_size;
|
||||
/* Requested size. New plug requests cannot exceed it. Can change. */
|
||||
uint64_t requested_size;
|
||||
};
|
||||
|
||||
#endif /* _LINUX_VIRTIO_MEM_H */
|
@ -84,6 +84,13 @@
|
||||
* at the end of the used ring. Guest should ignore the used->flags field. */
|
||||
#define VIRTIO_RING_F_EVENT_IDX 29
|
||||
|
||||
/* Alignment requirements for vring elements.
|
||||
* When using pre-virtio 1.0 layout, these fall out naturally.
|
||||
*/
|
||||
#define VRING_AVAIL_ALIGN_SIZE 2
|
||||
#define VRING_USED_ALIGN_SIZE 4
|
||||
#define VRING_DESC_ALIGN_SIZE 16
|
||||
|
||||
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
|
||||
struct vring_desc {
|
||||
/* Address (guest-physical). */
|
||||
@ -110,28 +117,47 @@ struct vring_used_elem {
|
||||
__virtio32 len;
|
||||
};
|
||||
|
||||
typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
|
||||
vring_used_elem_t;
|
||||
|
||||
struct vring_used {
|
||||
__virtio16 flags;
|
||||
__virtio16 idx;
|
||||
struct vring_used_elem ring[];
|
||||
vring_used_elem_t ring[];
|
||||
};
|
||||
|
||||
/*
|
||||
* The ring element addresses are passed between components with different
|
||||
* alignment assumptions. Thus, we might need to decrease the compiler-selected
|
||||
* alignment, and so must use a typedef to make sure the aligned attribute
|
||||
* actually takes hold:
|
||||
*
|
||||
* https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#Common-Type-Attributes
|
||||
*
|
||||
* When used on a struct, or struct member, the aligned attribute can only
|
||||
* increase the alignment; in order to decrease it, the packed attribute must
|
||||
* be specified as well. When used as part of a typedef, the aligned attribute
|
||||
* can both increase and decrease alignment, and specifying the packed
|
||||
* attribute generates a warning.
|
||||
*/
|
||||
typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE)))
|
||||
vring_desc_t;
|
||||
typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE)))
|
||||
vring_avail_t;
|
||||
typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
|
||||
vring_used_t;
|
||||
|
||||
struct vring {
|
||||
unsigned int num;
|
||||
|
||||
struct vring_desc *desc;
|
||||
vring_desc_t *desc;
|
||||
|
||||
struct vring_avail *avail;
|
||||
vring_avail_t *avail;
|
||||
|
||||
struct vring_used *used;
|
||||
vring_used_t *used;
|
||||
};
|
||||
|
||||
/* Alignment requirements for vring elements.
|
||||
* When using pre-virtio 1.0 layout, these fall out naturally.
|
||||
*/
|
||||
#define VRING_AVAIL_ALIGN_SIZE 2
|
||||
#define VRING_USED_ALIGN_SIZE 4
|
||||
#define VRING_DESC_ALIGN_SIZE 16
|
||||
#ifndef VIRTIO_RING_NO_LEGACY
|
||||
|
||||
/* The standard layout for the ring is a continuous chunk of memory which looks
|
||||
* like this. We assume num is a power of 2.
|
||||
@ -179,6 +205,8 @@ static inline unsigned vring_size(unsigned int num, unsigned long align)
|
||||
+ sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;
|
||||
}
|
||||
|
||||
#endif /* VIRTIO_RING_NO_LEGACY */
|
||||
|
||||
/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
|
||||
/* Assuming a given event_idx value from the other side, if
|
||||
* we have just incremented index from old to new_idx,
|
||||
|
@ -1 +1,9 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/* Include guard: pulls in the generic mman definitions and adds PROT_BTI. */
#ifndef __ASM_MMAN_H
|
||||
#define __ASM_MMAN_H
|
||||
|
||||
#include <asm-generic/mman.h>
|
||||
|
||||
#define PROT_BTI 0x10 /* BTI guarded page */
|
||||
|
||||
#endif /* ! __ASM_MMAN_H */
|
||||
|
@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3)
|
||||
__SYSCALL(__NR_openat2, sys_openat2)
|
||||
#define __NR_pidfd_getfd 438
|
||||
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
|
||||
#define __NR_faccessat2 439
|
||||
__SYSCALL(__NR_faccessat2, sys_faccessat2)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 439
|
||||
#define __NR_syscalls 440
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
@ -367,6 +367,7 @@
|
||||
#define __NR_clone3 (__NR_Linux + 435)
|
||||
#define __NR_openat2 (__NR_Linux + 437)
|
||||
#define __NR_pidfd_getfd (__NR_Linux + 438)
|
||||
#define __NR_faccessat2 (__NR_Linux + 439)
|
||||
|
||||
|
||||
#endif /* _ASM_MIPS_UNISTD_N32_H */
|
||||
|
@ -343,6 +343,7 @@
|
||||
#define __NR_clone3 (__NR_Linux + 435)
|
||||
#define __NR_openat2 (__NR_Linux + 437)
|
||||
#define __NR_pidfd_getfd (__NR_Linux + 438)
|
||||
#define __NR_faccessat2 (__NR_Linux + 439)
|
||||
|
||||
|
||||
#endif /* _ASM_MIPS_UNISTD_N64_H */
|
||||
|
@ -413,6 +413,7 @@
|
||||
#define __NR_clone3 (__NR_Linux + 435)
|
||||
#define __NR_openat2 (__NR_Linux + 437)
|
||||
#define __NR_pidfd_getfd (__NR_Linux + 438)
|
||||
#define __NR_faccessat2 (__NR_Linux + 439)
|
||||
|
||||
|
||||
#endif /* _ASM_MIPS_UNISTD_O32_H */
|
||||
|
@ -420,6 +420,7 @@
|
||||
#define __NR_clone3 435
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
|
||||
|
||||
#endif /* _ASM_POWERPC_UNISTD_32_H */
|
||||
|
@ -392,6 +392,7 @@
|
||||
#define __NR_clone3 435
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
|
||||
|
||||
#endif /* _ASM_POWERPC_UNISTD_64_H */
|
||||
|
@ -410,5 +410,6 @@
|
||||
#define __NR_clone3 435
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
|
||||
#endif /* _ASM_S390_UNISTD_32_H */
|
||||
|
@ -358,5 +358,6 @@
|
||||
#define __NR_clone3 435
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
|
||||
#endif /* _ASM_S390_UNISTD_64_H */
|
||||
|
@ -385,32 +385,48 @@ struct kvm_sync_regs {
|
||||
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
|
||||
|
||||
#define KVM_STATE_NESTED_FORMAT_VMX 0
|
||||
#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
|
||||
#define KVM_STATE_NESTED_FORMAT_SVM 1
|
||||
|
||||
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
|
||||
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
|
||||
#define KVM_STATE_NESTED_EVMCS 0x00000004
|
||||
#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
|
||||
#define KVM_STATE_NESTED_GIF_SET 0x00000100
|
||||
|
||||
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
|
||||
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
|
||||
|
||||
#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
|
||||
|
||||
#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000
|
||||
|
||||
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
|
||||
|
||||
/*
 * Two byte buffers, each KVM_STATE_NESTED_VMX_VMCS_SIZE bytes long.
 * Used as the element type of the zero-length "vmx" array in the data union
 * of struct kvm_nested_state.
 * NOTE(review): presumably the serialized vmcs12 and shadow vmcs12 of a
 * nested VMX guest -- confirm against the KVM API documentation.
 */
struct kvm_vmx_nested_state_data {
|
||||
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
};
|
||||
|
||||
/*
 * VMX-specific header; appears as member "vmx" of the header union in
 * struct kvm_nested_state.
 * NOTE(review): flags presumably carries KVM_STATE_VMX_* bits (e.g.
 * KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) and smm.flags the
 * KVM_STATE_NESTED_SMM_* bits -- confirm against the KVM API documentation.
 * NOTE(review): a __u32 directly followed by a __u64 leaves 4 bytes of
 * implicit padding on ABIs that align __u64 to 8 bytes; verify the layout
 * matches the kernel this header is paired with.
 */
struct kvm_vmx_nested_state_hdr {
|
||||
__u32 flags;
|
||||
__u64 vmxon_pa;
|
||||
__u64 vmcs12_pa;
|
||||
__u64 preemption_timer_deadline;
|
||||
|
||||
struct {
|
||||
__u16 flags;
|
||||
} smm;
|
||||
};
|
||||
|
||||
/*
 * One byte buffer of KVM_STATE_NESTED_SVM_VMCB_SIZE bytes.
 * Used as the element type of the zero-length "svm" array in the data union
 * of struct kvm_nested_state.
 */
struct kvm_svm_nested_state_data {
|
||||
/* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */
|
||||
__u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
|
||||
};
|
||||
|
||||
/*
 * SVM-specific header; appears as member "svm" of the header union in
 * struct kvm_nested_state.
 * NOTE(review): vmcb_pa is presumably the guest-physical address of the
 * nested guest's VMCB -- confirm against the KVM API documentation.
 */
struct kvm_svm_nested_state_hdr {
|
||||
__u64 vmcb_pa;
|
||||
};
|
||||
|
||||
/* for KVM_CAP_NESTED_STATE */
|
||||
struct kvm_nested_state {
|
||||
__u16 flags;
|
||||
@ -419,6 +435,7 @@ struct kvm_nested_state {
|
||||
|
||||
union {
|
||||
struct kvm_vmx_nested_state_hdr vmx;
|
||||
struct kvm_svm_nested_state_hdr svm;
|
||||
|
||||
/* Pad the header to 128 bytes. */
|
||||
__u8 pad[120];
|
||||
@ -431,6 +448,7 @@ struct kvm_nested_state {
|
||||
*/
|
||||
union {
|
||||
struct kvm_vmx_nested_state_data vmx[0];
|
||||
struct kvm_svm_nested_state_data svm[0];
|
||||
} data;
|
||||
};
|
||||
|
||||
|
@ -2,8 +2,15 @@
|
||||
#ifndef _ASM_X86_UNISTD_H
|
||||
#define _ASM_X86_UNISTD_H
|
||||
|
||||
/* x32 syscall flag bit */
|
||||
#define __X32_SYSCALL_BIT 0x40000000UL
|
||||
/*
|
||||
* x32 syscall flag bit. Some user programs expect syscall NR macros
|
||||
* and __X32_SYSCALL_BIT to have type int, even though syscall numbers
|
||||
* are, for practical purposes, unsigned long.
|
||||
*
|
||||
* Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right
|
||||
* thing regardless.
|
||||
*/
|
||||
#define __X32_SYSCALL_BIT 0x40000000
|
||||
|
||||
# ifdef __i386__
|
||||
# include <asm/unistd_32.h>
|
||||
|
@ -428,6 +428,7 @@
|
||||
#define __NR_clone3 435
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
|
||||
|
||||
#endif /* _ASM_X86_UNISTD_32_H */
|
||||
|
@ -350,6 +350,7 @@
|
||||
#define __NR_clone3 435
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
|
||||
|
||||
#endif /* _ASM_X86_UNISTD_64_H */
|
||||
|
@ -303,6 +303,7 @@
|
||||
#define __NR_clone3 (__X32_SYSCALL_BIT + 435)
|
||||
#define __NR_openat2 (__X32_SYSCALL_BIT + 437)
|
||||
#define __NR_pidfd_getfd (__X32_SYSCALL_BIT + 438)
|
||||
#define __NR_faccessat2 (__X32_SYSCALL_BIT + 439)
|
||||
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
|
||||
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
|
||||
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)
|
||||
|
@ -116,7 +116,7 @@ struct kvm_irq_level {
|
||||
* ACPI gsi notion of irq.
|
||||
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
|
||||
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
|
||||
* For ARM: See Documentation/virt/kvm/api.txt
|
||||
* For ARM: See Documentation/virt/kvm/api.rst
|
||||
*/
|
||||
union {
|
||||
__u32 irq;
|
||||
@ -188,10 +188,13 @@ struct kvm_s390_cmma_log {
|
||||
struct kvm_hyperv_exit {
|
||||
#define KVM_EXIT_HYPERV_SYNIC 1
|
||||
#define KVM_EXIT_HYPERV_HCALL 2
|
||||
#define KVM_EXIT_HYPERV_SYNDBG 3
|
||||
__u32 type;
|
||||
__u32 pad1;
|
||||
union {
|
||||
struct {
|
||||
__u32 msr;
|
||||
__u32 pad2;
|
||||
__u64 control;
|
||||
__u64 evt_page;
|
||||
__u64 msg_page;
|
||||
@ -201,6 +204,15 @@ struct kvm_hyperv_exit {
|
||||
__u64 result;
|
||||
__u64 params[2];
|
||||
} hcall;
|
||||
struct {
|
||||
__u32 msr;
|
||||
__u32 pad2;
|
||||
__u64 control;
|
||||
__u64 status;
|
||||
__u64 send_page;
|
||||
__u64 recv_page;
|
||||
__u64 pending_page;
|
||||
} syndbg;
|
||||
} u;
|
||||
};
|
||||
|
||||
@ -1017,6 +1029,8 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_S390_VCPU_RESETS 179
|
||||
#define KVM_CAP_S390_PROTECTED 180
|
||||
#define KVM_CAP_PPC_SECURE_GUEST 181
|
||||
#define KVM_CAP_HALT_POLL 182
|
||||
#define KVM_CAP_ASYNC_PF_INT 183
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -1107,7 +1121,7 @@ struct kvm_xen_hvm_config {
|
||||
*
|
||||
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
|
||||
* the irqfd to operate in resampling mode for level triggered interrupt
|
||||
* emulation. See Documentation/virt/kvm/api.txt.
|
||||
* emulation. See Documentation/virt/kvm/api.rst.
|
||||
*/
|
||||
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
|
||||
|
||||
|
@ -83,6 +83,8 @@ struct sev_user_data_status {
|
||||
__u32 guest_count; /* Out */
|
||||
} __attribute__((packed));
|
||||
|
||||
#define SEV_STATUS_FLAGS_CONFIG_ES 0x0100
|
||||
|
||||
/**
|
||||
* struct sev_user_data_pek_csr - PEK_CSR command parameters
|
||||
*
|
||||
|
@ -305,6 +305,7 @@ struct vfio_region_info_cap_type {
|
||||
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
|
||||
#define VFIO_REGION_TYPE_GFX (1)
|
||||
#define VFIO_REGION_TYPE_CCW (2)
|
||||
#define VFIO_REGION_TYPE_MIGRATION (3)
|
||||
|
||||
/* sub-types for VFIO_REGION_TYPE_PCI_* */
|
||||
|
||||
@ -378,6 +379,235 @@ struct vfio_region_gfx_edid {
|
||||
|
||||
/* sub-types for VFIO_REGION_TYPE_CCW */
|
||||
#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1)
|
||||
#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2)
|
||||
#define VFIO_REGION_SUBTYPE_CCW_CRW (3)
|
||||
|
||||
/* sub-types for VFIO_REGION_TYPE_MIGRATION */
|
||||
#define VFIO_REGION_SUBTYPE_MIGRATION (1)
|
||||
|
||||
/*
|
||||
* The structure vfio_device_migration_info is placed at the 0th offset of
|
||||
* the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
|
||||
* migration information. Field accesses from this structure are only supported
|
||||
* at their native width and alignment. Otherwise, the result is undefined and
|
||||
* vendor drivers should return an error.
|
||||
*
|
||||
* device_state: (read/write)
|
||||
* - The user application writes to this field to inform the vendor driver
|
||||
* about the device state to be transitioned to.
|
||||
* - The vendor driver should take the necessary actions to change the
|
||||
* device state. After successful transition to a given state, the
|
||||
* vendor driver should return success on write(device_state, state)
|
||||
* system call. If the device state transition fails, the vendor driver
|
||||
* should return an appropriate -errno for the fault condition.
|
||||
* - On the user application side, if the device state transition fails,
|
||||
* that is, if write(device_state, state) returns an error, read
|
||||
* device_state again to determine the current state of the device from
|
||||
* the vendor driver.
|
||||
* - The vendor driver should return previous state of the device unless
|
||||
* the vendor driver has encountered an internal error, in which case
|
||||
* the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR.
|
||||
* - The user application must use the device reset ioctl to recover the
|
||||
* device from VFIO_DEVICE_STATE_ERROR state. If the device is
|
||||
* indicated to be in a valid device state by reading device_state, the
|
||||
* user application may attempt to transition the device to any valid
|
||||
* state reachable from the current state or terminate itself.
|
||||
*
|
||||
* device_state consists of 3 bits:
|
||||
* - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear,
|
||||
* it indicates the _STOP state. When the device state is changed to
|
||||
* _STOP, driver should stop the device before write() returns.
|
||||
* - If bit 1 is set, it indicates the _SAVING state, which means that the
|
||||
* driver should start gathering device state information that will be
|
||||
* provided to the VFIO user application to save the device's state.
|
||||
* - If bit 2 is set, it indicates the _RESUMING state, which means that
|
||||
* the driver should prepare to resume the device. Data provided through
|
||||
* the migration region should be used to resume the device.
|
||||
* Bits 3 - 31 are reserved for future use. To preserve them, the user
|
||||
* application should perform a read-modify-write operation on this
|
||||
* field when modifying the specified bits.
|
||||
*
|
||||
* +------- _RESUMING
|
||||
* |+------ _SAVING
|
||||
* ||+----- _RUNNING
|
||||
* |||
|
||||
* 000b => Device Stopped, not saving or resuming
|
||||
* 001b => Device running, which is the default state
|
||||
* 010b => Stop the device & save the device state, stop-and-copy state
|
||||
* 011b => Device running and save the device state, pre-copy state
|
||||
* 100b => Device stopped and the device state is resuming
|
||||
* 101b => Invalid state
|
||||
* 110b => Error state
|
||||
* 111b => Invalid state
|
||||
*
|
||||
* State transitions:
|
||||
*
|
||||
* _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP
|
||||
* (100b) (001b) (011b) (010b) (000b)
|
||||
* 0. Running or default state
|
||||
* |
|
||||
*
|
||||
* 1. Normal Shutdown (optional)
|
||||
* |------------------------------------->|
|
||||
*
|
||||
* 2. Save the state or suspend
|
||||
* |------------------------->|---------->|
|
||||
*
|
||||
* 3. Save the state during live migration
|
||||
* |----------->|------------>|---------->|
|
||||
*
|
||||
* 4. Resuming
|
||||
* |<---------|
|
||||
*
|
||||
* 5. Resumed
|
||||
* |--------->|
|
||||
*
|
||||
* 0. Default state of VFIO device is _RUNNING when the user application starts.
|
||||
* 1. During normal shutdown of the user application, the user application may
|
||||
* optionally change the VFIO device state from _RUNNING to _STOP. This
|
||||
* transition is optional. The vendor driver must support this transition but
|
||||
* must not require it.
|
||||
* 2. When the user application saves state or suspends the application, the
|
||||
* device state transitions from _RUNNING to stop-and-copy and then to _STOP.
|
||||
* On state transition from _RUNNING to stop-and-copy, driver must stop the
|
||||
* device, save the device state and send it to the application through the
|
||||
* migration region. The sequence to be followed for such transition is given
|
||||
* below.
|
||||
* 3. In live migration of user application, the state transitions from _RUNNING
|
||||
* to pre-copy, to stop-and-copy, and to _STOP.
|
||||
* On state transition from _RUNNING to pre-copy, the driver should start
|
||||
* gathering the device state while the application is still running and send
|
||||
* the device state data to application through the migration region.
|
||||
* On state transition from pre-copy to stop-and-copy, the driver must stop
|
||||
* the device, save the device state and send it to the user application
|
||||
* through the migration region.
|
||||
* Vendor drivers must support the pre-copy state even for implementations
|
||||
* where no data is provided to the user before the stop-and-copy state. The
|
||||
* user must not be required to consume all migration data before the device
|
||||
* transitions to a new state, including the stop-and-copy state.
|
||||
* The sequence to be followed for above two transitions is given below.
|
||||
* 4. To start the resuming phase, the device state should be transitioned from
|
||||
* the _RUNNING to the _RESUMING state.
|
||||
* In the _RESUMING state, the driver should use the device state data
|
||||
* received through the migration region to resume the device.
|
||||
* 5. After providing saved device data to the driver, the application should
|
||||
* change the state from _RESUMING to _RUNNING.
|
||||
*
|
||||
* reserved:
|
||||
* Reads on this field return zero and writes are ignored.
|
||||
*
|
||||
* pending_bytes: (read only)
|
||||
* The number of pending bytes still to be migrated from the vendor driver.
|
||||
*
|
||||
* data_offset: (read only)
|
||||
* The user application should read data_offset field from the migration
|
||||
* region. The user application should read the device data from this
|
||||
* offset within the migration region during the _SAVING state or write
|
||||
* the device data during the _RESUMING state. See below for details of
|
||||
* sequence to be followed.
|
||||
*
|
||||
* data_size: (read/write)
|
||||
* The user application should read data_size to get the size in bytes of
|
||||
* the data copied in the migration region during the _SAVING state and
|
||||
* write the size in bytes of the data copied in the migration region
|
||||
* during the _RESUMING state.
|
||||
*
|
||||
* The format of the migration region is as follows:
|
||||
* ------------------------------------------------------------------
|
||||
* |vfio_device_migration_info| data section |
|
||||
* | | /////////////////////////////// |
|
||||
* ------------------------------------------------------------------
|
||||
* ^ ^
|
||||
* offset 0-trapped part data_offset
|
||||
*
|
||||
* The structure vfio_device_migration_info is always followed by the data
|
||||
* section in the region, so data_offset will always be nonzero. The offset
|
||||
* from where the data is copied is decided by the kernel driver. The data
|
||||
* section can be trapped, mmapped, or partitioned, depending on how the kernel
|
||||
* driver defines the data section. The data section partition can be defined
|
||||
* as mapped by the sparse mmap capability. If mmapped, data_offset must be
|
||||
* page aligned, whereas initial section which contains the
|
||||
* vfio_device_migration_info structure, might not end at the offset, which is
|
||||
* page aligned. The user is not required to access through mmap regardless
|
||||
* of the capabilities of the region mmap.
|
||||
* The vendor driver should determine whether and how to partition the data
|
||||
* section. The vendor driver should return data_offset accordingly.
|
||||
*
|
||||
* The sequence to be followed while in pre-copy state and stop-and-copy state
|
||||
* is as follows:
|
||||
* a. Read pending_bytes, indicating the start of a new iteration to get device
|
||||
* data. Repeated read on pending_bytes at this stage should have no side
|
||||
* effects.
|
||||
* If pending_bytes == 0, the user application should not iterate to get data
|
||||
* for that device.
|
||||
* If pending_bytes > 0, perform the following steps.
|
||||
* b. Read data_offset, indicating that the vendor driver should make data
|
||||
* available through the data section. The vendor driver should return this
|
||||
* read operation only after data is available from (region + data_offset)
|
||||
* to (region + data_offset + data_size).
|
||||
* c. Read data_size, which is the amount of data in bytes available through
|
||||
* the migration region.
|
||||
* Read on data_offset and data_size should return the offset and size of
|
||||
* the current buffer if the user application reads data_offset and
|
||||
* data_size more than once here.
|
||||
* d. Read data_size bytes of data from (region + data_offset) from the
|
||||
* migration region.
|
||||
* e. Process the data.
|
||||
* f. Read pending_bytes, which indicates that the data from the previous
|
||||
* iteration has been read. If pending_bytes > 0, go to step b.
|
||||
*
|
||||
* The user application can transition from the _SAVING|_RUNNING
|
||||
* (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the
|
||||
* number of pending bytes. The user application should iterate in _SAVING
|
||||
* (stop-and-copy) until pending_bytes is 0.
|
||||
*
|
||||
* The sequence to be followed while _RESUMING device state is as follows:
|
||||
* While data for this device is available, repeat the following steps:
|
||||
* a. Read data_offset from where the user application should write data.
|
||||
* b. Write migration data starting at the migration region + data_offset for
|
||||
* the length determined by data_size from the migration source.
|
||||
* c. Write data_size, which indicates to the vendor driver that data is
|
||||
* written in the migration region. Vendor driver must return from this write
|
||||
* operation once the data has been consumed. Vendor driver should apply the
|
||||
* user-provided migration region data to the device resume state.
|
||||
*
|
||||
* If an error occurs during the above sequences, the vendor driver can return
|
||||
* an error code for next read() or write() operation, which will terminate the
|
||||
* loop. The user application should then take the next necessary action, for
|
||||
* example, failing migration or terminating the user application.
|
||||
*
|
||||
* For the user application, data is opaque. The user application should write
|
||||
* data in the same order as the data is received and the data should be of
|
||||
* same transaction size at the source.
|
||||
*/
|
||||
|
||||
/*
 * Header placed at offset 0 of the VFIO_REGION_SUBTYPE_MIGRATION region
 * (see the description preceding this structure).
 *
 * device_state:  bit 0 = _RUNNING, bit 1 = _SAVING, bit 2 = _RESUMING.
 * reserved:      reads return zero, writes are ignored.
 * pending_bytes: bytes still to be migrated from the vendor driver.
 * data_offset:   offset of the data section within the migration region.
 * data_size:     size in bytes of the data in the data section.
 *
 * The helper macros take a device_state value. Their argument is fully
 * parenthesized so that compound expressions such as (a | b) are evaluated
 * as a unit.
 */
struct vfio_device_migration_info {
	__u32 device_state;         /* VFIO device state */
#define VFIO_DEVICE_STATE_STOP      (0)
#define VFIO_DEVICE_STATE_RUNNING   (1 << 0)
#define VFIO_DEVICE_STATE_SAVING    (1 << 1)
#define VFIO_DEVICE_STATE_RESUMING  (1 << 2)
#define VFIO_DEVICE_STATE_MASK      (VFIO_DEVICE_STATE_RUNNING | \
				     VFIO_DEVICE_STATE_SAVING |  \
				     VFIO_DEVICE_STATE_RESUMING)

/*
 * Non-zero unless _RESUMING is combined with _RUNNING and/or _SAVING
 * (i.e. 101b, 110b and 111b are rejected).
 */
#define VFIO_DEVICE_STATE_VALID(state) \
	((state) & VFIO_DEVICE_STATE_RESUMING ? \
	((state) & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1)

/* Non-zero when the three state bits equal _SAVING | _RESUMING (110b). */
#define VFIO_DEVICE_STATE_IS_ERROR(state) \
	(((state) & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \
						VFIO_DEVICE_STATE_RESUMING))

/*
 * Yields state with the three state bits replaced by the error encoding
 * (110b); bits outside VFIO_DEVICE_STATE_MASK are preserved.
 * Fix: previously referenced the misspelled, undefined
 * VFIO_DEVICE_SATE_SAVING, so any use failed to compile.
 */
#define VFIO_DEVICE_STATE_SET_ERROR(state) \
	(((state) & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \
					       VFIO_DEVICE_STATE_RESUMING)

	__u32 reserved;
	__u64 pending_bytes;
	__u64 data_offset;
	__u64 data_size;
};
|
||||
|
||||
/*
|
||||
* The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
|
||||
@ -577,6 +807,7 @@ enum {
|
||||
|
||||
/*
 * IRQ indices for vfio-ccw devices. The enumerators take the values 0, 1, 2
 * in order, so VFIO_CCW_NUM_IRQS (listed last) equals the number of indices
 * defined before it.
 */
enum {
|
||||
VFIO_CCW_IO_IRQ_INDEX,
|
||||
VFIO_CCW_CRW_IRQ_INDEX,
|
||||
VFIO_CCW_NUM_IRQS
|
||||
};
|
||||
|
||||
@ -785,6 +1016,29 @@ struct vfio_iommu_type1_info_cap_iova_range {
|
||||
struct vfio_iova_range iova_ranges[];
|
||||
};
|
||||
|
||||
/*
|
||||
* The migration capability allows to report supported features for migration.
|
||||
*
|
||||
* The structures below define version 1 of this capability.
|
||||
*
|
||||
* The existence of this capability indicates that IOMMU kernel driver supports
|
||||
* dirty page logging.
|
||||
*
|
||||
* pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty
|
||||
* page logging.
|
||||
* max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap
|
||||
* size in bytes that can be used by user applications when getting the dirty
|
||||
* bitmap.
|
||||
*/
|
||||
#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1
|
||||
|
||||
/*
 * Capability structure: a struct vfio_info_cap_header followed by the
 * migration-related fields.
 * NOTE(review): presumably identified by VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION
 * in the capability header; no bit definitions for "flags" are visible next
 * to this structure -- confirm whether any exist.
 */
struct vfio_iommu_type1_info_cap_migration {
|
||||
struct vfio_info_cap_header header;
|
||||
__u32 flags;
|
||||
__u64 pgsize_bitmap;
|
||||
__u64 max_dirty_bitmap_size; /* in bytes */
|
||||
};
|
||||
|
||||
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
|
||||
|
||||
/**
|
||||
@ -805,6 +1059,12 @@ struct vfio_iommu_type1_dma_map {
|
||||
|
||||
#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
|
||||
|
||||
/*
 * Describes a user-provided dirty-page bitmap buffer: the page size each
 * bit stands for, the size of the buffer, and a pointer to the buffer.
 */
struct vfio_bitmap {
|
||||
__u64 pgsize; /* page size for bitmap in bytes */
|
||||
__u64 size; /* in bytes */
|
||||
__u64 *data; /* one bit per page */
|
||||
};
|
||||
|
||||
/**
|
||||
* VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
|
||||
* struct vfio_dma_unmap)
|
||||
@ -814,12 +1074,23 @@ struct vfio_iommu_type1_dma_map {
|
||||
* field. No guarantee is made to the user that arbitrary unmaps of iova
|
||||
* or size different from those used in the original mapping call will
|
||||
* succeed.
|
||||
* VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
|
||||
* before unmapping IO virtual addresses. When this flag is set, the user must
|
||||
* provide a struct vfio_bitmap in data[]. User must provide zero-allocated
|
||||
* memory via vfio_bitmap.data and its size in the vfio_bitmap.size field.
|
||||
* A bit in the bitmap represents one page, of user provided page size in
|
||||
* vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set
|
||||
* indicates that the page at that offset from iova is dirty. A Bitmap of the
|
||||
* pages in the range of unmapped size is returned in the user-provided
|
||||
* vfio_bitmap.data.
|
||||
*/
|
||||
/* Argument structure for VFIO_IOMMU_UNMAP_DMA. */
struct vfio_iommu_type1_dma_unmap {
|
||||
__u32 argsz;
|
||||
__u32 flags;
|
||||
/*
 * Request the dirty bitmap before the IO virtual addresses are unmapped;
 * when set, data[] must carry a struct vfio_bitmap.
 */
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
|
||||
__u64 iova; /* IO virtual address */
|
||||
__u64 size; /* Size of mapping (bytes) */
|
||||
/* flexible trailing payload; see VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP */
__u8 data[];
|
||||
};
|
||||
|
||||
#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
|
||||
@ -831,6 +1102,57 @@ struct vfio_iommu_type1_dma_unmap {
|
||||
#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15)
|
||||
#define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16)
|
||||
|
||||
/**
|
||||
* VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
|
||||
* struct vfio_iommu_type1_dirty_bitmap)
|
||||
* IOCTL is used for dirty pages logging.
|
||||
* Caller should set flag depending on which operation to perform, details as
|
||||
* below:
|
||||
*
|
||||
* Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs
|
||||
* the IOMMU driver to log pages that are dirtied or potentially dirtied by
|
||||
* the device; designed to be used when a migration is in progress. Dirty pages
|
||||
* are logged until logging is disabled by user application by calling the IOCTL
|
||||
* with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
|
||||
*
|
||||
* Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs
|
||||
* the IOMMU driver to stop logging dirtied pages.
|
||||
*
|
||||
* Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set
|
||||
* returns the dirty pages bitmap for IOMMU container for a given IOVA range.
|
||||
* The user must specify the IOVA range and the pgsize through the structure
|
||||
* vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface
|
||||
* supports getting a bitmap of the smallest supported pgsize only and can be
|
||||
* modified in future to get a bitmap of any specified supported pgsize. The
|
||||
* user must provide a zeroed memory area for the bitmap memory and specify its
|
||||
* size in bitmap.size. One bit is used to represent one page consecutively
|
||||
* starting from iova offset. The user should provide page size in bitmap.pgsize
|
||||
* field. A bit set in the bitmap indicates that the page at that offset from
|
||||
* iova is dirty. The caller must set argsz to a value including the size of
|
||||
* structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the
|
||||
* actual bitmap. If dirty pages logging is not enabled, an error will be
|
||||
* returned.
|
||||
*
|
||||
* Only one of the flags _START, _STOP and _GET may be specified at a time.
|
||||
*
|
||||
*/
|
||||
/*
 * Argument structure for VFIO_IOMMU_DIRTY_PAGES. Exactly one of the flags
 * below selects the operation to perform.
 */
struct vfio_iommu_type1_dirty_bitmap {
|
||||
__u32 argsz;
|
||||
__u32 flags;
|
||||
/* start logging pages dirtied (or potentially dirtied) by the device */
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0)
|
||||
/* stop logging dirtied pages */
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1)
|
||||
/* return the dirty pages bitmap for the IOVA range given in data[] */
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2)
|
||||
/* for _GET_BITMAP: holds a struct vfio_iommu_type1_dirty_bitmap_get */
__u8 data[];
|
||||
};
|
||||
|
||||
/*
 * Payload placed in vfio_iommu_type1_dirty_bitmap.data[] when
 * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP is set: the IOVA range to query
 * and the user-provided bitmap that receives the result.
 */
struct vfio_iommu_type1_dirty_bitmap_get {
|
||||
__u64 iova; /* IO virtual address */
|
||||
__u64 size; /* Size of iova range */
|
||||
/* user-provided bitmap buffer; one bit per page starting from iova */
struct vfio_bitmap bitmap;
|
||||
};
|
||||
|
||||
#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
|
||||
|
||||
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
|
||||
|
||||
/*
|
||||
|
@ -34,4 +34,23 @@ struct ccw_cmd_region {
|
||||
__u32 ret_code;
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
|
||||
* Used for processing commands that read the subchannel-information block
|
||||
* Reading this region triggers a stsch() to hardware
|
||||
* Note: this is controlled by a capability
|
||||
*/
|
||||
/* Region layout holding the subchannel-information block as raw bytes. */
struct ccw_schib_region {
|
||||
/* size of schib_area in bytes */
#define SCHIB_AREA_SIZE 52
|
||||
__u8 schib_area[SCHIB_AREA_SIZE];
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
|
||||
* Used for returning a Channel Report Word to userspace.
|
||||
* Note: this is controlled by a capability
|
||||
*/
|
||||
/* Region layout used to hand one Channel Report Word to userspace. */
struct ccw_crw_region {
|
||||
/* the Channel Report Word */
__u32 crw;
|
||||
/* presumably padding to an 8-byte region size; confirm against the kernel */
__u32 pad;
|
||||
} __attribute__((packed));
|
||||
|
||||
#endif
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/ioctl.h>
|
||||
|
||||
#define VHOST_FILE_UNBIND -1
|
||||
|
||||
/* ioctls */
|
||||
|
||||
#define VHOST_VIRTIO 0xAF
|
||||
@ -140,4 +142,6 @@
|
||||
/* Get the max ring size. */
|
||||
#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
|
||||
|
||||
/* Set event fd for config interrupt */
|
||||
#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
|
||||
#endif
|
||||
|
@ -292,8 +292,7 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb,
|
||||
sch = css_find_subch(m, cssid, ssid, schid);
|
||||
if (sch) {
|
||||
if (css_subch_visible(sch)) {
|
||||
css_do_stsch(sch, &schib);
|
||||
cc = 0;
|
||||
cc = css_do_stsch(sch, &schib);
|
||||
} else {
|
||||
/* Indicate no more subchannels in this css/ss */
|
||||
cc = 3;
|
||||
|
Loading…
x
Reference in New Issue
Block a user