From 5e4f6bcc296ffbd4a66ccdfb975202ba0147d306 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 6 Jul 2021 09:10:56 +0200 Subject: [PATCH 01/11] hw/nvme: remove NvmeCtrl parameter from ns setup/check functions The nvme_ns_setup and nvme_ns_check_constraints should not depend on the controller state. Refactor and remove it. Reviewed-by: Hannes Reinecke Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 2 +- hw/nvme/ns.c | 37 ++++++++++++++++++------------------- hw/nvme/nvme.h | 2 +- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 629b0d38c2..dd18015100 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6498,7 +6498,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) ns = &n->namespace; ns->params.nsid = 1; - if (nvme_ns_setup(n, ns, errp)) { + if (nvme_ns_setup(ns, errp)) { return; } diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 4275c3db63..3c4f5b8c71 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -346,8 +346,7 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) assert(ns->nr_open_zones == 0); } -static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, - Error **errp) +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) { if (!ns->blkconf.blk) { error_setg(errp, "block backend not configured"); @@ -366,20 +365,6 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, return -1; } - if (!n->subsys) { - if (ns->params.detached) { - error_setg(errp, "detached requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - - if (ns->params.shared) { - error_setg(errp, "shared requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - } - if (ns->params.zoned) { if (ns->params.max_active_zones) { if (ns->params.max_open_zones > ns->params.max_active_zones) { @@ -411,9 +396,9 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, return 0; } -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +int nvme_ns_setup(NvmeNamespace *ns, Error **errp) { - if (nvme_ns_check_constraints(n, ns, errp)) { + if (nvme_ns_check_constraints(ns, errp)) { return -1; } @@ -465,7 +450,21 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) uint32_t nsid = ns->params.nsid; int i; - if (nvme_ns_setup(n, ns, errp)) { + if (!n->subsys) { + if (ns->params.detached) { + error_setg(errp, "detached requires that the nvme device is " + "linked to an nvme-subsys device"); + return; + } + + if (ns->params.shared) { + error_setg(errp, "shared requires that the nvme device is " + "linked to an nvme-subsys device"); + return; + } + } + + if (nvme_ns_setup(ns, errp)) { return; } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 56f8eceed2..0868359a1e 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -246,7 +246,7 @@ static inline void nvme_aor_dec_active(NvmeNamespace *ns) } void nvme_ns_init_format(NvmeNamespace *ns); -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); +int nvme_ns_setup(NvmeNamespace *ns, Error **errp); void nvme_ns_drain(NvmeNamespace *ns); void nvme_ns_shutdown(NvmeNamespace *ns); void nvme_ns_cleanup(NvmeNamespace *ns); From cc6fb6bc506e6c47ed604fcb7b7413dff0b7d845 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 6 Jul 2021 10:48:40 +0200 Subject: [PATCH 02/11] hw/nvme: mark nvme-subsys non-hotpluggable We currently lack the infrastructure to handle subsystem hotplugging, so disable it. Reviewed-by: Hannes Reinecke Signed-off-by: Klaus Jensen --- hw/nvme/subsys.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 192223d17c..dc7a96862f 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -61,6 +61,7 @@ static void nvme_subsys_class_init(ObjectClass *oc, void *data) dc->realize = nvme_subsys_realize; dc->desc = "Virtual NVMe subsystem"; + dc->hotpluggable = false; device_class_set_props(dc, nvme_subsystem_props); } From b0fde9e86133f66c054b31722fa29640f57e975c Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 6 Jul 2021 10:51:36 +0200 Subject: [PATCH 03/11] hw/nvme: unregister controller with subsystem at exit Make sure the controller is unregistered from the subsystem when device is removed. Reviewed-by: Hannes Reinecke Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 4 ++++ hw/nvme/nvme.h | 1 + hw/nvme/subsys.c | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index dd18015100..90e3ee2b70 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6523,6 +6523,10 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ns_cleanup(ns); } + if (n->subsys) { + nvme_subsys_unregister_ctrl(n->subsys, n); + } + g_free(n->cq); g_free(n->sq); g_free(n->aer_reqs); diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 0868359a1e..c4065467d8 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -50,6 +50,7 @@ typedef struct NvmeSubsystem { } NvmeSubsystem; int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n); static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, uint32_t cntlid) diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index dc7a96862f..92caa604a2 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -32,6 +32,11 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) return cntlid; } +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) +{ + subsys->ctrls[n->cntlid] = NULL; +} + static void nvme_subsys_setup(NvmeSubsystem *subsys) { const char *nqn = subsys->params.nqn ? From 234214734f7347b1bc3ceeb8f4a2ef53195a8242 Mon Sep 17 00:00:00 2001 From: Padmakar Kalghatgi Date: Fri, 9 Jul 2021 07:58:40 +0200 Subject: [PATCH 04/11] hw/nvme: error handling for too many mappings If the number of PRP/SGL mappings exceed 1024, reads and writes will fail because of an internal QEMU limitation of max 1024 vectors. Signed-off-by: Padmakar Kalghatgi Reviewed-by: Klaus Jensen [k.jensen: changed the error message to be more generic] Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 13 +++++++++++++ hw/nvme/trace-events | 1 + 2 files changed, 14 insertions(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 90e3ee2b70..ead7531bde 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -623,6 +623,10 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) return NVME_INVALID_USE_OF_CMB | NVME_DNR; } + if (sg->iov.niov + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + if (cmb) { return nvme_map_addr_cmb(n, &sg->iov, addr, len); } else { @@ -634,9 +638,18 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) return NVME_INVALID_USE_OF_CMB | NVME_DNR; } + if (sg->qsg.nsg + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + qemu_sglist_add(&sg->qsg, addr, len); return NVME_SUCCESS; + +max_mappings_exceeded: + NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings, + "number of mappings exceed 1024"); + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; } static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index f9a1f14e26..430eeb395b 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -199,3 +199,4 @@ pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion qu pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" +pci_nvme_ub_too_many_mappings(void) "too many prp/sgl mappings" From 51e90178f710d64400f8d01035dc7e4f4f9cb9da Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Fri, 18 Jun 2021 16:04:31 +0530 Subject: [PATCH 05/11] tests/qtest/nvme-test: add persistent memory region test This will test the PMR functionality. Signed-off-by: Gollu Appalanaidu Reviewed-by: Klaus Jensen [k.jensen: replaced memory-backend-file with memory-backend-ram] Signed-off-by: Klaus Jensen --- tests/qtest/nvme-test.c | 61 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c index d32c953a38..47e757d7e2 100644 --- a/tests/qtest/nvme-test.c +++ b/tests/qtest/nvme-test.c @@ -13,6 +13,7 @@ #include "libqos/libqtest.h" #include "libqos/qgraph.h" #include "libqos/pci.h" +#include "include/block/nvme.h" typedef struct QNvme QNvme; @@ -66,12 +67,65 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211); } +static void nvmetest_pmr_reg_test(void *obj, void *data, QGuestAllocator *alloc) +{ + QNvme *nvme = obj; + QPCIDevice *pdev = &nvme->dev; + QPCIBar pmr_bar, nvme_bar; + uint32_t pmrcap, pmrsts; + + qpci_device_enable(pdev); + pmr_bar = qpci_iomap(pdev, 4, NULL); + + /* Without Enabling PMRCTL check bar enablemet */ + qpci_io_writel(pdev, pmr_bar, 0, 0xccbbaa99); + g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), !=, 0x99); + g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), !=, 0xaa99); + + /* Map NVMe Bar Register to Enable the Mem Region */ + nvme_bar = qpci_iomap(pdev, 0, NULL); + + pmrcap = qpci_io_readl(pdev, nvme_bar, 0xe00); + g_assert_cmpint(NVME_PMRCAP_RDS(pmrcap), ==, 0x1); + g_assert_cmpint(NVME_PMRCAP_WDS(pmrcap), ==, 0x1); + g_assert_cmpint(NVME_PMRCAP_BIR(pmrcap), ==, 0x4); + g_assert_cmpint(NVME_PMRCAP_PMRWBM(pmrcap), ==, 0x2); + g_assert_cmpint(NVME_PMRCAP_CMSS(pmrcap), ==, 0x1); + + /* Enable PMRCTRL */ + qpci_io_writel(pdev, nvme_bar, 0xe04, 0x1); + + qpci_io_writel(pdev, pmr_bar, 0, 0x44332211); + g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), ==, 0x11); + g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), ==, 0x2211); + g_assert_cmpint(qpci_io_readl(pdev, pmr_bar, 0), ==, 0x44332211); + + pmrsts = qpci_io_readl(pdev, nvme_bar, 0xe08); + g_assert_cmpint(NVME_PMRSTS_NRDY(pmrsts), ==, 0x0); + + /* Disable PMRCTRL */ + qpci_io_writel(pdev, nvme_bar, 0xe04, 0x0); + + qpci_io_writel(pdev, pmr_bar, 0, 0x88776655); + g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), !=, 0x55); + g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), !=, 0x6655); + g_assert_cmpint(qpci_io_readl(pdev, pmr_bar, 0), !=, 0x88776655); + + pmrsts = qpci_io_readl(pdev, nvme_bar, 0xe08); + g_assert_cmpint(NVME_PMRSTS_NRDY(pmrsts), ==, 0x1); + + qpci_iounmap(pdev, nvme_bar); + qpci_iounmap(pdev, pmr_bar); +} + static void nvme_register_nodes(void) { QOSGraphEdgeOptions opts = { .extra_device_opts = "addr=04.0,drive=drv0,serial=foo", .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," - "file.read-zeroes=on,format=raw", + "file.read-zeroes=on,format=raw " + "-object memory-backend-ram,id=pmr0," + "share=on,size=8", }; add_qpci_address(&opts, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) }); @@ -83,6 +137,11 @@ static void nvme_register_nodes(void) qos_add_test("oob-cmb-access", "nvme", nvmetest_oob_cmb_test, &(QOSGraphTestOptions) { .edge.extra_device_opts = "cmb_size_mb=2" }); + + qos_add_test("pmr-test-access", "nvme", nvmetest_pmr_reg_test, + &(QOSGraphTestOptions) { + .edge.extra_device_opts = "pmrdev=pmr0" + }); } libqos_init(nvme_register_nodes); From 5ffbaeed164da1a87619a3abfadee0c7d63ea1c4 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 23 Apr 2021 18:55:11 +0200 Subject: [PATCH 06/11] hw/nvme: fix controller hot unplugging Prior to this patch the nvme-ns devices are always children of the NvmeBus owned by the NvmeCtrl. This causes the namespaces to be unrealized when the parent device is removed. However, when subsystems are involved, this is not what we want since the namespaces may be attached to other controllers as well. This patch adds an additional NvmeBus on the subsystem device. When nvme-ns devices are realized, if the parent controller device is linked to a subsystem, the parent bus is set to the subsystem one instead. This makes sure that namespaces are kept alive and not unrealized. Reviewed-by: Hannes Reinecke Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 14 ++++++-------- hw/nvme/ns.c | 18 ++++++++++++++++++ hw/nvme/nvme.h | 15 ++++++++------- hw/nvme/subsys.c | 3 +++ 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index ead7531bde..2f0524e12a 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6527,16 +6527,14 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ctrl_reset(n); - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; + if (n->subsys) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + ns->attached--; + } } - nvme_ns_cleanup(ns); - } - - if (n->subsys) { nvme_subsys_unregister_ctrl(n->subsys, n); } diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 3c4f5b8c71..b7cf1494e7 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -441,6 +441,15 @@ void nvme_ns_cleanup(NvmeNamespace *ns) } } +static void nvme_ns_unrealize(DeviceState *dev) +{ + NvmeNamespace *ns = NVME_NS(dev); + + nvme_ns_drain(ns); + nvme_ns_shutdown(ns); + nvme_ns_cleanup(ns); +} + static void nvme_ns_realize(DeviceState *dev, Error **errp) { NvmeNamespace *ns = NVME_NS(dev); @@ -462,6 +471,14 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) "linked to an nvme-subsys device"); return; } + } else { + /* + * If this namespace belongs to a subsystem (through a link on the + * controller device), reparent the device. + */ + if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { + return; + } } if (nvme_ns_setup(ns, errp)) { @@ -552,6 +569,7 @@ static void nvme_ns_class_init(ObjectClass *oc, void *data) dc->bus_type = TYPE_NVME_BUS; dc->realize = nvme_ns_realize; + dc->unrealize = nvme_ns_unrealize; device_class_set_props(dc, nvme_ns_props); dc->desc = "Virtual NVMe namespace"; } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index c4065467d8..83ffabade4 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -33,12 +33,20 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); typedef struct NvmeCtrl NvmeCtrl; typedef struct NvmeNamespace NvmeNamespace; +#define TYPE_NVME_BUS "nvme-bus" +OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; + #define TYPE_NVME_SUBSYS "nvme-subsys" #define NVME_SUBSYS(obj) \ OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) typedef struct NvmeSubsystem { DeviceState parent_obj; + NvmeBus bus; uint8_t subnqn[256]; NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; @@ -365,13 +373,6 @@ typedef struct NvmeCQueue { QTAILQ_HEAD(, NvmeRequest) req_list; } NvmeCQueue; -#define TYPE_NVME_BUS "nvme-bus" -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) - -typedef struct NvmeBus { - BusState parent_bus; -} NvmeBus; - #define TYPE_NVME "nvme" #define NVME(obj) \ OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 92caa604a2..93c35950d6 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -50,6 +50,9 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp) { NvmeSubsystem *subsys = NVME_SUBSYS(dev); + qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, + dev->id); + nvme_subsys_setup(subsys); } From 5d45edbeac143c0a18d82efde92cc5e22c4dc021 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 13 Jul 2021 14:34:52 +0200 Subject: [PATCH 07/11] hw/nvme: split pmrmsc register into upper and lower The specification uses a set of 32 bit PMRMSCL and PMRMSCU registers to make up the 64 bit logical PMRMSC register. Make it so. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 10 ++++++---- include/block/nvme.h | 31 ++++++++++++++++--------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2f0524e12a..070d9f6a96 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5916,11 +5916,13 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; } - n->bar.pmrmsc = (n->bar.pmrmsc & ~0xffffffff) | (data & 0xffffffff); + n->bar.pmrmscl = data; n->pmr.cmse = false; - if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) { - hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT; + if (NVME_PMRMSCL_CMSE(n->bar.pmrmscl)) { + uint64_t pmrmscu = n->bar.pmrmscu; + hwaddr cba = (pmrmscu << 32) | + (NVME_PMRMSCL_CBA(n->bar.pmrmscl) << PMRMSCL_CBA_SHIFT); if (cba + int128_get64(n->pmr.dev->mr.size) < cba) { NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1); return; @@ -5936,7 +5938,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; } - n->bar.pmrmsc = (n->bar.pmrmsc & 0xffffffff) | (data << 32); + n->bar.pmrmscu = data; return; default: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, diff --git a/include/block/nvme.h b/include/block/nvme.h index 527105fafc..84053b68b9 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -26,7 +26,8 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t pmrsts; uint32_t pmrebs; uint32_t pmrswtp; - uint64_t pmrmsc; + uint32_t pmrmscl; + uint32_t pmrmscu; uint8_t css[484]; } NvmeBar; @@ -475,25 +476,25 @@ enum NvmePmrswtpMask { #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) -enum NvmePmrmscShift { - PMRMSC_CMSE_SHIFT = 1, - PMRMSC_CBA_SHIFT = 12, +enum NvmePmrmsclShift { + PMRMSCL_CMSE_SHIFT = 1, + PMRMSCL_CBA_SHIFT = 12, }; -enum NvmePmrmscMask { - PMRMSC_CMSE_MASK = 0x1, - PMRMSC_CBA_MASK = 0xfffffffffffff, +enum NvmePmrmsclMask { + PMRMSCL_CMSE_MASK = 0x1, + PMRMSCL_CBA_MASK = 0xfffff, }; -#define NVME_PMRMSC_CMSE(pmrmsc) \ - ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) -#define NVME_PMRMSC_CBA(pmrmsc) \ - ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) +#define NVME_PMRMSCL_CMSE(pmrmscl) \ + ((pmrmscl >> PMRMSCL_CMSE_SHIFT) & PMRMSCL_CMSE_MASK) +#define NVME_PMRMSCL_CBA(pmrmscl) \ + ((pmrmscl >> PMRMSCL_CBA_SHIFT) & PMRMSCL_CBA_MASK) -#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) -#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) +#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT) +#define NVME_PMRMSCL_SET_CBA(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT) enum NvmeSglDescriptorType { NVME_SGL_DESCR_TYPE_DATA_BLOCK = 0x0, From a316aa50e6c9f25c22a705000271d33620a40595 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 13 Jul 2021 16:29:59 +0200 Subject: [PATCH 08/11] hw/nvme: use symbolic names for registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the NvmeBarRegs enum and use these instead of explicit register offsets. Signed-off-by: Klaus Jensen Reviewed-by: Gollu Appalanaidu Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 44 ++++++++++++++++++++++---------------------- include/block/nvme.h | 29 ++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 23 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 070d9f6a96..23ff71f65c 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5740,7 +5740,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } switch (offset) { - case 0xc: /* INTMS */ + case NVME_REG_INTMS: if (unlikely(msix_enabled(&(n->parent_obj)))) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask set" @@ -5752,7 +5752,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc); nvme_irq_check(n); break; - case 0x10: /* INTMC */ + case NVME_REG_INTMC: if (unlikely(msix_enabled(&(n->parent_obj)))) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask clr" @@ -5764,7 +5764,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc); nvme_irq_check(n); break; - case 0x14: /* CC */ + case NVME_REG_CC: trace_pci_nvme_mmio_cfg(data & 0xffffffff); /* Windows first sends data, then sends enable bit */ if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && @@ -5798,7 +5798,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->bar.cc = data; } break; - case 0x1c: /* CSTS */ + case NVME_REG_CSTS: if (data & (1 << 4)) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, "attempted to W1C CSTS.NSSRO" @@ -5809,7 +5809,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, " of controller status"); } break; - case 0x20: /* NSSR */ + case NVME_REG_NSSR: if (data == 0x4e564d65) { trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); } else { @@ -5817,38 +5817,38 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; } break; - case 0x24: /* AQA */ + case NVME_REG_AQA: n->bar.aqa = data & 0xffffffff; trace_pci_nvme_mmio_aqattr(data & 0xffffffff); break; - case 0x28: /* ASQ */ + case NVME_REG_ASQ: n->bar.asq = size == 8 ? data : (n->bar.asq & ~0xffffffffULL) | (data & 0xffffffff); trace_pci_nvme_mmio_asqaddr(data); break; - case 0x2c: /* ASQ hi */ + case NVME_REG_ASQ + 4: n->bar.asq = (n->bar.asq & 0xffffffff) | (data << 32); trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); break; - case 0x30: /* ACQ */ + case NVME_REG_ACQ: trace_pci_nvme_mmio_acqaddr(data); n->bar.acq = size == 8 ? data : (n->bar.acq & ~0xffffffffULL) | (data & 0xffffffff); break; - case 0x34: /* ACQ hi */ + case NVME_REG_ACQ + 4: n->bar.acq = (n->bar.acq & 0xffffffff) | (data << 32); trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); break; - case 0x38: /* CMBLOC */ + case NVME_REG_CMBLOC: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, "invalid write to reserved CMBLOC" " when CMBSZ is zero, ignored"); return; - case 0x3C: /* CMBSZ */ + case NVME_REG_CMBSZ: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; - case 0x50: /* CMBMSC */ + case NVME_REG_CMBMSC: if (!NVME_CAP_CMBS(n->bar.cap)) { return; } @@ -5876,15 +5876,15 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; - case 0x54: /* CMBMSC hi */ + case NVME_REG_CMBMSC + 4: n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32); return; - case 0xe00: /* PMRCAP */ + case NVME_REG_PMRCAP: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; - case 0xe04: /* PMRCTL */ + case NVME_REG_PMRCTL: if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5899,19 +5899,19 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->pmr.cmse = false; } return; - case 0xe08: /* PMRSTS */ + case NVME_REG_PMRSTS: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; - case 0xe0C: /* PMREBS */ + case NVME_REG_PMREBS: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; - case 0xe10: /* PMRSWTP */ + case NVME_REG_PMRSWTP: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; - case 0xe14: /* PMRMSCL */ + case NVME_REG_PMRMSCL: if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5933,7 +5933,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; - case 0xe18: /* PMRMSCU */ + case NVME_REG_PMRMSCU: if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5975,7 +5975,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) * from PMRSTS should ensure prior writes * made it to persistent media */ - if (addr == 0xe08 && + if (addr == NVME_REG_PMRSTS && (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); } diff --git a/include/block/nvme.h b/include/block/nvme.h index 84053b68b9..77aae01174 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -9,7 +9,7 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t cc; uint8_t rsvd24[4]; uint32_t csts; - uint32_t nssrc; + uint32_t nssr; uint32_t aqa; uint64_t asq; uint64_t acq; @@ -31,6 +31,33 @@ typedef struct QEMU_PACKED NvmeBar { uint8_t css[484]; } NvmeBar; +enum NvmeBarRegs { + NVME_REG_CAP = offsetof(NvmeBar, cap), + NVME_REG_VS = offsetof(NvmeBar, vs), + NVME_REG_INTMS = offsetof(NvmeBar, intms), + NVME_REG_INTMC = offsetof(NvmeBar, intmc), + NVME_REG_CC = offsetof(NvmeBar, cc), + NVME_REG_CSTS = offsetof(NvmeBar, csts), + NVME_REG_NSSR = offsetof(NvmeBar, nssr), + NVME_REG_AQA = offsetof(NvmeBar, aqa), + NVME_REG_ASQ = offsetof(NvmeBar, asq), + NVME_REG_ACQ = offsetof(NvmeBar, acq), + NVME_REG_CMBLOC = offsetof(NvmeBar, cmbloc), + NVME_REG_CMBSZ = offsetof(NvmeBar, cmbsz), + NVME_REG_BPINFO = offsetof(NvmeBar, bpinfo), + NVME_REG_BPRSEL = offsetof(NvmeBar, bprsel), + NVME_REG_BPMBL = offsetof(NvmeBar, bpmbl), + NVME_REG_CMBMSC = offsetof(NvmeBar, cmbmsc), + NVME_REG_CMBSTS = offsetof(NvmeBar, cmbsts), + NVME_REG_PMRCAP = offsetof(NvmeBar, pmrcap), + NVME_REG_PMRCTL = offsetof(NvmeBar, pmrctl), + NVME_REG_PMRSTS = offsetof(NvmeBar, pmrsts), + NVME_REG_PMREBS = offsetof(NvmeBar, pmrebs), + NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp), + NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl), + NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu), +}; + enum NvmeCapShift { CAP_MQES_SHIFT = 0, CAP_CQR_SHIFT = 16, From 5029de44b5352d466f1b6e7c0a9f19e1259d33b3 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 13 Jul 2021 19:24:04 +0200 Subject: [PATCH 09/11] hw/nvme: fix out-of-bounds reads Peter noticed that mmio access may read into the NvmeParams member in the NvmeCtrl struct. Fix the bounds check. Reported-by: Peter Maydell Signed-off-by: Klaus Jensen Reviewed-by: Stefan Hajnoczi Reviewed-by: Peter Maydell --- hw/nvme/ctrl.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 23ff71f65c..10c2363c1d 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5969,23 +5969,26 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) /* should RAZ, fall through for now */ } - if (addr < sizeof(n->bar)) { - /* - * When PMRWBM bit 1 is set then read from - * from PMRSTS should ensure prior writes - * made it to persistent media - */ - if (addr == NVME_REG_PMRSTS && - (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { - memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); - } - memcpy(&val, ptr + addr, size); - } else { + if (addr > sizeof(n->bar) - size) { NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, "MMIO read beyond last register," " offset=0x%"PRIx64", returning 0", addr); + + return 0; } + /* + * When PMRWBM bit 1 is set then read from + * from PMRSTS should ensure prior writes + * made it to persistent media + */ + if (addr == NVME_REG_PMRSTS && + (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { + memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); + } + + memcpy(&val, ptr + addr, size); + return val; } From 49e03457f1d7dfd2a3fd6affc32d8b69f066fb75 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 13 Jul 2021 19:31:27 +0200 Subject: [PATCH 10/11] hw/nvme: fix mmio read The new PMR test unearthed a long-standing issue with MMIO reads on big-endian hosts. Fix this by unconditionally storing all controller registers in little endian. Cc: Gollu Appalanaidu Reported-by: Peter Maydell Signed-off-by: Klaus Jensen Reviewed-by: Peter Maydell --- hw/nvme/ctrl.c | 291 +++++++++++++++++++++++++++---------------------- 1 file changed, 162 insertions(+), 129 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 10c2363c1d..43dfaeac9f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq) static void nvme_irq_check(NvmeCtrl *n) { + uint32_t intms = ldl_le_p(&n->bar.intms); + if (msix_enabled(&(n->parent_obj))) { return; } - if (~n->bar.intms & n->irq_status) { + if (~intms & n->irq_status) { pci_irq_assert(&n->parent_obj); } else { pci_irq_deassert(&n->parent_obj); @@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque) if (ret) { trace_pci_nvme_err_addr_write(addr); trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); break; } QTAILQ_REMOVE(&cq->req_list, req, entry); @@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_sq_sqid(sqid); return NVME_INVALID_QID | NVME_DNR; } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { trace_pci_nvme_err_invalid_create_sq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } @@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return NVME_INVALID_FIELD | NVME_DNR; } - switch (NVME_CC_CSS(n->bar.cc)) { + switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) { case NVME_CC_CSS_NVM: src_iocs = nvme_cse_iocs_nvm; /* fall through */ @@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_QID | NVME_DNR; } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { trace_pci_nvme_err_invalid_create_cq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } @@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n) static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) { + uint32_t cc = ldl_le_p(&n->bar.cc); + ns->iocs = nvme_cse_iocs_none; switch (ns->csi) { case NVME_CSI_NVM: - if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { + if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) { ns->iocs = nvme_cse_iocs_nvm; } break; case NVME_CSI_ZONED: - if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { + if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) { ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { + } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) { ns->iocs = nvme_cse_iocs_nvm; } break; @@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque) if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { trace_pci_nvme_err_addr_read(addr); trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); break; } nvme_inc_sq_head(sq); @@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n) n->aer_queued = 0; n->outstanding_aers = 0; n->qs_created = false; - - n->bar.cc = 0; } static void nvme_ctrl_shutdown(NvmeCtrl *n) @@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n) static int nvme_start_ctrl(NvmeCtrl *n) { - uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t aqa = ldl_le_p(&n->bar.aqa); + uint64_t asq = ldq_le_p(&n->bar.asq); + uint64_t acq = ldq_le_p(&n->bar.acq); + uint32_t page_bits = NVME_CC_MPS(cc) + 12; uint32_t page_size = 1 << page_bits; if (unlikely(n->cq[0])) { @@ -5616,73 +5623,72 @@ static int nvme_start_ctrl(NvmeCtrl *n) trace_pci_nvme_err_startfail_sq(); return -1; } - if (unlikely(!n->bar.asq)) { + if (unlikely(!asq)) { trace_pci_nvme_err_startfail_nbarasq(); return -1; } - if (unlikely(!n->bar.acq)) { + if (unlikely(!acq)) { trace_pci_nvme_err_startfail_nbaracq(); return -1; } - if (unlikely(n->bar.asq & (page_size - 1))) { - trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq); + if (unlikely(asq & (page_size - 1))) { + trace_pci_nvme_err_startfail_asq_misaligned(asq); return -1; } - if (unlikely(n->bar.acq & (page_size - 1))) { - trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq); + if (unlikely(acq & (page_size - 1))) { + trace_pci_nvme_err_startfail_acq_misaligned(acq); return -1; } - if (unlikely(!(NVME_CAP_CSS(n->bar.cap) & (1 << NVME_CC_CSS(n->bar.cc))))) { - trace_pci_nvme_err_startfail_css(NVME_CC_CSS(n->bar.cc)); + if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) { + trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc)); return -1; } - if (unlikely(NVME_CC_MPS(n->bar.cc) < - NVME_CAP_MPSMIN(n->bar.cap))) { + if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) { trace_pci_nvme_err_startfail_page_too_small( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMIN(n->bar.cap)); + NVME_CC_MPS(cc), + NVME_CAP_MPSMIN(cap)); return -1; } - if (unlikely(NVME_CC_MPS(n->bar.cc) > - NVME_CAP_MPSMAX(n->bar.cap))) { + if (unlikely(NVME_CC_MPS(cc) > + NVME_CAP_MPSMAX(cap))) { trace_pci_nvme_err_startfail_page_too_large( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMAX(n->bar.cap)); + NVME_CC_MPS(cc), + NVME_CAP_MPSMAX(cap)); return -1; } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) < + if (unlikely(NVME_CC_IOCQES(cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { trace_pci_nvme_err_startfail_cqent_too_small( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MIN(n->bar.cap)); + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MIN(cap)); return -1; } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) > + if (unlikely(NVME_CC_IOCQES(cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { trace_pci_nvme_err_startfail_cqent_too_large( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MAX(n->bar.cap)); + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MAX(cap)); return -1; } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) < + if (unlikely(NVME_CC_IOSQES(cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { trace_pci_nvme_err_startfail_sqent_too_small( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MIN(n->bar.cap)); + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MIN(cap)); return -1; } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) > + if (unlikely(NVME_CC_IOSQES(cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { trace_pci_nvme_err_startfail_sqent_too_large( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MAX(n->bar.cap)); + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MAX(cap)); return -1; } - if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { + if (unlikely(!NVME_AQA_ASQS(aqa))) { trace_pci_nvme_err_startfail_asqent_sz_zero(); return -1; } - if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { + if (unlikely(!NVME_AQA_ACQS(aqa))) { trace_pci_nvme_err_startfail_acqent_sz_zero(); return -1; } @@ -5690,12 +5696,10 @@ static int nvme_start_ctrl(NvmeCtrl *n) n->page_bits = page_bits; n->page_size = page_size; n->max_prp_ents = n->page_size / sizeof(uint64_t); - n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc); - n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc); - nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0, - NVME_AQA_ACQS(n->bar.aqa) + 1, 1); - nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, - NVME_AQA_ASQS(n->bar.aqa) + 1); + n->cqe_size = 1 << NVME_CC_IOCQES(cc); + n->sqe_size = 1 << NVME_CC_IOSQES(cc); + nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1); + nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1); nvme_set_timestamp(n, 0ULL); @@ -5708,22 +5712,33 @@ static int nvme_start_ctrl(NvmeCtrl *n) static void nvme_cmb_enable_regs(NvmeCtrl *n) { - NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR); + uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc); + uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz); - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); + NVME_CMBLOC_SET_CDPCILS(cmbloc, 1); + NVME_CMBLOC_SET_CDPMLS(cmbloc, 1); + NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR); + stl_le_p(&n->bar.cmbloc, cmbloc); + + NVME_CMBSZ_SET_SQS(cmbsz, 1); + NVME_CMBSZ_SET_CQS(cmbsz, 0); + NVME_CMBSZ_SET_LISTS(cmbsz, 1); + NVME_CMBSZ_SET_RDS(cmbsz, 1); + NVME_CMBSZ_SET_WDS(cmbsz, 1); + NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb); + stl_le_p(&n->bar.cmbsz, cmbsz); } static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, unsigned size) { + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t intms = ldl_le_p(&n->bar.intms); + uint32_t csts = ldl_le_p(&n->bar.csts); + uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts); + if (unlikely(offset & (sizeof(uint32_t) - 1))) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, "MMIO write not 32-bit aligned," @@ -5747,9 +5762,10 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, " when MSI-X is enabled"); /* should be ignored, fall through for now */ } - n->bar.intms |= data & 0xffffffff; + intms |= data; + stl_le_p(&n->bar.intms, intms); n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc); + trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms); nvme_irq_check(n); break; case NVME_REG_INTMC: @@ -5759,44 +5775,55 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, " when MSI-X is enabled"); /* should be ignored, fall through for now */ } - n->bar.intms &= ~(data & 0xffffffff); + intms &= ~data; + stl_le_p(&n->bar.intms, intms); n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc); + trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms); nvme_irq_check(n); break; case NVME_REG_CC: trace_pci_nvme_mmio_cfg(data & 0xffffffff); + /* Windows first sends data, then sends enable bit */ - if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && - !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) + if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) && + !NVME_CC_SHN(data) && !NVME_CC_SHN(cc)) { - n->bar.cc = data; + cc = data; } - if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { - n->bar.cc = data; + if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) { + cc = data; + + /* flush CC since nvme_start_ctrl() needs the value */ + stl_le_p(&n->bar.cc, cc); if (unlikely(nvme_start_ctrl(n))) { trace_pci_nvme_err_startfail(); - n->bar.csts = NVME_CSTS_FAILED; + csts = NVME_CSTS_FAILED; } else { trace_pci_nvme_mmio_start_success(); - n->bar.csts = NVME_CSTS_READY; + csts = NVME_CSTS_READY; } - } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { + } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) { trace_pci_nvme_mmio_stopped(); nvme_ctrl_reset(n); - n->bar.csts &= ~NVME_CSTS_READY; + cc = 0; + csts &= ~NVME_CSTS_READY; } - if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { + + if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) { trace_pci_nvme_mmio_shutdown_set(); nvme_ctrl_shutdown(n); - n->bar.cc = data; - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; - } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { + cc = data; + csts |= NVME_CSTS_SHST_COMPLETE; + } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) { trace_pci_nvme_mmio_shutdown_cleared(); - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; - n->bar.cc = data; + csts &= ~NVME_CSTS_SHST_COMPLETE; + cc = data; } + + stl_le_p(&n->bar.cc, cc); + stl_le_p(&n->bar.csts, csts); + break; case NVME_REG_CSTS: if (data & (1 << 4)) { @@ -5818,26 +5845,24 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } break; case NVME_REG_AQA: - n->bar.aqa = data & 0xffffffff; + stl_le_p(&n->bar.aqa, data); trace_pci_nvme_mmio_aqattr(data & 0xffffffff); break; case NVME_REG_ASQ: - n->bar.asq = size == 8 ? data : - (n->bar.asq & ~0xffffffffULL) | (data & 0xffffffff); + stn_le_p(&n->bar.asq, size, data); trace_pci_nvme_mmio_asqaddr(data); break; case NVME_REG_ASQ + 4: - n->bar.asq = (n->bar.asq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); + stl_le_p((uint8_t *)&n->bar.asq + 4, data); + trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(&n->bar.asq)); break; case NVME_REG_ACQ: trace_pci_nvme_mmio_acqaddr(data); - n->bar.acq = size == 8 ? data : - (n->bar.acq & ~0xffffffffULL) | (data & 0xffffffff); + stn_le_p(&n->bar.acq, size, data); break; case NVME_REG_ACQ + 4: - n->bar.acq = (n->bar.acq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); + stl_le_p((uint8_t *)&n->bar.acq + 4, data); + trace_pci_nvme_mmio_acqaddr_hi(data, ldq_le_p(&n->bar.acq)); break; case NVME_REG_CMBLOC: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, @@ -5849,21 +5874,23 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, "invalid write to read only CMBSZ, ignored"); return; case NVME_REG_CMBMSC: - if (!NVME_CAP_CMBS(n->bar.cap)) { + if (!NVME_CAP_CMBS(cap)) { return; } - n->bar.cmbmsc = size == 8 ? data : - (n->bar.cmbmsc & ~0xffffffff) | (data & 0xffffffff); + stn_le_p(&n->bar.cmbmsc, size, data); n->cmb.cmse = false; if (NVME_CMBMSC_CRE(data)) { nvme_cmb_enable_regs(n); if (NVME_CMBMSC_CMSE(data)) { - hwaddr cba = NVME_CMBMSC_CBA(data) << CMBMSC_CBA_SHIFT; + uint64_t cmbmsc = ldq_le_p(&n->bar.cmbmsc); + hwaddr cba = NVME_CMBMSC_CBA(cmbmsc) << CMBMSC_CBA_SHIFT; if (cba + int128_get64(n->cmb.mem.size) < cba) { - NVME_CMBSTS_SET_CBAI(n->bar.cmbsts, 1); + uint32_t cmbsts = ldl_le_p(&n->bar.cmbsts); + NVME_CMBSTS_SET_CBAI(cmbsts, 1); + stl_le_p(&n->bar.cmbsts, cmbsts); return; } @@ -5877,7 +5904,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; case NVME_REG_CMBMSC + 4: - n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32); + stl_le_p((uint8_t *)&n->bar.cmbmsc + 4, data); return; case NVME_REG_PMRCAP: @@ -5885,19 +5912,20 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, "invalid write to PMRCAP register, ignored"); return; case NVME_REG_PMRCTL: - if (!NVME_CAP_PMRS(n->bar.cap)) { + if (!NVME_CAP_PMRS(cap)) { return; } - n->bar.pmrctl = data; + stl_le_p(&n->bar.pmrctl, data); if (NVME_PMRCTL_EN(data)) { memory_region_set_enabled(&n->pmr.dev->mr, true); - n->bar.pmrsts = 0; + pmrsts = 0; } else { memory_region_set_enabled(&n->pmr.dev->mr, false); - NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 1); + NVME_PMRSTS_SET_NRDY(pmrsts, 1); n->pmr.cmse = false; } + stl_le_p(&n->bar.pmrsts, pmrsts); return; case NVME_REG_PMRSTS: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, @@ -5912,19 +5940,20 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, "invalid write to PMRSWTP register, ignored"); return; case NVME_REG_PMRMSCL: - if (!NVME_CAP_PMRS(n->bar.cap)) { + if (!NVME_CAP_PMRS(cap)) { return; } - n->bar.pmrmscl = data; + stl_le_p(&n->bar.pmrmscl, data); n->pmr.cmse = false; - if (NVME_PMRMSCL_CMSE(n->bar.pmrmscl)) { - uint64_t pmrmscu = n->bar.pmrmscu; - hwaddr cba = (pmrmscu << 32) | - (NVME_PMRMSCL_CBA(n->bar.pmrmscl) << PMRMSCL_CBA_SHIFT); + if (NVME_PMRMSCL_CMSE(data)) { + uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu); + hwaddr cba = pmrmscu << 32 | + (NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT); if (cba + int128_get64(n->pmr.dev->mr.size) < cba) { - NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1); + NVME_PMRSTS_SET_CBAI(pmrsts, 1); + stl_le_p(&n->bar.pmrsts, pmrsts); return; } @@ -5934,11 +5963,11 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; case NVME_REG_PMRMSCU: - if (!NVME_CAP_PMRS(n->bar.cap)) { + if (!NVME_CAP_PMRS(cap)) { return; } - n->bar.pmrmscu = data; + stl_le_p(&n->bar.pmrmscu, data); return; default: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, @@ -5953,7 +5982,6 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) { NvmeCtrl *n = (NvmeCtrl *)opaque; uint8_t *ptr = (uint8_t *)&n->bar; - uint64_t val = 0; trace_pci_nvme_mmio_read(addr, size); @@ -5983,13 +6011,11 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) * made it to persistent media */ if (addr == NVME_REG_PMRSTS && - (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { + (NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) { memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); } - memcpy(&val, ptr + addr, size); - - return val; + return ldn_le_p(ptr + addr, size); } static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) @@ -6247,6 +6273,7 @@ static void nvme_init_state(NvmeCtrl *n) static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) { uint64_t cmb_size = n->params.cmb_size_mb * MiB; + uint64_t cap = ldq_le_p(&n->bar.cap); n->cmb.buf = g_malloc0(cmb_size); memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n, @@ -6256,7 +6283,8 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem); - NVME_CAP_SET_CMBS(n->bar.cap, 1); + NVME_CAP_SET_CMBS(cap, 1); + stq_le_p(&n->bar.cap, cap); if (n->params.legacy_cmb) { nvme_cmb_enable_regs(n); @@ -6266,14 +6294,17 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) { - NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR); - /* Turn on bit 1 support */ - NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); - NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 1); + uint32_t pmrcap = ldl_le_p(&n->bar.pmrcap); - pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), + NVME_PMRCAP_SET_RDS(pmrcap, 1); + NVME_PMRCAP_SET_WDS(pmrcap, 1); + NVME_PMRCAP_SET_BIR(pmrcap, NVME_PMR_BIR); + /* Turn on bit 1 support */ + NVME_PMRCAP_SET_PMRWBM(pmrcap, 0x02); + NVME_PMRCAP_SET_CMSS(pmrcap, 1); + stl_le_p(&n->bar.pmrcap, pmrcap); + + pci_register_bar(pci_dev, NVME_PMR_BIR, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr); @@ -6363,6 +6394,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) { NvmeIdCtrl *id = &n->id_ctrl; uint8_t *pci_conf = pci_dev->config; + uint64_t cap = ldq_le_p(&n->bar.cap); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -6441,17 +6473,18 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cmic |= NVME_CMIC_MULTI_CTRL; } - NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); - NVME_CAP_SET_CQR(n->bar.cap, 1); - NVME_CAP_SET_TO(n->bar.cap, 0xf); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_NVM); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_CSI_SUPP); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_ADMIN_ONLY); - NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - NVME_CAP_SET_CMBS(n->bar.cap, n->params.cmb_size_mb ? 1 : 0); - NVME_CAP_SET_PMRS(n->bar.cap, n->pmr.dev ? 1 : 0); + NVME_CAP_SET_MQES(cap, 0x7ff); + NVME_CAP_SET_CQR(cap, 1); + NVME_CAP_SET_TO(cap, 0xf); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY); + NVME_CAP_SET_MPSMAX(cap, 4); + NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0); + NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0); + stq_le_p(&n->bar.cap, cap); - n->bar.vs = NVME_SPEC_VER; + stl_le_p(&n->bar.vs, NVME_SPEC_VER); n->bar.intmc = n->bar.intms = 0; } @@ -6602,7 +6635,7 @@ static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA; - if (NVME_CAP_PMRS(n->bar.cap)) { + if (NVME_CAP_PMRS(ldq_le_p(&n->bar.cap))) { cap |= NVME_SMART_PMR_UNRELIABLE; } From 9631a8ab21679e3d605f7f540dd8c692b9593e02 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 13 Jul 2021 19:33:14 +0200 Subject: [PATCH 11/11] tests/qtest/nvme-test: add mmio read test Add a regression test for mmio read on big-endian hosts. Signed-off-by: Klaus Jensen Reviewed-by: Gollu Appalanaidu --- tests/qtest/nvme-test.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c index 47e757d7e2..f8bafb5d70 100644 --- a/tests/qtest/nvme-test.c +++ b/tests/qtest/nvme-test.c @@ -67,6 +67,30 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211); } +static void nvmetest_reg_read_test(void *obj, void *data, QGuestAllocator *alloc) +{ + QNvme *nvme = obj; + QPCIDevice *pdev = &nvme->dev; + QPCIBar bar; + uint32_t cap_lo, cap_hi; + uint64_t cap; + + qpci_device_enable(pdev); + bar = qpci_iomap(pdev, 0, NULL); + + cap_lo = qpci_io_readl(pdev, bar, 0x0); + g_assert_cmpint(NVME_CAP_MQES(cap_lo), ==, 0x7ff); + + cap_hi = qpci_io_readl(pdev, bar, 0x4); + g_assert_cmpint(NVME_CAP_MPSMAX((uint64_t)cap_hi << 32), ==, 0x4); + + cap = qpci_io_readq(pdev, bar, 0x0); + g_assert_cmpint(NVME_CAP_MQES(cap), ==, 0x7ff); + g_assert_cmpint(NVME_CAP_MPSMAX(cap), ==, 0x4); + + qpci_iounmap(pdev, bar); +} + static void nvmetest_pmr_reg_test(void *obj, void *data, QGuestAllocator *alloc) { QNvme *nvme = obj; @@ -142,6 +166,8 @@ static void nvme_register_nodes(void) &(QOSGraphTestOptions) { .edge.extra_device_opts = "pmrdev=pmr0" }); + + qos_add_test("reg-read", "nvme", nvmetest_reg_read_test, NULL); } libqos_init(nvme_register_nodes);