diff --git a/MAINTAINERS b/MAINTAINERS index 515316f7b2..eab178aeee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1956,7 +1956,7 @@ M: Keith Busch M: Klaus Jensen L: qemu-block@nongnu.org S: Supported -F: hw/block/nvme* +F: hw/nvme/* F: include/block/nvme.h F: tests/qtest/nvme-test.c F: docs/system/nvme.rst diff --git a/hw/Kconfig b/hw/Kconfig index aa10357adf..805860f564 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -21,6 +21,7 @@ source mem/Kconfig source misc/Kconfig source net/Kconfig source nubus/Kconfig +source nvme/Kconfig source nvram/Kconfig source pci-bridge/Kconfig source pci-host/Kconfig diff --git a/hw/block/Kconfig b/hw/block/Kconfig index 4fcd152166..295441e64a 100644 --- a/hw/block/Kconfig +++ b/hw/block/Kconfig @@ -25,11 +25,6 @@ config ONENAND config TC58128 bool -config NVME_PCI - bool - default y if PCI_DEVICES - depends on PCI - config VIRTIO_BLK bool default y diff --git a/hw/block/meson.build b/hw/block/meson.build index 5b4a7699f9..8b0de54db1 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -13,7 +13,6 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c', 'nvme-dif.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) diff --git a/hw/block/nvme-dif.h b/hw/block/nvme-dif.h deleted file mode 100644 index 524faffbd7..0000000000 --- a/hw/block/nvme-dif.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * QEMU NVM Express End-to-End Data Protection support - * - * Copyright (c) 2021 Samsung Electronics Co., Ltd. - * - * Authors: - * Klaus Jensen - * Gollu Appalanaidu - */ - -#ifndef HW_NVME_DIF_H -#define HW_NVME_DIF_H - -/* from Linux kernel (crypto/crct10dif_common.c) */ -static const uint16_t t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, - uint32_t reftag); -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba); -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag); -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag); -uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); - -#endif /* HW_NVME_DIF_H */ diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h deleted file mode 100644 index fb0a41f912..0000000000 --- a/hw/block/nvme-ns.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * QEMU NVM Express Virtual Namespace - * - * Copyright (c) 2019 CNEX Labs - * Copyright (c) 2020 Samsung Electronics - * - * Authors: - * Klaus Jensen - * - * This work is licensed under the terms of the GNU GPL, version 2. See the - * COPYING file in the top-level directory. - * - */ - -#ifndef NVME_NS_H -#define NVME_NS_H - -#include "qemu/uuid.h" - -#define TYPE_NVME_NS "nvme-ns" -#define NVME_NS(obj) \ - OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) - -typedef struct NvmeZone { - NvmeZoneDescr d; - uint64_t w_ptr; - QTAILQ_ENTRY(NvmeZone) entry; -} NvmeZone; - -typedef struct NvmeNamespaceParams { - bool detached; - bool shared; - uint32_t nsid; - QemuUUID uuid; - - uint16_t ms; - uint8_t mset; - uint8_t pi; - uint8_t pil; - - uint16_t mssrl; - uint32_t mcl; - uint8_t msrc; - - bool zoned; - bool cross_zone_read; - uint64_t zone_size_bs; - uint64_t zone_cap_bs; - uint32_t max_active_zones; - uint32_t max_open_zones; - uint32_t zd_extension_size; -} NvmeNamespaceParams; - -typedef struct NvmeNamespace { - DeviceState parent_obj; - BlockConf blkconf; - int32_t bootindex; - int64_t size; - int64_t mdata_offset; - NvmeIdNs id_ns; - const uint32_t *iocs; - uint8_t csi; - uint16_t status; - int attached; - - QTAILQ_ENTRY(NvmeNamespace) entry; - - NvmeIdNsZoned *id_ns_zoned; - NvmeZone *zone_array; - QTAILQ_HEAD(, NvmeZone) exp_open_zones; - QTAILQ_HEAD(, NvmeZone) imp_open_zones; - QTAILQ_HEAD(, NvmeZone) closed_zones; - QTAILQ_HEAD(, NvmeZone) full_zones; - uint32_t num_zones; - uint64_t zone_size; - uint64_t zone_capacity; - uint32_t zone_size_log2; - uint8_t *zd_extensions; - int32_t nr_open_zones; - int32_t nr_active_zones; - - NvmeNamespaceParams params; - - struct { - uint32_t err_rec; - } features; -} NvmeNamespace; - -static inline uint16_t nvme_ns_status(NvmeNamespace *ns) -{ - return ns->status; -} - -static inline uint32_t nvme_nsid(NvmeNamespace *ns) -{ - if (ns) { - return ns->params.nsid; - } - - return 0; -} - -static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns) -{ - NvmeIdNs *id_ns = &ns->id_ns; - return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; -} - -static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ds; -} - -/* convert an LBA to the equivalent in bytes */ -static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) -{ - return lba << nvme_ns_lbads(ns); -} - -static inline size_t nvme_lsize(NvmeNamespace *ns) -{ - return 1 << nvme_ns_lbads(ns); -} - -static inline uint16_t nvme_msize(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ms; -} - -static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) -{ - return nvme_msize(ns) * lba; -} - -static inline bool nvme_ns_ext(NvmeNamespace *ns) -{ - return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); -} - -/* calculate the number of LBAs that the namespace can accomodate */ -static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns) -{ - if (nvme_msize(ns)) { - return ns->size / (nvme_lsize(ns) + nvme_msize(ns)); - } - return ns->size >> nvme_ns_lbads(ns); -} - -typedef struct NvmeCtrl NvmeCtrl; - -static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) -{ - return zone->d.zs >> 4; -} - -static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) -{ - zone->d.zs = state << 4; -} - -static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) -{ - return zone->d.zslba + ns->zone_size; -} - -static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) -{ - return zone->d.zslba + zone->d.zcap; -} - -static inline bool nvme_wp_is_valid(NvmeZone *zone) -{ - uint8_t st = nvme_get_zone_state(zone); - - return st != NVME_ZONE_STATE_FULL && - st != NVME_ZONE_STATE_READ_ONLY && - st != NVME_ZONE_STATE_OFFLINE; -} - -static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, - uint32_t zone_idx) -{ - return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; -} - -static inline void nvme_aor_inc_open(NvmeNamespace *ns) -{ - assert(ns->nr_open_zones >= 0); - if (ns->params.max_open_zones) { - ns->nr_open_zones++; - assert(ns->nr_open_zones <= ns->params.max_open_zones); - } -} - -static inline void nvme_aor_dec_open(NvmeNamespace *ns) -{ - if (ns->params.max_open_zones) { - assert(ns->nr_open_zones > 0); - ns->nr_open_zones--; - } - assert(ns->nr_open_zones >= 0); -} - -static inline void nvme_aor_inc_active(NvmeNamespace *ns) -{ - assert(ns->nr_active_zones >= 0); - if (ns->params.max_active_zones) { - ns->nr_active_zones++; - assert(ns->nr_active_zones <= ns->params.max_active_zones); - } -} - -static inline void nvme_aor_dec_active(NvmeNamespace *ns) -{ - if (ns->params.max_active_zones) { - assert(ns->nr_active_zones > 0); - ns->nr_active_zones--; - assert(ns->nr_active_zones >= ns->nr_open_zones); - } - assert(ns->nr_active_zones >= 0); -} - -void nvme_ns_init_format(NvmeNamespace *ns); -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); -void nvme_ns_drain(NvmeNamespace *ns); -void nvme_ns_shutdown(NvmeNamespace *ns); -void nvme_ns_cleanup(NvmeNamespace *ns); - -#endif /* NVME_NS_H */ diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h deleted file mode 100644 index 7d7ef5f7f1..0000000000 --- a/hw/block/nvme-subsys.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * QEMU NVM Express Subsystem: nvme-subsys - * - * Copyright (c) 2021 Minwoo Im - * - * This code is licensed under the GNU GPL v2. Refer COPYING. - */ - -#ifndef NVME_SUBSYS_H -#define NVME_SUBSYS_H - -#define TYPE_NVME_SUBSYS "nvme-subsys" -#define NVME_SUBSYS(obj) \ - OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) - -#define NVME_SUBSYS_MAX_CTRLS 32 -#define NVME_MAX_NAMESPACES 256 - -typedef struct NvmeCtrl NvmeCtrl; -typedef struct NvmeNamespace NvmeNamespace; -typedef struct NvmeSubsystem { - DeviceState parent_obj; - uint8_t subnqn[256]; - - NvmeCtrl *ctrls[NVME_SUBSYS_MAX_CTRLS]; - /* Allocated namespaces for this subsystem */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; - - struct { - char *nqn; - } params; -} NvmeSubsystem; - -int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); - -static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, - uint32_t cntlid) -{ - if (!subsys || cntlid >= NVME_SUBSYS_MAX_CTRLS) { - return NULL; - } - - return subsys->ctrls[cntlid]; -} - -/* - * Return allocated namespace of the specified nsid in the subsystem. - */ -static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, - uint32_t nsid) -{ - if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { - return NULL; - } - - return subsys->namespaces[nsid]; -} - -#endif /* NVME_SUBSYS_H */ diff --git a/hw/block/nvme.h b/hw/block/nvme.h deleted file mode 100644 index 5d05ec368f..0000000000 --- a/hw/block/nvme.h +++ /dev/null @@ -1,266 +0,0 @@ -#ifndef HW_NVME_H -#define HW_NVME_H - -#include "block/nvme.h" -#include "hw/pci/pci.h" -#include "nvme-subsys.h" -#include "nvme-ns.h" - -#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) -#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) - -typedef struct NvmeParams { - char *serial; - uint32_t num_queues; /* deprecated since 5.1 */ - uint32_t max_ioqpairs; - uint16_t msix_qsize; - uint32_t cmb_size_mb; - uint8_t aerl; - uint32_t aer_max_queued; - uint8_t mdts; - uint8_t vsl; - bool use_intel_id; - uint8_t zasl; - bool legacy_cmb; -} NvmeParams; - -typedef struct NvmeAsyncEvent { - QTAILQ_ENTRY(NvmeAsyncEvent) entry; - NvmeAerResult result; -} NvmeAsyncEvent; - -enum { - NVME_SG_ALLOC = 1 << 0, - NVME_SG_DMA = 1 << 1, -}; - -typedef struct NvmeSg { - int flags; - - union { - QEMUSGList qsg; - QEMUIOVector iov; - }; -} NvmeSg; - -typedef struct NvmeRequest { - struct NvmeSQueue *sq; - struct NvmeNamespace *ns; - BlockAIOCB *aiocb; - uint16_t status; - void *opaque; - NvmeCqe cqe; - NvmeCmd cmd; - BlockAcctCookie acct; - NvmeSg sg; - QTAILQ_ENTRY(NvmeRequest)entry; -} NvmeRequest; - -typedef struct NvmeBounceContext { - NvmeRequest *req; - - struct { - QEMUIOVector iov; - uint8_t *bounce; - } data, mdata; -} NvmeBounceContext; - -static inline const char *nvme_adm_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; - case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; - case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; - case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; - case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; - case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; - case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; - case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; - case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; - case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; - case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; - case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; - default: return "NVME_ADM_CMD_UNKNOWN"; - } -} - -static inline const char *nvme_io_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; - case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; - case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; - case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; - case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; - case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; - case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; - case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; - case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; - case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; - case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; - default: return "NVME_NVM_CMD_UNKNOWN"; - } -} - -typedef struct NvmeSQueue { - struct NvmeCtrl *ctrl; - uint16_t sqid; - uint16_t cqid; - uint32_t head; - uint32_t tail; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - NvmeRequest *io_req; - QTAILQ_HEAD(, NvmeRequest) req_list; - QTAILQ_HEAD(, NvmeRequest) out_req_list; - QTAILQ_ENTRY(NvmeSQueue) entry; -} NvmeSQueue; - -typedef struct NvmeCQueue { - struct NvmeCtrl *ctrl; - uint8_t phase; - uint16_t cqid; - uint16_t irq_enabled; - uint32_t head; - uint32_t tail; - uint32_t vector; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - QTAILQ_HEAD(, NvmeSQueue) sq_list; - QTAILQ_HEAD(, NvmeRequest) req_list; -} NvmeCQueue; - -#define TYPE_NVME_BUS "nvme-bus" -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) - -typedef struct NvmeBus { - BusState parent_bus; -} NvmeBus; - -#define TYPE_NVME "nvme" -#define NVME(obj) \ - OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) - -typedef struct NvmeFeatureVal { - struct { - uint16_t temp_thresh_hi; - uint16_t temp_thresh_low; - }; - uint32_t async_config; -} NvmeFeatureVal; - -typedef struct NvmeCtrl { - PCIDevice parent_obj; - MemoryRegion bar0; - MemoryRegion iomem; - NvmeBar bar; - NvmeParams params; - NvmeBus bus; - - uint16_t cntlid; - bool qs_created; - uint32_t page_size; - uint16_t page_bits; - uint16_t max_prp_ents; - uint16_t cqe_size; - uint16_t sqe_size; - uint32_t reg_size; - uint32_t num_namespaces; - uint32_t max_q_ents; - uint8_t outstanding_aers; - uint32_t irq_status; - uint64_t host_timestamp; /* Timestamp sent by the host */ - uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - uint64_t starttime_ms; - uint16_t temperature; - uint8_t smart_critical_warning; - - struct { - MemoryRegion mem; - uint8_t *buf; - bool cmse; - hwaddr cba; - } cmb; - - struct { - HostMemoryBackend *dev; - bool cmse; - hwaddr cba; - } pmr; - - uint8_t aer_mask; - NvmeRequest **aer_reqs; - QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; - int aer_queued; - - uint32_t dmrsl; - - /* Namespace ID is started with 1 so bitmap should be 1-based */ -#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) - DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); - - NvmeSubsystem *subsys; - - NvmeNamespace namespace; - /* - * Attached namespaces to this controller. If subsys is not given, all - * namespaces in this list will always be attached. - */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES]; - NvmeSQueue **sq; - NvmeCQueue **cq; - NvmeSQueue admin_sq; - NvmeCQueue admin_cq; - NvmeIdCtrl id_ctrl; - NvmeFeatureVal features; -} NvmeCtrl; - -static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) -{ - if (!nsid || nsid > n->num_namespaces) { - return NULL; - } - - return n->namespaces[nsid - 1]; -} - -static inline NvmeCQueue *nvme_cq(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - NvmeCtrl *n = sq->ctrl; - - return n->cq[sq->cqid]; -} - -static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - return sq->ctrl; -} - -static inline uint16_t nvme_cid(NvmeRequest *req) -{ - if (!req) { - return 0xffff; - } - - return le16_to_cpu(req->cqe.cid); -} - -typedef enum NvmeTxDirection { - NVME_TX_DIRECTION_TO_DEVICE = 0, - NVME_TX_DIRECTION_FROM_DEVICE = 1, -} NvmeTxDirection; - -void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -void nvme_rw_complete_cb(void *opaque, int ret); -uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd); - -#endif /* HW_NVME_H */ diff --git a/hw/block/trace-events b/hw/block/trace-events index fa12e3a67a..646917d045 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -49,212 +49,6 @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" -# nvme.c -# nvme traces for successful events -pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" -pci_nvme_irq_pin(void) "pulsing IRQ pin" -pci_nvme_irq_masked(void) "IRQ is masked" -pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" -pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" -pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" -pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" -pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" -pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" -pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" -pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" -pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" -pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" -pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" -pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" -pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" -pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" -pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" -pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" -pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" -pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" -pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" -pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" -pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" -pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" -pci_nvme_identify_ctrl(void) "identify controller" -pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" -pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" -pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" -pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" -pci_nvme_identify_cmd_set(void) "identify i/o command set" -pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" -pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" -pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" -pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" -pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" -pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" -pci_nvme_process_aers(int queued) "queued %d" -pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" -pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" -pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" -pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" -pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_enqueue_event_noqueue(int queued) "queued %d" -pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" -pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" -pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" -pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" -pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" -pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" -pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" -pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" -pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" -pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" -pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" -pci_nvme_mmio_stopped(void) "cleared controller enable bit" -pci_nvme_mmio_shutdown_set(void) "shutdown bit set" -pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" -pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" -pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" -pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" - -# nvme traces for error conditions -pci_nvme_err_mdts(size_t len) "len %zu" -pci_nvme_err_zasl(size_t len) "len %zu" -pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" -pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_cfs(void) "controller fatal status" -pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" -pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" -pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" -pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" -pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" -pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" -pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" -pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" -pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" -pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" -pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" -pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64"" -pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" -pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" -pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" -pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" -pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" -pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" -pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" -pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" -pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" -pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" -pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" -pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" -pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" -pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" -pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" -pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" -pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" -pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" -pci_nvme_err_startfail(void) "setting controller enable bit failed" -pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" - -# Traces for undefined behavior -pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" -pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" -pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" -pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" -pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" -pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" -pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" -pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" -pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" -pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" -pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" -pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" -pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" - # xen-block.c xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" xen_block_connect(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" diff --git a/hw/meson.build b/hw/meson.build index 6bdbae0e81..ba0601e36e 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -21,6 +21,7 @@ subdir('mem') subdir('misc') subdir('net') subdir('nubus') +subdir('nvme') subdir('nvram') subdir('pci') subdir('pci-bridge') diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig new file mode 100644 index 0000000000..8ac90942e5 --- /dev/null +++ b/hw/nvme/Kconfig @@ -0,0 +1,4 @@ +config NVME_PCI + bool + default y if PCI_DEVICES + depends on PCI diff --git a/hw/block/nvme.c b/hw/nvme/ctrl.c similarity index 96% rename from hw/block/nvme.c rename to hw/nvme/ctrl.c index 5fe082ec34..0bcaf7192f 100644 --- a/hw/block/nvme.c +++ b/hw/nvme/ctrl.c @@ -12,10 +12,19 @@ * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e * * https://nvmexpress.org/developers/nvme-specification/ - */ - -/** - * Usage: add options: + * + * + * Notes on coding style + * --------------------- + * While QEMU coding style prefers lowercase hexadecimals in constants, the + * NVMe subsystem use thes format from the NVMe specifications in the comments + * (i.e. 'h' suffix instead of '0x' prefix). + * + * Usage + * ----- + * See docs/system/nvme.rst for extensive documentation. + * + * Add options: * -drive file=,if=none,id= * -device nvme-subsys,id=,nqn= * -device nvme,serial=,id=, \ @@ -135,26 +144,20 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/msix.h" -#include "hw/pci/pci.h" -#include "hw/qdev-properties.h" -#include "migration/vmstate.h" -#include "sysemu/sysemu.h" +#include "qemu/log.h" +#include "qemu/units.h" #include "qapi/error.h" #include "qapi/visitor.h" -#include "sysemu/hostmem.h" +#include "sysemu/sysemu.h" #include "sysemu/block-backend.h" -#include "exec/memory.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/cutils.h" -#include "trace.h" +#include "sysemu/hostmem.h" +#include "hw/pci/msix.h" +#include "migration/vmstate.h" + #include "nvme.h" -#include "nvme-ns.h" -#include "nvme-dif.h" +#include "trace.h" #define NVME_MAX_IOQPAIRS 0xffff #define NVME_DB_SIZE 4 @@ -165,6 +168,7 @@ #define NVME_TEMPERATURE_WARNING 0x157 #define NVME_TEMPERATURE_CRITICAL 0x175 #define NVME_NUM_FW_SLOTS 1 +#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -185,6 +189,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = { [NVME_WRITE_ATOMICITY] = true, [NVME_ASYNCHRONOUS_EVENT_CONF] = true, [NVME_TIMESTAMP] = true, + [NVME_COMMAND_SET_PROFILE] = true, }; static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { @@ -194,6 +199,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, + [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, }; static const uint32_t nvme_cse_acs[256] = { @@ -387,7 +393,8 @@ static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) { - return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces); + return nsid && + (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES); } static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) @@ -511,9 +518,7 @@ static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, NvmeSg *mdata) { NvmeSg *dst = data; - size_t size = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - uint32_t trans_len, count = size; + uint32_t trans_len, count = ns->lbasz; uint64_t offset = 0; bool dma = sg->flags & NVME_SG_DMA; size_t sge_len; @@ -545,7 +550,7 @@ static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, if (count == 0) { dst = (dst == data) ? mdata : data; - count = (dst == data) ? size : msize; + count = (dst == data) ? ns->lbasz : ns->lbaf.ms; } if (sge_len == offset) { @@ -574,7 +579,7 @@ static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, } static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, - size_t len) + size_t len) { if (!len) { return NVME_SUCCESS; @@ -1004,7 +1009,7 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) uint16_t status; if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { + (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) { goto out; } @@ -1187,12 +1192,9 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint16_t ctrl = le16_to_cpu(rw->control); if (nvme_ns_ext(ns) && - !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, lsize, msize, 0, - dir); + !(ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) { + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz, + ns->lbaf.ms, 0, dir); } return nvme_tx(n, &req->sg, ptr, len, dir); @@ -1205,11 +1207,8 @@ uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint16_t status; if (nvme_ns_ext(ns)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, msize, lsize, lsize, - dir); + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms, + ns->lbasz, ns->lbasz, dir); } nvme_sg_unmap(&req->sg); @@ -1426,6 +1425,7 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -1682,8 +1682,12 @@ static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) } } -static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, - bool implicit) +enum { + NVME_ZRM_AUTO = 1 << 0, +}; + +static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone, + int flags) { int act = 0; uint16_t status; @@ -1707,7 +1711,7 @@ static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, nvme_aor_inc_open(ns); - if (implicit) { + if (flags & NVME_ZRM_AUTO) { nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN); return NVME_SUCCESS; } @@ -1715,7 +1719,7 @@ static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, /* fallthrough */ case NVME_ZONE_STATE_IMPLICITLY_OPEN: - if (implicit) { + if (flags & NVME_ZRM_AUTO) { return NVME_SUCCESS; } @@ -1733,16 +1737,16 @@ static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone) { - return __nvme_zrm_open(ns, zone, true); + return nvme_zrm_open_flags(ns, zone, NVME_ZRM_AUTO); } static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone) { - return __nvme_zrm_open(ns, zone, false); + return nvme_zrm_open_flags(ns, zone, 0); } -static void __nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, - uint32_t nlb) +static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, + uint32_t nlb) { zone->d.wp += nlb; @@ -1762,7 +1766,7 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) nlb = le16_to_cpu(rw->nlb) + 1; zone = nvme_get_zone_by_slba(ns, slba); - __nvme_advance_zone_wp(ns, zone, nlb); + nvme_advance_zone_wp(ns, zone, nlb); } static inline bool nvme_is_write(NvmeRequest *req) @@ -1832,11 +1836,11 @@ static void nvme_rw_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) { size_t mlen = nvme_m2b(ns, nlb); @@ -2002,7 +2006,7 @@ static void nvme_verify_mdata_in_cb(void *opaque, int ret) uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); @@ -2104,8 +2108,8 @@ static void nvme_aio_zone_reset_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { - int64_t offset = ns->mdata_offset + nvme_m2b(ns, zone->d.zslba); + if (ns->lbaf.ms) { + int64_t offset = nvme_moff(ns, zone->d.zslba); blk_aio_pwrite_zeroes(ns->blkconf.blk, offset, nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, @@ -2151,7 +2155,7 @@ out: uint64_t sdlba = le64_to_cpu(copy->sdlba); NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - __nvme_advance_zone_wp(ns, zone, ctx->nlb); + nvme_advance_zone_wp(ns, zone, ctx->nlb); } g_free(ctx->bounce); @@ -2173,10 +2177,10 @@ static void nvme_copy_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; uint64_t sdlba = le64_to_cpu(copy->sdlba); - int64_t offset = ns->mdata_offset + nvme_m2b(ns, sdlba); + int64_t offset = nvme_moff(ns, sdlba); qemu_iovec_reset(&req->sg.iov); qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb)); @@ -2268,7 +2272,6 @@ static void nvme_copy_in_complete(NvmeRequest *req) status = nvme_check_bounds(ns, sdlba, ctx->nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze); goto invalid; } @@ -2369,10 +2372,19 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) uint32_t reftag = le32_to_cpu(rw->reftag); struct nvme_compare_ctx *ctx = req->opaque; g_autofree uint8_t *buf = NULL; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); uint16_t status = NVME_SUCCESS; trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + buf = g_malloc(ctx->mdata.iov.size); status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, @@ -2387,7 +2399,6 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) uint8_t *bufp; uint8_t *mbufp = ctx->mdata.bounce; uint8_t *end = mbufp + ctx->mdata.iov.size; - size_t msize = nvme_msize(ns); int16_t pil = 0; status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, @@ -2403,11 +2414,11 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) * tuple. */ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } - for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) { - if (memcmp(bufp + pil, mbufp + pil, msize - pil)) { + for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) { + if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) { req->status = NVME_CMP_FAILURE; goto out; } @@ -2421,6 +2432,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) goto out; } + block_acct_done(stats, acct); + out: qemu_iovec_destroy(&ctx->data.iov); g_free(ctx->data.bounce); @@ -2468,12 +2481,12 @@ static void nvme_compare_data_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); ctx->mdata.bounce = g_malloc(mlen); @@ -2530,8 +2543,6 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) uint32_t nlb = le32_to_cpu(range[i].nlb); if (nvme_check_bounds(ns, slba, nlb)) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, - ns->id_ns.nsze); continue; } @@ -2604,7 +2615,6 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return status; } @@ -2689,7 +2699,6 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, _nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze); goto out; } @@ -2716,7 +2725,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } bounce = bouncep = g_malloc(nvme_l2b(ns, nlb)); - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb)); } @@ -2752,9 +2761,9 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) bouncep += len; - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { len = nvme_m2b(ns, nlb); - offset = ns->mdata_offset + nvme_m2b(ns, slba); + offset = nvme_moff(ns, slba); in_ctx = g_new(struct nvme_copy_in_ctx, 1); in_ctx->req = req; @@ -2818,7 +2827,6 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return status; } @@ -2875,7 +2883,7 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) /* 1-initialize; see comment in nvme_dsm */ *num_flushes = 1; - for (int i = 1; i <= n->num_namespaces; i++) { + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -2923,7 +2931,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = ctrl & NVME_RW_PRINFO_PRACT; - if (pract && nvme_msize(ns) == 8) { + if (pract && ns->lbaf.ms == 8) { mapped_size = data_size; } } @@ -2938,7 +2946,6 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); goto invalid; } @@ -3000,7 +3007,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = ctrl & NVME_RW_PRINFO_PRACT; - if (pract && nvme_msize(ns) == 8) { + if (pract && ns->lbaf.ms == 8) { mapped_size -= nvme_m2b(ns, nlb); } } @@ -3018,7 +3025,6 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); goto invalid; } @@ -3595,8 +3601,8 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { + NvmeNamespace *ns; uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint16_t status; trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); @@ -3607,18 +3613,18 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) /* * In the base NVM command set, Flush may apply to all namespaces - * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used + * (indicated by NSID being set to FFFFFFFFh). But if that feature is used * along with TP 4056 (Namespace Types), it may be pretty screwed up. * - * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the + * If NSID is indeed set to FFFFFFFFh, we simply cannot associate the * opcode with a specific command since we cannot determine a unique I/O - * command set. Opcode 0x0 could have any other meaning than something + * command set. Opcode 0h could have any other meaning than something * equivalent to flushing and say it DOES have completely different - * semantics in some other command set - does an NSID of 0xFFFFFFFF then + * semantics in some other command set - does an NSID of FFFFFFFFh then * mean "for all namespaces, apply whatever command set specific command - * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply - * whatever command that uses the 0x0 opcode if, and only if, it allows - * NSID to be 0xFFFFFFFF"? + * that uses the 0h opcode?" Or does it mean "for all namespaces, apply + * whatever command that uses the 0h opcode if, and only if, it allows NSID + * to be FFFFFFFFh"? * * Anyway (and luckily), for now, we do not care about this since the * device only supports namespace types that includes the NVM Flush command @@ -3628,21 +3634,22 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_flush(n, req); } - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { return NVME_INVALID_FIELD | NVME_DNR; } - if (!(req->ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { trace_pci_nvme_err_invalid_opc(req->cmd.opcode); return NVME_INVALID_OPCODE | NVME_DNR; } - status = nvme_ns_status(req->ns); - if (unlikely(status)) { - return status; + if (ns->status) { + return ns->status; } + req->ns = ns; + switch (req->cmd.opcode) { case NVME_CMD_WRITE_ZEROES: return nvme_write_zeroes(n, req); @@ -3844,7 +3851,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, } else { int i; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -3934,7 +3941,7 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, NVME_CHANGED_NSID_SIZE) { /* * If more than 1024 namespaces, the first entry in the log page should - * be set to 0xffffffff and the others to 0 as spec. + * be set to FFFFFFFFh and the others to 0 as spec. */ if (i == ARRAY_SIZE(nslist)) { memset(nslist, 0x0, sizeof(nslist)); @@ -4332,7 +4339,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist(min_nsid); /* - * Both 0xffffffff (NVME_NSID_BROADCAST) and 0xfffffffe are invalid values + * Both FFFFFFFFh (NVME_NSID_BROADCAST) and FFFFFFFFEh are invalid values * since the Active Namespace ID List should return namespaces with ids * *higher* than the NSID specified in the command. This is also specified * in the spec (NVM Express v1.3d, Section 5.15.4). @@ -4341,7 +4348,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, return NVME_INVALID_NSID | NVME_DNR; } - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { if (!active) { @@ -4379,7 +4386,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); /* - * Same as in nvme_identify_nslist(), 0xffffffff/0xfffffffe are invalid. + * Same as in nvme_identify_nslist(), FFFFFFFFh/FFFFFFFFEh are invalid. */ if (min_nsid >= NVME_NSID_BROADCAST - 1) { return NVME_INVALID_NSID | NVME_DNR; @@ -4389,7 +4396,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, return NVME_INVALID_FIELD | NVME_DNR; } - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { if (!active) { @@ -4446,7 +4453,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) /* * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data - * structure, a Namespace UUID (nidt = 0x3) must be reported in the + * structure, a Namespace UUID (nidt = 3h) must be reported in the * Namespace Identification Descriptor. Add the namespace UUID here. */ ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID; @@ -4595,7 +4602,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) /* * The Reservation Notification Mask and Reservation Persistence * features require a status code of Invalid Field in Command when - * NSID is 0xFFFFFFFF. Since the device does not support those + * NSID is FFFFFFFFh. Since the device does not support those * features we can always return Invalid Namespace or Format as we * should do for all other features. */ @@ -4655,7 +4662,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) goto out; case NVME_VOLATILE_WRITE_CACHE: result = 0; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -4707,9 +4714,6 @@ defaults: result |= NVME_INTVC_NOCOALESCING; } break; - case NVME_COMMAND_SET_PROFILE: - result = 0; - break; default: result = nvme_feature_default[fid]; break; @@ -4805,7 +4809,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) break; case NVME_ERROR_RECOVERY: if (nsid == NVME_NSID_BROADCAST) { - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { @@ -4826,7 +4830,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) } break; case NVME_VOLATILE_WRITE_CACHE: - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -4847,15 +4851,15 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) } /* - * NVMe v1.3, Section 5.21.1.7: 0xffff is not an allowed value for NCQR + * NVMe v1.3, Section 5.21.1.7: FFFFh is not an allowed value for NCQR * and NSQR. */ if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { return NVME_INVALID_FIELD | NVME_DNR; } - trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, - ((dw11 >> 16) & 0xFFFF) + 1, + trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1, + ((dw11 >> 16) & 0xffff) + 1, n->params.max_ioqpairs, n->params.max_ioqpairs); req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | @@ -4912,7 +4916,25 @@ static void nvme_update_dmrsl(NvmeCtrl *n) } } -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns); +static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + ns->iocs = nvme_cse_iocs_none; + switch (ns->csi) { + case NVME_CSI_NVM: + if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + case NVME_CSI_ZONED: + if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { + ns->iocs = nvme_cse_iocs_zoned; + } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + } +} + static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; @@ -4963,13 +4985,13 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) } nvme_attach_ns(ctrl, ns); - __nvme_select_ns_iocs(ctrl, ns); + nvme_select_iocs_ns(ctrl, ns); } else { if (!nvme_ns(ctrl, nsid)) { return NVME_NS_NOT_ATTACHED | NVME_DNR; } - ctrl->namespaces[nsid - 1] = NULL; + ctrl->namespaces[nsid] = NULL; ns->attached--; nvme_update_dmrsl(ctrl); @@ -5101,7 +5123,7 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) req->status = status; } } else { - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -5212,7 +5234,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n) NvmeNamespace *ns; int i; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -5254,7 +5276,7 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n) memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); } - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -5264,37 +5286,18 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n) } } -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns) -{ - ns->iocs = nvme_cse_iocs_none; - switch (ns->csi) { - case NVME_CSI_NVM: - if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - case NVME_CSI_ZONED: - if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { - ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - } -} - -static void nvme_select_ns_iocs(NvmeCtrl *n) +static void nvme_select_iocs(NvmeCtrl *n) { NvmeNamespace *ns; int i; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; } - __nvme_select_ns_iocs(n, ns); + nvme_select_iocs_ns(n, ns); } } @@ -5396,7 +5399,7 @@ static int nvme_start_ctrl(NvmeCtrl *n) QTAILQ_INIT(&n->aer_queue); - nvme_select_ns_iocs(n); + nvme_select_iocs(n); return 0; } @@ -5493,7 +5496,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->bar.cc = data; } break; - case 0x1C: /* CSTS */ + case 0x1c: /* CSTS */ if (data & (1 << 4)) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, "attempted to W1C CSTS.NSSRO" @@ -5505,7 +5508,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } break; case 0x20: /* NSSR */ - if (data == 0x4E564D65) { + if (data == 0x4e564d65) { trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); } else { /* The spec says that writes of other values have no effect */ @@ -5575,11 +5578,11 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32); return; - case 0xE00: /* PMRCAP */ + case 0xe00: /* PMRCAP */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; - case 0xE04: /* PMRCTL */ + case 0xe04: /* PMRCTL */ n->bar.pmrctl = data; if (NVME_PMRCTL_EN(data)) { memory_region_set_enabled(&n->pmr.dev->mr, true); @@ -5590,19 +5593,19 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->pmr.cmse = false; } return; - case 0xE08: /* PMRSTS */ + case 0xe08: /* PMRSTS */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; - case 0xE0C: /* PMREBS */ + case 0xe0C: /* PMREBS */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; - case 0xE10: /* PMRSWTP */ + case 0xe10: /* PMRSWTP */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; - case 0xE14: /* PMRMSCL */ + case 0xe14: /* PMRMSCL */ if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5622,7 +5625,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; - case 0xE18: /* PMRMSCU */ + case 0xe18: /* PMRMSCU */ if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5664,7 +5667,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) * from PMRSTS should ensure prior writes * made it to persistent media */ - if (addr == 0xE08 && + if (addr == 0xe08 && (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); } @@ -5915,7 +5918,6 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) static void nvme_init_state(NvmeCtrl *n) { - n->num_namespaces = NVME_MAX_NAMESPACES; /* add one to max_ioqpairs to account for the admin queue pair */ n->reg_size = pow2ceil(sizeof(NvmeBar) + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); @@ -6096,7 +6098,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; - id->nn = cpu_to_le32(n->num_namespaces); + id->nn = cpu_to_le32(NVME_MAX_NAMESPACES); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | NVME_ONCS_FEATURES | NVME_ONCS_DSM | NVME_ONCS_COMPARE | NVME_ONCS_COPY); @@ -6161,7 +6163,7 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) uint32_t nsid = ns->params.nsid; assert(nsid && nsid <= NVME_MAX_NAMESPACES); - n->namespaces[nsid - 1] = ns; + n->namespaces[nsid] = ns; ns->attached++; n->dmrsl = MIN_NON_ZERO(n->dmrsl, @@ -6215,7 +6217,7 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ctrl_reset(n); - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; diff --git a/hw/block/nvme-dif.c b/hw/nvme/dif.c similarity index 90% rename from hw/block/nvme-dif.c rename to hw/nvme/dif.c index 81b0a4cb13..88efcbe9bd 100644 --- a/hw/block/nvme-dif.c +++ b/hw/nvme/dif.c @@ -9,13 +9,11 @@ */ #include "qemu/osdep.h" -#include "hw/block/block.h" -#include "sysemu/dma.h" -#include "sysemu/block-backend.h" #include "qapi/error.h" -#include "trace.h" +#include "sysemu/block-backend.h" + #include "nvme.h" -#include "nvme-dif.h" +#include "trace.h" uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, uint32_t reftag) @@ -46,20 +44,18 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, uint32_t reftag) { uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); int16_t pil = 0; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } - trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag, - reftag); + trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, + apptag, reftag); - for (; buf < end; buf += lsize, mbuf += msize) { + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - uint16_t crc = crc_t10dif(0x0, buf, lsize); + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); if (pil) { crc = crc_t10dif(crc, mbuf, pil); @@ -100,7 +96,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, } if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) { - uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns)); + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); if (pil) { crc = crc_t10dif(crc, mbuf, pil); @@ -139,8 +135,6 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint16_t appmask, uint32_t reftag) { uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); int16_t pil = 0; uint16_t status; @@ -150,12 +144,12 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, } if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } - trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil); + trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), ns->lbasz + pil); - for (; buf < end; buf += lsize, mbuf += msize) { + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, @@ -178,20 +172,18 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, BlockBackend *blk = ns->blkconf.blk; BlockDriverState *bs = blk_bs(blk); - size_t msize = nvme_msize(ns); - size_t lsize = nvme_lsize(ns); int64_t moffset = 0, offset = nvme_l2b(ns, slba); uint8_t *mbufp, *end; bool zeroed; int16_t pil = 0; - int64_t bytes = (mlen / msize) * lsize; + int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds; int64_t pnum = 0; Error *err = NULL; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } do { @@ -213,15 +205,15 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, if (zeroed) { mbufp = mbuf + moffset; - mlen = (pnum / lsize) * msize; + mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms; end = mbufp + mlen; - for (; mbufp < end; mbufp += msize) { + for (; mbufp < end; mbufp += ns->lbaf.ms) { memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); } } - moffset += (pnum / lsize) * msize; + moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms; offset += pnum; } while (pnum != bytes); @@ -291,7 +283,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) goto out; } - if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) { + if (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8) { goto out; } @@ -314,7 +306,7 @@ static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); @@ -343,7 +335,7 @@ static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) NvmeNamespace *ns = req->ns; NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); @@ -395,8 +387,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) if (pract) { uint8_t *mbuf, *end; - size_t msize = nvme_msize(ns); - int16_t pil = msize - sizeof(NvmeDifTuple); + int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); status = nvme_check_prinfo(ns, ctrl, slba, reftag); if (status) { @@ -417,7 +408,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) pil = 0; } - for (; mbuf < end; mbuf += msize) { + for (; mbuf < end; mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); dif->apptag = cpu_to_be16(apptag); @@ -436,7 +427,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } - if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) { + if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) { mapped_len += mlen; } @@ -470,7 +461,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) qemu_iovec_init(&ctx->mdata.iov, 1); qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - if (!(pract && nvme_msize(ns) == 8)) { + if (!(pract && ns->lbaf.ms == 8)) { status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, NVME_TX_DIRECTION_TO_DEVICE, req); if (status) { diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build new file mode 100644 index 0000000000..3cf40046ee --- /dev/null +++ b/hw/nvme/meson.build @@ -0,0 +1 @@ +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) diff --git a/hw/block/nvme-ns.c b/hw/nvme/ns.c similarity index 87% rename from hw/block/nvme-ns.c rename to hw/nvme/ns.c index 7bb618f182..992e5a13f5 100644 --- a/hw/block/nvme-ns.c +++ b/hw/nvme/ns.c @@ -14,23 +14,16 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "qemu/cutils.h" -#include "qemu/log.h" #include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/pci.h" +#include "qapi/error.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" -#include "qapi/error.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" - -#include "trace.h" #include "nvme.h" -#include "nvme-ns.h" +#include "trace.h" #define MIN_DISCARD_GRANULARITY (4 * KiB) +#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) void nvme_ns_init_format(NvmeNamespace *ns) { @@ -38,7 +31,10 @@ void nvme_ns_init_format(NvmeNamespace *ns) BlockDriverInfo bdi; int npdg, nlbas, ret; - nlbas = nvme_ns_nlbas(ns); + ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; + ns->lbasz = 1 << ns->lbaf.ds; + + nlbas = ns->size / (ns->lbasz + ns->lbaf.ms); id_ns->nsze = cpu_to_le64(nlbas); @@ -46,13 +42,13 @@ void nvme_ns_init_format(NvmeNamespace *ns) id_ns->ncap = id_ns->nsze; id_ns->nuse = id_ns->ncap; - ns->mdata_offset = nvme_l2b(ns, nlbas); + ns->moff = (int64_t)nlbas << ns->lbaf.ds; - npdg = ns->blkconf.discard_granularity / nvme_lsize(ns); + npdg = ns->blkconf.discard_granularity / ns->lbasz; ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { - npdg = bdi.cluster_size / nvme_lsize(ns); + npdg = bdi.cluster_size / ns->lbasz; } id_ns->npda = id_ns->npdg = npdg - 1; @@ -170,7 +166,6 @@ static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) { uint64_t zone_size, zone_cap; - uint32_t lbasz = nvme_lsize(ns); /* Make sure that the values of ZNS properties are sane */ if (ns->params.zone_size_bs) { @@ -188,14 +183,14 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) "zone size %"PRIu64"B", zone_cap, zone_size); return -1; } - if (zone_size < lbasz) { + if (zone_size < ns->lbasz) { error_setg(errp, "zone size %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_size, lbasz); + "must be at least %zuB", zone_size, ns->lbasz); return -1; } - if (zone_cap < lbasz) { + if (zone_cap < ns->lbasz) { error_setg(errp, "zone capacity %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_cap, lbasz); + "must be at least %zuB", zone_cap, ns->lbasz); return -1; } @@ -203,9 +198,9 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) * Save the main zone geometry values to avoid * calculating them later again. */ - ns->zone_size = zone_size / lbasz; - ns->zone_capacity = zone_cap / lbasz; - ns->num_zones = nvme_ns_nlbas(ns) / ns->zone_size; + ns->zone_size = zone_size / ns->lbasz; + ns->zone_capacity = zone_cap / ns->lbasz; + ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size; /* Do a few more sanity checks of ZNS properties */ if (!ns->num_zones) { @@ -215,43 +210,6 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) return -1; } - if (ns->params.max_open_zones > ns->num_zones) { - error_setg(errp, - "max_open_zones value %u exceeds the number of zones %u", - ns->params.max_open_zones, ns->num_zones); - return -1; - } - if (ns->params.max_active_zones > ns->num_zones) { - error_setg(errp, - "max_active_zones value %u exceeds the number of zones %u", - ns->params.max_active_zones, ns->num_zones); - return -1; - } - - if (ns->params.max_active_zones) { - if (ns->params.max_open_zones > ns->params.max_active_zones) { - error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)", - ns->params.max_open_zones, ns->params.max_active_zones); - return -1; - } - - if (!ns->params.max_open_zones) { - ns->params.max_open_zones = ns->params.max_active_zones; - } - } - - if (ns->params.zd_extension_size) { - if (ns->params.zd_extension_size & 0x3f) { - error_setg(errp, - "zone descriptor extension size must be a multiple of 64B"); - return -1; - } - if ((ns->params.zd_extension_size >> 6) > 0xff) { - error_setg(errp, "zone descriptor extension size is too large"); - return -1; - } - } - return 0; } @@ -303,7 +261,7 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns) id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); - /* MAR/MOR are zeroes-based, 0xffffffff means no limit */ + /* MAR/MOR are zeroes-based, FFFFFFFFFh means no limit */ id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); id_ns_z->zoc = 0; @@ -421,6 +379,34 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, } } + if (ns->params.zoned) { + if (ns->params.max_active_zones) { + if (ns->params.max_open_zones > ns->params.max_active_zones) { + error_setg(errp, "max_open_zones (%u) exceeds " + "max_active_zones (%u)", ns->params.max_open_zones, + ns->params.max_active_zones); + return -1; + } + + if (!ns->params.max_open_zones) { + ns->params.max_open_zones = ns->params.max_active_zones; + } + } + + if (ns->params.zd_extension_size) { + if (ns->params.zd_extension_size & 0x3f) { + error_setg(errp, "zone descriptor extension size must be a " + "multiple of 64B"); + return -1; + } + if ((ns->params.zd_extension_size >> 6) > 0xff) { + error_setg(errp, + "zone descriptor extension size is too large"); + return -1; + } + } + } + return 0; } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h new file mode 100644 index 0000000000..81a35cda14 --- /dev/null +++ b/hw/nvme/nvme.h @@ -0,0 +1,547 @@ +/* + * QEMU NVM Express + * + * Copyright (c) 2012 Intel Corporation + * Copyright (c) 2021 Minwoo Im + * Copyright (c) 2021 Samsung Electronics Co., Ltd. + * + * Authors: + * Keith Busch + * Klaus Jensen + * Gollu Appalanaidu + * Dmitry Fomichev + * Minwoo Im + * + * This code is licensed under the GNU GPL v2 or later. + */ + +#ifndef HW_NVME_INTERNAL_H +#define HW_NVME_INTERNAL_H + +#include "qemu/uuid.h" +#include "hw/pci/pci.h" +#include "hw/block/block.h" + +#include "block/nvme.h" + +#define NVME_MAX_CONTROLLERS 32 +#define NVME_MAX_NAMESPACES 256 + +typedef struct NvmeCtrl NvmeCtrl; +typedef struct NvmeNamespace NvmeNamespace; + +#define TYPE_NVME_SUBSYS "nvme-subsys" +#define NVME_SUBSYS(obj) \ + OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) + +typedef struct NvmeSubsystem { + DeviceState parent_obj; + uint8_t subnqn[256]; + + NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + + struct { + char *nqn; + } params; +} NvmeSubsystem; + +int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); + +static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, + uint32_t cntlid) +{ + if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) { + return NULL; + } + + return subsys->ctrls[cntlid]; +} + +static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, + uint32_t nsid) +{ + if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return subsys->namespaces[nsid]; +} + +#define TYPE_NVME_NS "nvme-ns" +#define NVME_NS(obj) \ + OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) + +typedef struct NvmeZone { + NvmeZoneDescr d; + uint64_t w_ptr; + QTAILQ_ENTRY(NvmeZone) entry; +} NvmeZone; + +typedef struct NvmeNamespaceParams { + bool detached; + bool shared; + uint32_t nsid; + QemuUUID uuid; + + uint16_t ms; + uint8_t mset; + uint8_t pi; + uint8_t pil; + + uint16_t mssrl; + uint32_t mcl; + uint8_t msrc; + + bool zoned; + bool cross_zone_read; + uint64_t zone_size_bs; + uint64_t zone_cap_bs; + uint32_t max_active_zones; + uint32_t max_open_zones; + uint32_t zd_extension_size; +} NvmeNamespaceParams; + +typedef struct NvmeNamespace { + DeviceState parent_obj; + BlockConf blkconf; + int32_t bootindex; + int64_t size; + int64_t moff; + NvmeIdNs id_ns; + NvmeLBAF lbaf; + size_t lbasz; + const uint32_t *iocs; + uint8_t csi; + uint16_t status; + int attached; + + QTAILQ_ENTRY(NvmeNamespace) entry; + + NvmeIdNsZoned *id_ns_zoned; + NvmeZone *zone_array; + QTAILQ_HEAD(, NvmeZone) exp_open_zones; + QTAILQ_HEAD(, NvmeZone) imp_open_zones; + QTAILQ_HEAD(, NvmeZone) closed_zones; + QTAILQ_HEAD(, NvmeZone) full_zones; + uint32_t num_zones; + uint64_t zone_size; + uint64_t zone_capacity; + uint32_t zone_size_log2; + uint8_t *zd_extensions; + int32_t nr_open_zones; + int32_t nr_active_zones; + + NvmeNamespaceParams params; + + struct { + uint32_t err_rec; + } features; +} NvmeNamespace; + +static inline uint32_t nvme_nsid(NvmeNamespace *ns) +{ + if (ns) { + return ns->params.nsid; + } + + return 0; +} + +static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) +{ + return lba << ns->lbaf.ds; +} + +static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) +{ + return ns->lbaf.ms * lba; +} + +static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) +{ + return ns->moff + nvme_m2b(ns, lba); +} + +static inline bool nvme_ns_ext(NvmeNamespace *ns) +{ + return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); +} + +static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) +{ + return zone->d.zs >> 4; +} + +static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) +{ + zone->d.zs = state << 4; +} + +static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) +{ + return zone->d.zslba + ns->zone_size; +} + +static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) +{ + return zone->d.zslba + zone->d.zcap; +} + +static inline bool nvme_wp_is_valid(NvmeZone *zone) +{ + uint8_t st = nvme_get_zone_state(zone); + + return st != NVME_ZONE_STATE_FULL && + st != NVME_ZONE_STATE_READ_ONLY && + st != NVME_ZONE_STATE_OFFLINE; +} + +static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, + uint32_t zone_idx) +{ + return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; +} + +static inline void nvme_aor_inc_open(NvmeNamespace *ns) +{ + assert(ns->nr_open_zones >= 0); + if (ns->params.max_open_zones) { + ns->nr_open_zones++; + assert(ns->nr_open_zones <= ns->params.max_open_zones); + } +} + +static inline void nvme_aor_dec_open(NvmeNamespace *ns) +{ + if (ns->params.max_open_zones) { + assert(ns->nr_open_zones > 0); + ns->nr_open_zones--; + } + assert(ns->nr_open_zones >= 0); +} + +static inline void nvme_aor_inc_active(NvmeNamespace *ns) +{ + assert(ns->nr_active_zones >= 0); + if (ns->params.max_active_zones) { + ns->nr_active_zones++; + assert(ns->nr_active_zones <= ns->params.max_active_zones); + } +} + +static inline void nvme_aor_dec_active(NvmeNamespace *ns) +{ + if (ns->params.max_active_zones) { + assert(ns->nr_active_zones > 0); + ns->nr_active_zones--; + assert(ns->nr_active_zones >= ns->nr_open_zones); + } + assert(ns->nr_active_zones >= 0); +} + +void nvme_ns_init_format(NvmeNamespace *ns); +int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); +void nvme_ns_drain(NvmeNamespace *ns); +void nvme_ns_shutdown(NvmeNamespace *ns); +void nvme_ns_cleanup(NvmeNamespace *ns); + +typedef struct NvmeAsyncEvent { + QTAILQ_ENTRY(NvmeAsyncEvent) entry; + NvmeAerResult result; +} NvmeAsyncEvent; + +enum { + NVME_SG_ALLOC = 1 << 0, + NVME_SG_DMA = 1 << 1, +}; + +typedef struct NvmeSg { + int flags; + + union { + QEMUSGList qsg; + QEMUIOVector iov; + }; +} NvmeSg; + +typedef enum NvmeTxDirection { + NVME_TX_DIRECTION_TO_DEVICE = 0, + NVME_TX_DIRECTION_FROM_DEVICE = 1, +} NvmeTxDirection; + +typedef struct NvmeRequest { + struct NvmeSQueue *sq; + struct NvmeNamespace *ns; + BlockAIOCB *aiocb; + uint16_t status; + void *opaque; + NvmeCqe cqe; + NvmeCmd cmd; + BlockAcctCookie acct; + NvmeSg sg; + QTAILQ_ENTRY(NvmeRequest)entry; +} NvmeRequest; + +typedef struct NvmeBounceContext { + NvmeRequest *req; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data, mdata; +} NvmeBounceContext; + +static inline const char *nvme_adm_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; + case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; + case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; + case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; + case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; + case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; + case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; + case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; + case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; + case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; + case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; + case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; + default: return "NVME_ADM_CMD_UNKNOWN"; + } +} + +static inline const char *nvme_io_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; + case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; + case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; + case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; + case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; + case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; + case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; + case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; + case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; + case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; + case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; + default: return "NVME_NVM_CMD_UNKNOWN"; + } +} + +typedef struct NvmeSQueue { + struct NvmeCtrl *ctrl; + uint16_t sqid; + uint16_t cqid; + uint32_t head; + uint32_t tail; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + NvmeRequest *io_req; + QTAILQ_HEAD(, NvmeRequest) req_list; + QTAILQ_HEAD(, NvmeRequest) out_req_list; + QTAILQ_ENTRY(NvmeSQueue) entry; +} NvmeSQueue; + +typedef struct NvmeCQueue { + struct NvmeCtrl *ctrl; + uint8_t phase; + uint16_t cqid; + uint16_t irq_enabled; + uint32_t head; + uint32_t tail; + uint32_t vector; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + QTAILQ_HEAD(, NvmeSQueue) sq_list; + QTAILQ_HEAD(, NvmeRequest) req_list; +} NvmeCQueue; + +#define TYPE_NVME_BUS "nvme-bus" +#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; + +#define TYPE_NVME "nvme" +#define NVME(obj) \ + OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) + +typedef struct NvmeParams { + char *serial; + uint32_t num_queues; /* deprecated since 5.1 */ + uint32_t max_ioqpairs; + uint16_t msix_qsize; + uint32_t cmb_size_mb; + uint8_t aerl; + uint32_t aer_max_queued; + uint8_t mdts; + uint8_t vsl; + bool use_intel_id; + uint8_t zasl; + bool legacy_cmb; +} NvmeParams; + +typedef struct NvmeCtrl { + PCIDevice parent_obj; + MemoryRegion bar0; + MemoryRegion iomem; + NvmeBar bar; + NvmeParams params; + NvmeBus bus; + + uint16_t cntlid; + bool qs_created; + uint32_t page_size; + uint16_t page_bits; + uint16_t max_prp_ents; + uint16_t cqe_size; + uint16_t sqe_size; + uint32_t reg_size; + uint32_t max_q_ents; + uint8_t outstanding_aers; + uint32_t irq_status; + uint64_t host_timestamp; /* Timestamp sent by the host */ + uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ + uint64_t starttime_ms; + uint16_t temperature; + uint8_t smart_critical_warning; + + struct { + MemoryRegion mem; + uint8_t *buf; + bool cmse; + hwaddr cba; + } cmb; + + struct { + HostMemoryBackend *dev; + bool cmse; + hwaddr cba; + } pmr; + + uint8_t aer_mask; + NvmeRequest **aer_reqs; + QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; + int aer_queued; + + uint32_t dmrsl; + + /* Namespace ID is started with 1 so bitmap should be 1-based */ +#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) + DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); + + NvmeSubsystem *subsys; + + NvmeNamespace namespace; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeSQueue **sq; + NvmeCQueue **cq; + NvmeSQueue admin_sq; + NvmeCQueue admin_cq; + NvmeIdCtrl id_ctrl; + + struct { + struct { + uint16_t temp_thresh_hi; + uint16_t temp_thresh_low; + }; + uint32_t async_config; + } features; +} NvmeCtrl; + +static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) +{ + if (!nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return n->namespaces[nsid]; +} + +static inline NvmeCQueue *nvme_cq(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + NvmeCtrl *n = sq->ctrl; + + return n->cq[sq->cqid]; +} + +static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + return sq->ctrl; +} + +static inline uint16_t nvme_cid(NvmeRequest *req) +{ + if (!req) { + return 0xffff; + } + + return le16_to_cpu(req->cqe.cid); +} + +void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +void nvme_rw_complete_cb(void *opaque, int ret); +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd); + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static const uint16_t t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, + uint32_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t reftag); +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t ctrl, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t reftag); +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); + + +#endif /* HW_NVME_INTERNAL_H */ diff --git a/hw/block/nvme-subsys.c b/hw/nvme/subsys.c similarity index 86% rename from hw/block/nvme-subsys.c rename to hw/nvme/subsys.c index 9604c19117..192223d17c 100644 --- a/hw/block/nvme-subsys.c +++ b/hw/nvme/subsys.c @@ -6,20 +6,10 @@ * This code is licensed under the GNU GPL v2. Refer COPYING. */ -#include "qemu/units.h" #include "qemu/osdep.h" -#include "qemu/uuid.h" -#include "qemu/iov.h" -#include "qemu/cutils.h" #include "qapi/error.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" -#include "hw/block/block.h" -#include "block/aio.h" -#include "block/accounting.h" -#include "hw/pci/pci.h" + #include "nvme.h" -#include "nvme-subsys.h" int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) { diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events new file mode 100644 index 0000000000..ea33d0ccc3 --- /dev/null +++ b/hw/nvme/trace-events @@ -0,0 +1,204 @@ +# successful events +pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" +pci_nvme_irq_pin(void) "pulsing IRQ pin" +pci_nvme_irq_masked(void) "IRQ is masked" +pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" +pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" +pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" +pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" +pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" +pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" +pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" +pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" +pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" +pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" +pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" +pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" +pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" +pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" +pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" +pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" +pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" +pci_nvme_identify_ctrl(void) "identify controller" +pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" +pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" +pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" +pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" +pci_nvme_identify_cmd_set(void) "identify i/o command set" +pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" +pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" +pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" +pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +pci_nvme_process_aers(int queued) "queued %d" +pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" +pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" +pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" +pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" +pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_enqueue_event_noqueue(int queued) "queued %d" +pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" +pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" +pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" +pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" +pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" +pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" +pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" +pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" +pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" +pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" +pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" +pci_nvme_mmio_stopped(void) "cleared controller enable bit" +pci_nvme_mmio_shutdown_set(void) "shutdown bit set" +pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" +pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" +pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" +pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" + +# error conditions +pci_nvme_err_mdts(size_t len) "len %zu" +pci_nvme_err_zasl(size_t len) "len %zu" +pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" +pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_cfs(void) "controller fatal status" +pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" +pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" +pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" +pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" +pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" +pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" +pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" +pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" +pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" +pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" +pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" +pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64"" +pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" +pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" +pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" +pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" +pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" +pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" +pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" +pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" +pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" +pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" +pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" +pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" +pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" +pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" +pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" +pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" +pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" +pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" +pci_nvme_err_startfail(void) "setting controller enable bit failed" +pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" + +# undefined behavior +pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" +pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" +pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" +pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" +pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" +pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" +pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" +pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" +pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" diff --git a/hw/nvme/trace.h b/hw/nvme/trace.h new file mode 100644 index 0000000000..b398ea107f --- /dev/null +++ b/hw/nvme/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_nvme.h" diff --git a/include/block/nvme.h b/include/block/nvme.h index 4ac926fbc6..0ff9ce17a9 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -7,7 +7,7 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t intms; uint32_t intmc; uint32_t cc; - uint32_t rsvd1; + uint8_t rsvd24[4]; uint32_t csts; uint32_t nssrc; uint32_t aqa; @@ -848,8 +848,8 @@ enum NvmeStatusCodes { NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, NVME_NS_ALREADY_ATTACHED = 0x0118, NVME_NS_PRIVATE = 0x0119, - NVME_NS_NOT_ATTACHED = 0x011A, - NVME_NS_CTRL_LIST_INVALID = 0x011C, + NVME_NS_NOT_ATTACHED = 0x011a, + NVME_NS_CTRL_LIST_INVALID = 0x011c, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, @@ -1409,9 +1409,9 @@ typedef enum NvmeZoneState { NVME_ZONE_STATE_IMPLICITLY_OPEN = 0x02, NVME_ZONE_STATE_EXPLICITLY_OPEN = 0x03, NVME_ZONE_STATE_CLOSED = 0x04, - NVME_ZONE_STATE_READ_ONLY = 0x0D, - NVME_ZONE_STATE_FULL = 0x0E, - NVME_ZONE_STATE_OFFLINE = 0x0F, + NVME_ZONE_STATE_READ_ONLY = 0x0d, + NVME_ZONE_STATE_FULL = 0x0e, + NVME_ZONE_STATE_OFFLINE = 0x0f, } NvmeZoneState; static inline void _nvme_check_size(void) diff --git a/meson.build b/meson.build index 8e16e05c2a..1559e8d873 100644 --- a/meson.build +++ b/meson.build @@ -1822,6 +1822,7 @@ if have_system 'hw/misc/macio', 'hw/net', 'hw/net/can', + 'hw/nvme', 'hw/nvram', 'hw/pci', 'hw/pci-host',