Pull request

Userspace NVMe driver patches.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEhpWov9P5fNqsNXdanKSrs4Grc8gFAmE3H0UACgkQnKSrs4Gr
 c8johgf/R2gB82bGdgb4A3W1kQolwCGuOJ7IMq3vIuPUf9nps4G2QXM/0cAKmwW1
 B/72XA3VZGsRByaHEdhcGxOzSWblmdDnl5PTd31oAiIKYKiu7GI+UYkBQ/t0CMHq
 fVBb4utzFbtZu0fveF9t1ViWbQ3bB/W9S9UkeG9dVjGvTi9QqqyNf8e03FaW1alv
 rPSsr7Ks/cyTTTCttpuxkwv3dNAdYHHdglm9CR/9BkyznK+LOxEX2gn7YjORNZnb
 Od8xkPptdRkNGO8gJQoIwOdA4qotSW23Uy+57a7IIY0Zwi4G2YbBDRu8+yRP/6HU
 bf+xa1oM4i0ApOB1tbbq6JJtabp1Ww==
 =Jx3A
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/stefanha-gitlab/tags/block-pull-request' into staging

Pull request

Userspace NVMe driver patches.

# gpg: Signature made Tue 07 Sep 2021 09:13:57 BST
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha-gitlab/tags/block-pull-request:
  block/nvme: Only report VFIO error on failed retry
  util/vfio-helpers: Let qemu_vfio_do_mapping() propagate Error
  util/vfio-helpers: Simplify qemu_vfio_dma_map() returning directly
  util/vfio-helpers: Use error_setg in qemu_vfio_find_[fixed/temp]_iova
  util/vfio-helpers: Extract qemu_vfio_water_mark_reached()
  util/vfio-helpers: Pass Error handle to qemu_vfio_dma_map()
  block/nvme: Have nvme_create_queue_pair() report errors consistently
  util/vfio-helpers: Remove unreachable code in qemu_vfio_dma_map()
  util/vfio-helpers: Replace qemu_mutex_lock() calls with QEMU_LOCK_GUARD
  util/vfio-helpers: Let qemu_vfio_verify_mappings() use error_report()
  block/nvme: Use safer trace format string

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-09-07 13:24:43 +01:00
commit f9128631fb
4 changed files with 76 additions and 56 deletions

View File

@ -176,12 +176,11 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
return false; return false;
} }
memset(q->queue, 0, bytes); memset(q->queue, 0, bytes);
r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova); r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova, errp);
if (r) { if (r) {
error_setg(errp, "Cannot map queue"); error_prepend(errp, "Cannot map queue: ");
return false;
} }
return true; return r == 0;
} }
static void nvme_free_queue_pair(NVMeQueuePair *q) static void nvme_free_queue_pair(NVMeQueuePair *q)
@ -220,6 +219,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
q = g_try_new0(NVMeQueuePair, 1); q = g_try_new0(NVMeQueuePair, 1);
if (!q) { if (!q) {
error_setg(errp, "Cannot allocate queue pair");
return NULL; return NULL;
} }
trace_nvme_create_queue_pair(idx, q, size, aio_context, trace_nvme_create_queue_pair(idx, q, size, aio_context,
@ -228,6 +228,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
qemu_real_host_page_size); qemu_real_host_page_size);
q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes); q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes);
if (!q->prp_list_pages) { if (!q->prp_list_pages) {
error_setg(errp, "Cannot allocate PRP page list");
goto fail; goto fail;
} }
memset(q->prp_list_pages, 0, bytes); memset(q->prp_list_pages, 0, bytes);
@ -237,8 +238,9 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
qemu_co_queue_init(&q->free_req_queue); qemu_co_queue_init(&q->free_req_queue);
q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q); q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes, r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
false, &prp_list_iova); false, &prp_list_iova, errp);
if (r) { if (r) {
error_prepend(errp, "Cannot map buffer for DMA: ");
goto fail; goto fail;
} }
q->free_req_head = -1; q->free_req_head = -1;
@ -531,9 +533,9 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
error_setg(errp, "Cannot allocate buffer for identify response"); error_setg(errp, "Cannot allocate buffer for identify response");
goto out; goto out;
} }
r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova); r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova, errp);
if (r) { if (r) {
error_setg(errp, "Cannot map buffer for DMA"); error_prepend(errp, "Cannot map buffer for DMA: ");
goto out; goto out;
} }
@ -1017,6 +1019,7 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
uint64_t *pagelist = req->prp_list_page; uint64_t *pagelist = req->prp_list_page;
int i, j, r; int i, j, r;
int entries = 0; int entries = 0;
Error *local_err = NULL, **errp = NULL;
assert(qiov->size); assert(qiov->size);
assert(QEMU_IS_ALIGNED(qiov->size, s->page_size)); assert(QEMU_IS_ALIGNED(qiov->size, s->page_size));
@ -1029,7 +1032,7 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
try_map: try_map:
r = qemu_vfio_dma_map(s->vfio, r = qemu_vfio_dma_map(s->vfio,
qiov->iov[i].iov_base, qiov->iov[i].iov_base,
len, true, &iova); len, true, &iova, errp);
if (r == -ENOSPC) { if (r == -ENOSPC) {
/* /*
* In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA
@ -1064,6 +1067,8 @@ try_map:
goto fail; goto fail;
} }
} }
errp = &local_err;
goto try_map; goto try_map;
} }
if (r) { if (r) {
@ -1107,6 +1112,9 @@ fail:
* because they are already mapped before calling this function; for * because they are already mapped before calling this function; for
* temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by * temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by
* calling qemu_vfio_dma_reset_temporary when necessary. */ * calling qemu_vfio_dma_reset_temporary when necessary. */
if (local_err) {
error_reportf_err(local_err, "Cannot map buffer for DMA: ");
}
return r; return r;
} }
@ -1521,14 +1529,15 @@ static void nvme_aio_unplug(BlockDriverState *bs)
static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size) static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
{ {
int ret; int ret;
Error *local_err = NULL;
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL); ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err);
if (ret) { if (ret) {
/* FIXME: we may run out of IOVA addresses after repeated /* FIXME: we may run out of IOVA addresses after repeated
* bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
* doesn't reclaim addresses for fixed mappings. */ * doesn't reclaim addresses for fixed mappings. */
error_report("nvme_register_buf failed: %s", strerror(-ret)); error_reportf_err(local_err, "nvme_register_buf failed: ");
} }
} }

View File

@ -156,7 +156,7 @@ nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" byte
nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
nvme_dma_map_flush(void *s) "s %p" nvme_dma_map_flush(void *s) "s %p"
nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d" nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d"
nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p" nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p"
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64

View File

@ -18,7 +18,7 @@ typedef struct QEMUVFIOState QEMUVFIOState;
QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp); QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp);
void qemu_vfio_close(QEMUVFIOState *s); void qemu_vfio_close(QEMUVFIOState *s);
int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size, int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
bool temporary, uint64_t *iova_list); bool temporary, uint64_t *iova_list, Error **errp);
int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s); int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host); void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index, void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,

View File

@ -463,13 +463,15 @@ static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
size_t size, size_t max_size) size_t size, size_t max_size)
{ {
QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier); QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
Error *local_err = NULL;
int ret; int ret;
trace_qemu_vfio_ram_block_added(s, host, max_size); trace_qemu_vfio_ram_block_added(s, host, max_size);
ret = qemu_vfio_dma_map(s, host, max_size, false, NULL); ret = qemu_vfio_dma_map(s, host, max_size, false, NULL, &local_err);
if (ret) { if (ret) {
error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host, max_size, error_reportf_err(local_err,
strerror(-ret)); "qemu_vfio_dma_map(%p, %zu) failed: ",
host, max_size);
} }
} }
@ -608,7 +610,7 @@ static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
/* Do the DMA mapping with VFIO. */ /* Do the DMA mapping with VFIO. */
static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size, static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
uint64_t iova) uint64_t iova, Error **errp)
{ {
struct vfio_iommu_type1_dma_map dma_map = { struct vfio_iommu_type1_dma_map dma_map = {
.argsz = sizeof(dma_map), .argsz = sizeof(dma_map),
@ -620,7 +622,7 @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
trace_qemu_vfio_do_mapping(s, host, iova, size); trace_qemu_vfio_do_mapping(s, host, iova, size);
if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) { if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
error_report("VFIO_MAP_DMA failed: %s", strerror(errno)); error_setg_errno(errp, errno, "VFIO_MAP_DMA failed");
return -errno; return -errno;
} }
return 0; return 0;
@ -660,13 +662,13 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
if (QEMU_VFIO_DEBUG) { if (QEMU_VFIO_DEBUG) {
for (i = 0; i < s->nr_mappings - 1; ++i) { for (i = 0; i < s->nr_mappings - 1; ++i) {
if (!(s->mappings[i].host < s->mappings[i + 1].host)) { if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
fprintf(stderr, "item %d not sorted!\n", i); error_report("item %d not sorted!", i);
qemu_vfio_dump_mappings(s); qemu_vfio_dump_mappings(s);
return false; return false;
} }
if (!(s->mappings[i].host + s->mappings[i].size <= if (!(s->mappings[i].host + s->mappings[i].size <=
s->mappings[i + 1].host)) { s->mappings[i + 1].host)) {
fprintf(stderr, "item %d overlap with next!\n", i); error_report("item %d overlap with next!", i);
qemu_vfio_dump_mappings(s); qemu_vfio_dump_mappings(s);
return false; return false;
} }
@ -675,8 +677,8 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
return true; return true;
} }
static int static bool qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size,
qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova) uint64_t *iova, Error **errp)
{ {
int i; int i;
@ -691,14 +693,16 @@ qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) { s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
*iova = s->low_water_mark; *iova = s->low_water_mark;
s->low_water_mark += size; s->low_water_mark += size;
return 0; return true;
} }
} }
return -ENOMEM; error_setg(errp, "fixed iova range not found");
return false;
} }
static int static bool qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size,
qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova) uint64_t *iova, Error **errp)
{ {
int i; int i;
@ -713,10 +717,27 @@ qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) { s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
*iova = s->high_water_mark - size; *iova = s->high_water_mark - size;
s->high_water_mark = *iova; s->high_water_mark = *iova;
return 0; return true;
} }
} }
return -ENOMEM; error_setg(errp, "temporary iova range not found");
return false;
}
/**
* qemu_vfio_water_mark_reached:
*
* Returns %true if the IOVA space left between the low and high water marks
* is smaller than @size (in which case @errp is set), %false otherwise.
*/
static bool qemu_vfio_water_mark_reached(QEMUVFIOState *s, size_t size,
Error **errp)
{
if (s->high_water_mark - s->low_water_mark + 1 < size) {
error_setg(errp, "iova exhausted (water mark reached)");
return true;
}
return false;
} }
/* Map [host, host + size) area into a contiguous IOVA address space, and store /* Map [host, host + size) area into a contiguous IOVA address space, and store
@ -725,9 +746,8 @@ qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
* mapping status within this area is not allowed). * mapping status within this area is not allowed).
*/ */
int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size, int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
bool temporary, uint64_t *iova) bool temporary, uint64_t *iova, Error **errp)
{ {
int ret = 0;
int index; int index;
IOVAMapping *mapping; IOVAMapping *mapping;
uint64_t iova0; uint64_t iova0;
@ -735,41 +755,36 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size)); assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size)); assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
trace_qemu_vfio_dma_map(s, host, size, temporary, iova); trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
qemu_mutex_lock(&s->lock); QEMU_LOCK_GUARD(&s->lock);
mapping = qemu_vfio_find_mapping(s, host, &index); mapping = qemu_vfio_find_mapping(s, host, &index);
if (mapping) { if (mapping) {
iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host); iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
} else { } else {
if (s->high_water_mark - s->low_water_mark + 1 < size) { int ret;
ret = -ENOMEM;
goto out; if (qemu_vfio_water_mark_reached(s, size, errp)) {
return -ENOMEM;
} }
if (!temporary) { if (!temporary) {
if (qemu_vfio_find_fixed_iova(s, size, &iova0)) { if (!qemu_vfio_find_fixed_iova(s, size, &iova0, errp)) {
ret = -ENOMEM; return -ENOMEM;
goto out;
} }
mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0); mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
if (!mapping) {
ret = -ENOMEM;
goto out;
}
assert(qemu_vfio_verify_mappings(s)); assert(qemu_vfio_verify_mappings(s));
ret = qemu_vfio_do_mapping(s, host, size, iova0); ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
if (ret) { if (ret < 0) {
qemu_vfio_undo_mapping(s, mapping, NULL); qemu_vfio_undo_mapping(s, mapping, NULL);
goto out; return ret;
} }
qemu_vfio_dump_mappings(s); qemu_vfio_dump_mappings(s);
} else { } else {
if (qemu_vfio_find_temp_iova(s, size, &iova0)) { if (!qemu_vfio_find_temp_iova(s, size, &iova0, errp)) {
ret = -ENOMEM; return -ENOMEM;
goto out;
} }
ret = qemu_vfio_do_mapping(s, host, size, iova0); ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
if (ret) { if (ret < 0) {
goto out; return ret;
} }
} }
} }
@ -777,9 +792,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
if (iova) { if (iova) {
*iova = iova0; *iova = iova0;
} }
out: return 0;
qemu_mutex_unlock(&s->lock);
return ret;
} }
/* Reset the high watermark and free all "temporary" mappings. */ /* Reset the high watermark and free all "temporary" mappings. */
@ -813,14 +826,12 @@ void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host)
} }
trace_qemu_vfio_dma_unmap(s, host); trace_qemu_vfio_dma_unmap(s, host);
qemu_mutex_lock(&s->lock); QEMU_LOCK_GUARD(&s->lock);
m = qemu_vfio_find_mapping(s, host, &index); m = qemu_vfio_find_mapping(s, host, &index);
if (!m) { if (!m) {
goto out; return;
} }
qemu_vfio_undo_mapping(s, m, NULL); qemu_vfio_undo_mapping(s, m, NULL);
out:
qemu_mutex_unlock(&s->lock);
} }
static void qemu_vfio_reset(QEMUVFIOState *s) static void qemu_vfio_reset(QEMUVFIOState *s)