diff --git a/Makefile.objs b/Makefile.objs index b39d76cbb6..5f0b3f7136 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -17,8 +17,12 @@ coroutine-obj-y += qemu-coroutine-sleep.o ifeq ($(CONFIG_UCONTEXT_COROUTINE),y) coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o else +ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y) +coroutine-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o +else coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o endif +endif coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o ####################################################################### diff --git a/block.c b/block.c index 52ffe1494a..b88ee90817 100644 --- a/block.c +++ b/block.c @@ -1244,13 +1244,17 @@ ro_cleanup: return ret; } -void bdrv_commit_all(void) +int bdrv_commit_all(void) { BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, list) { - bdrv_commit(bs); + int ret = bdrv_commit(bs); + if (ret < 0) { + return ret; + } } + return 0; } struct BdrvTrackedRequest { diff --git a/block.h b/block.h index 48d0bf3592..415bb17c73 100644 --- a/block.h +++ b/block.h @@ -165,7 +165,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs); int bdrv_commit(BlockDriverState *bs); -void bdrv_commit_all(void); +int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 340a6f2b26..710d4b1828 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -25,6 +25,7 @@ #include "block_int.h" #include "qemu-common.h" #include "qcow2.h" +#include "trace.h" typedef struct Qcow2CachedTable { void* table; @@ -100,6 +101,9 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) return 0; } + trace_qcow2_cache_entry_flush(qemu_coroutine_self(), + c == s->l2_table_cache, i); + if (c->depends) { ret = qcow2_cache_flush_dependency(bs, c); } else if (c->depends_on_flush) { @@ -132,10 +136,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) { + BDRVQcowState *s = bs->opaque; int result = 0; int ret; int i; + trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); + for (i = 0; i < c->size; i++) { ret = qcow2_cache_entry_flush(bs, c, i); if (ret < 0 && result != -ENOSPC) { @@ -218,6 +225,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, int i; int ret; + trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, + offset, read_from_disk); + /* Check if the table is already cached */ for (i = 0; i < c->size; i++) { if (c->entries[i].offset == offset) { @@ -227,6 +237,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, /* If not, write a table back and replace it */ i = qcow2_cache_find_entry_to_replace(c); + trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), + c == s->l2_table_cache, i); if (i < 0) { return i; } @@ -236,6 +248,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, return ret; } + trace_qcow2_cache_get_read(qemu_coroutine_self(), + c == s->l2_table_cache, i); c->entries[i].offset = 0; if (read_from_disk) { if (c == s->l2_table_cache) { @@ -258,6 +272,10 @@ found: c->entries[i].cache_hits++; c->entries[i].ref++; *table = c->entries[i].table; + + trace_qcow2_cache_get_done(qemu_coroutine_self(), + c == s->l2_table_cache, i); + return 0; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 07a2e936fd..e0fb90792f 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -27,6 +27,7 @@ #include "qemu-common.h" #include "block_int.h" #include "block/qcow2.h" +#include "trace.h" int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) { @@ -170,6 +171,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) old_l2_offset = s->l1_table[l1_index]; + trace_qcow2_l2_allocate(bs, l1_index); + /* allocate a new l2 entry */ l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); @@ -184,6 +187,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* allocate a new entry in the l2 cache */ + trace_qcow2_l2_allocate_get_empty(bs, l1_index); ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); if (ret < 0) { return ret; @@ -216,6 +220,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* write the l2 table to the file */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); + trace_qcow2_l2_allocate_write_l2(bs, l1_index); qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { @@ -223,6 +228,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } /* update the L1 entry */ + trace_qcow2_l2_allocate_write_l1(bs, l1_index); s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; ret = write_l1_entry(bs, l1_index); if (ret < 0) { @@ -230,9 +236,11 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } *table = l2_table; + trace_qcow2_l2_allocate_done(bs, l1_index, 0); return 0; fail: + trace_qcow2_l2_allocate_done(bs, l1_index, ret); qcow2_cache_put(bs, s->l2_table_cache, (void**) table); s->l1_table[l1_index] = old_l2_offset; return ret; @@ -581,9 +589,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) BDRVQcowState *s = bs->opaque; int i, j = 0, l2_index, ret; uint64_t *old_cluster, start_sect, l2_offset, *l2_table; - uint64_t cluster_offset = m->cluster_offset; + uint64_t cluster_offset = m->alloc_offset; bool cow = false; + trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + if (m->nb_clusters == 0) return 0; @@ -667,64 +677,15 @@ err: } /* - * alloc_cluster_offset - * - * For a given offset of the disk image, return cluster offset in qcow2 file. - * If the offset is not found, allocate a new cluster. - * - * If the cluster was already allocated, m->nb_clusters is set to 0, - * other fields in m are meaningless. - * - * If the cluster is newly allocated, m->nb_clusters is set to the number of - * contiguous clusters that have been allocated. In this case, the other - * fields of m are valid and contain information about the first allocated - * cluster. - * - * If the request conflicts with another write request in flight, the coroutine - * is queued and will be reentered when the dependency has completed. - * - * Return 0 on success and -errno in error cases + * Returns the number of contiguous clusters that can be used for an allocating + * write, but require COW to be performed (this includes yet unallocated space, + * which must copy from the backing file) */ -int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) +static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, + uint64_t *l2_table, int l2_index) { - BDRVQcowState *s = bs->opaque; - int l2_index, ret; - uint64_t l2_offset, *l2_table; - int64_t cluster_offset; - unsigned int nb_clusters, i = 0; - QCowL2Meta *old_alloc; - - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); - if (ret < 0) { - return ret; - } - -again: - nb_clusters = size_to_clusters(s, n_end << 9); - - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - - cluster_offset = be64_to_cpu(l2_table[l2_index]); - - /* We keep all QCOW_OFLAG_COPIED clusters */ - - if (cluster_offset & QCOW_OFLAG_COPIED) { - nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, 0); - - cluster_offset &= ~QCOW_OFLAG_COPIED; - m->nb_clusters = 0; - - goto out; - } - - /* for the moment, multiple compressed clusters are not managed */ - - if (cluster_offset & QCOW_OFLAG_COMPRESSED) - nb_clusters = 1; - - /* how many available clusters ? */ + int i = 0; + uint64_t cluster_offset; while (i < nb_clusters) { i += count_contiguous_clusters(nb_clusters - i, s->cluster_size, @@ -745,8 +706,39 @@ again: (cluster_offset & QCOW_OFLAG_COMPRESSED)) break; } + assert(i <= nb_clusters); - nb_clusters = i; + return i; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table) +{ + BDRVQcowState *s = bs->opaque; + int64_t cluster_offset; + QCowL2Meta *old_alloc; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); /* * Check if there already is an AIO write request in flight which allocates @@ -755,8 +747,8 @@ again: */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { - uint64_t start = offset >> s->cluster_bits; - uint64_t end = start + nb_clusters; + uint64_t start = guest_offset >> s->cluster_bits; + uint64_t end = start + *nb_clusters; uint64_t old_start = old_alloc->offset >> s->cluster_bits; uint64_t old_end = old_start + old_alloc->nb_clusters; @@ -765,58 +757,185 @@ again: } else { if (start < old_start) { /* Stop at the start of a running allocation */ - nb_clusters = old_start - start; + *nb_clusters = old_start - start; } else { - nb_clusters = 0; + *nb_clusters = 0; } - if (nb_clusters == 0) { + if (*nb_clusters == 0) { /* Wait for the dependency to complete. We need to recheck * the free/allocated clusters when we continue. */ qemu_co_mutex_unlock(&s->lock); qemu_co_queue_wait(&old_alloc->dependent_requests); qemu_co_mutex_lock(&s->lock); - goto again; + return -EAGAIN; } } } - if (!nb_clusters) { + if (!*nb_clusters) { abort(); } - /* save info needed for meta data update */ - m->offset = offset; - m->n_start = n_start; - m->nb_clusters = nb_clusters; - - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); - - /* allocate a new cluster */ - - cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size); - if (cluster_offset < 0) { - ret = cluster_offset; - goto fail; + /* Allocate new clusters */ + trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); + if (*host_offset == 0) { + cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); + } else { + cluster_offset = *host_offset; + *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters); } -out: + if (cluster_offset < 0) { + return cluster_offset; + } + *host_offset = cluster_offset; + return 0; +} + +/* + * alloc_cluster_offset + * + * For a given offset on the virtual disk, find the cluster offset in qcow2 + * file. If the offset is not found, allocate a new cluster. + * + * If the cluster was already allocated, m->nb_clusters is set to 0 and + * other fields in m are meaningless. + * + * If the cluster is newly allocated, m->nb_clusters is set to the number of + * contiguous clusters that have been allocated. In this case, the other + * fields of m are valid and contain information about the first allocated + * cluster. + * + * If the request conflicts with another write request in flight, the coroutine + * is queued and will be reentered when the dependency has completed. + * + * Return 0 on success and -errno in error cases + */ +int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, + int n_start, int n_end, int *num, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index, ret, sectors; + uint64_t l2_offset, *l2_table; + unsigned int nb_clusters, keep_clusters; + uint64_t cluster_offset; + + trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, + n_start, n_end); + + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + if (ret < 0) { + return ret; + } + + /* + * Calculate the number of clusters to look for. We stop at L2 table + * boundaries to keep things simple. + */ +again: + nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS), + s->l2_size - l2_index); + + cluster_offset = be64_to_cpu(l2_table[l2_index]); + + /* + * Check how many clusters are already allocated and don't need COW, and how + * many need a new allocation. + */ + if (cluster_offset & QCOW_OFLAG_COPIED) { + /* We keep all QCOW_OFLAG_COPIED clusters */ + keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, 0); + assert(keep_clusters <= nb_clusters); + nb_clusters -= keep_clusters; + } else { + /* For the moment, overwrite compressed clusters one by one */ + if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + nb_clusters = 1; + } else { + nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); + } + + keep_clusters = 0; + cluster_offset = 0; + } + + cluster_offset &= ~QCOW_OFLAG_COPIED; + + /* If there is something left to allocate, do that now */ + *m = (QCowL2Meta) { + .cluster_offset = cluster_offset, + .nb_clusters = 0, + }; + qemu_co_queue_init(&m->dependent_requests); + + if (nb_clusters > 0) { + uint64_t alloc_offset; + uint64_t alloc_cluster_offset; + uint64_t keep_bytes = keep_clusters * s->cluster_size; + + /* Calculate start and size of allocation */ + alloc_offset = offset + keep_bytes; + + if (keep_clusters == 0) { + alloc_cluster_offset = 0; + } else { + alloc_cluster_offset = cluster_offset + keep_bytes; + } + + /* Allocate, if necessary at a given offset in the image file */ + ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset, + &nb_clusters, l2_table); + if (ret == -EAGAIN) { + goto again; + } else if (ret < 0) { + goto fail; + } + + /* save info needed for meta data update */ + if (nb_clusters > 0) { + int requested_sectors = n_end - keep_clusters * s->cluster_sectors; + int avail_sectors = (keep_clusters + nb_clusters) + << (s->cluster_bits - BDRV_SECTOR_BITS); + + *m = (QCowL2Meta) { + .cluster_offset = keep_clusters == 0 ? + alloc_cluster_offset : cluster_offset, + .alloc_offset = alloc_cluster_offset, + .offset = alloc_offset, + .n_start = keep_clusters == 0 ? n_start : 0, + .nb_clusters = nb_clusters, + .nb_available = MIN(requested_sectors, avail_sectors), + }; + qemu_co_queue_init(&m->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + } + } + + /* Some cleanup work */ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); if (ret < 0) { goto fail_put; } - m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); - m->cluster_offset = cluster_offset; + sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9); + if (sectors > n_end) { + sectors = n_end; + } - *num = m->nb_available - n_start; + assert(sectors > n_start); + *num = sectors - n_start; return 0; fail: qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); fail_put: - QLIST_REMOVE(m, next_in_flight); + if (nb_clusters > 0) { + QLIST_REMOVE(m, next_in_flight); + } return ret; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 2db2ede3d1..f39928a6bf 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -582,6 +582,34 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) return offset; } +int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + uint64_t cluster_index; + int i, refcount, ret; + + /* Check how many clusters there are free */ + cluster_index = offset >> s->cluster_bits; + for(i = 0; i < nb_clusters; i++) { + refcount = get_refcount(bs, cluster_index++); + + if (refcount < 0) { + return refcount; + } else if (refcount != 0) { + break; + } + } + + /* And then allocate them */ + ret = update_refcount(bs, offset, i << s->cluster_bits, 1); + if (ret < 0) { + return ret; + } + + return i; +} + /* only used to allocate compressed sectors. We try to allocate contiguous sectors. size must be <= cluster_size */ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) diff --git a/block/qcow2.c b/block/qcow2.c index eb5ea485d9..7aece65406 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -29,6 +29,7 @@ #include "block/qcow2.h" #include "qemu-error.h" #include "qerror.h" +#include "trace.h" /* Differences with QCOW: @@ -569,6 +570,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, .nb_clusters = 0, }; + trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, + remaining_sectors); + qemu_co_queue_init(&l2meta.dependent_requests); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -579,6 +583,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; if (s->crypt_method && @@ -619,6 +624,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); qemu_co_mutex_unlock(&s->lock); + trace_qcow2_writev_data(qemu_coroutine_self(), + (cluster_offset >> 9) + index_in_cluster); ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + index_in_cluster, cur_nr_sectors, &hd_qiov); @@ -637,6 +644,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; bytes_done += cur_nr_sectors * 512; + trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors); } ret = 0; @@ -647,6 +655,7 @@ fail: qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); + trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); return ret; } @@ -1111,16 +1120,19 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) int ret, new_l1_size; if (offset & 511) { + error_report("The new size must be a multiple of 512"); return -EINVAL; } /* cannot proceed if image has snapshots */ if (s->nb_snapshots) { + error_report("Can't resize an image which has snapshots"); return -ENOTSUP; } /* shrinking is currently not supported */ if (offset < bs->total_sectors * 512) { + error_report("qcow2 doesn't support shrinking images yet"); return -ENOTSUP; } diff --git a/block/qcow2.h b/block/qcow2.h index fc35838175..e4ac366cfc 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -155,6 +155,7 @@ typedef struct QCowL2Meta { uint64_t offset; uint64_t cluster_offset; + uint64_t alloc_offset; int n_start; int nb_available; int nb_clusters; @@ -193,6 +194,8 @@ int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size); +int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int nb_clusters); int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size); void qcow2_free_clusters(BlockDriverState *bs, int64_t offset, int64_t size); diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c index 02b81a2e33..e9b2aae44d 100644 --- a/block/qed-l2-cache.c +++ b/block/qed-l2-cache.c @@ -161,11 +161,25 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table) return; } + /* Evict an unused cache entry so we have space. If all entries are in use + * we can grow the cache temporarily and we try to shrink back down later. + */ if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) { - entry = QTAILQ_FIRST(&l2_cache->entries); - QTAILQ_REMOVE(&l2_cache->entries, entry, node); - l2_cache->n_entries--; - qed_unref_l2_cache_entry(entry); + CachedL2Table *next; + QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) { + if (entry->ref > 1) { + continue; + } + + QTAILQ_REMOVE(&l2_cache->entries, entry, node); + l2_cache->n_entries--; + qed_unref_l2_cache_entry(entry); + + /* Stop evicting when we've shrunk back to max size */ + if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) { + break; + } + } } l2_cache->n_entries++; diff --git a/blockdev.c b/blockdev.c index d78aa51af5..1a500b830d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -627,12 +627,15 @@ void do_commit(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_str(qdict, "device"); BlockDriverState *bs; + int ret; if (!strcmp(device, "all")) { - bdrv_commit_all(); + ret = bdrv_commit_all(); + if (ret == -EBUSY) { + qerror_report(QERR_DEVICE_IN_USE, device); + return; + } } else { - int ret; - bs = bdrv_find(device); if (!bs) { qerror_report(QERR_DEVICE_NOT_FOUND, device); @@ -646,101 +649,55 @@ void do_commit(Monitor *mon, const QDict *qdict) } } +static void blockdev_do_action(int kind, void *data, Error **errp) +{ + BlockdevAction action; + BlockdevActionList list; + + action.kind = kind; + action.data = data; + list.value = &action; + list.next = NULL; + qmp_transaction(&list, errp); +} + void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, bool has_format, const char *format, + bool has_mode, enum NewImageMode mode, Error **errp) { - BlockDriverState *bs; - BlockDriver *drv, *old_drv, *proto_drv; - int ret = 0; - int flags; - char old_filename[1024]; - - bs = bdrv_find(device); - if (!bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); - return; - } - if (bdrv_in_use(bs)) { - error_set(errp, QERR_DEVICE_IN_USE, device); - return; - } - - pstrcpy(old_filename, sizeof(old_filename), bs->filename); - - old_drv = bs->drv; - flags = bs->open_flags; - - if (!has_format) { - format = "qcow2"; - } - - drv = bdrv_find_format(format); - if (!drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; - } - - proto_drv = bdrv_find_protocol(snapshot_file); - if (!proto_drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; - } - - ret = bdrv_img_create(snapshot_file, format, bs->filename, - bs->drv->format_name, NULL, -1, flags); - if (ret) { - error_set(errp, QERR_UNDEFINED_ERROR); - return; - } - - bdrv_drain_all(); - bdrv_flush(bs); - - bdrv_close(bs); - ret = bdrv_open(bs, snapshot_file, flags, drv); - /* - * If reopening the image file we just created fails, fall back - * and try to re-open the original image. If that fails too, we - * are in serious trouble. - */ - if (ret != 0) { - ret = bdrv_open(bs, old_filename, flags, old_drv); - if (ret != 0) { - error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); - } else { - error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); - } - } + BlockdevSnapshot snapshot = { + .device = (char *) device, + .snapshot_file = (char *) snapshot_file, + .has_format = has_format, + .format = (char *) format, + .has_mode = has_mode, + .mode = mode, + }; + blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot, + errp); } /* New and old BlockDriverState structs for group snapshots */ -typedef struct BlkGroupSnapshotStates { +typedef struct BlkTransactionStates { BlockDriverState *old_bs; BlockDriverState *new_bs; - QSIMPLEQ_ENTRY(BlkGroupSnapshotStates) entry; -} BlkGroupSnapshotStates; + QSIMPLEQ_ENTRY(BlkTransactionStates) entry; +} BlkTransactionStates; /* * 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail * then we do not pivot any of the devices in the group, and abandon the * snapshots */ -void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list, - Error **errp) +void qmp_transaction(BlockdevActionList *dev_list, Error **errp) { int ret = 0; - SnapshotDevList *dev_entry = dev_list; - SnapshotDev *dev_info = NULL; - BlkGroupSnapshotStates *states; - BlockDriver *proto_drv; - BlockDriver *drv; - int flags; - const char *format; - const char *snapshot_file; + BlockdevActionList *dev_entry = dev_list; + BlkTransactionStates *states, *next; - QSIMPLEQ_HEAD(snap_bdrv_states, BlkGroupSnapshotStates) snap_bdrv_states; + QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states; QSIMPLEQ_INIT(&snap_bdrv_states); /* drain all i/o before any snapshots */ @@ -748,21 +705,51 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list, /* We don't do anything in this loop that commits us to the snapshot */ while (NULL != dev_entry) { + BlockdevAction *dev_info = NULL; + BlockDriver *proto_drv; + BlockDriver *drv; + int flags; + enum NewImageMode mode; + const char *new_image_file; + const char *device; + const char *format = "qcow2"; + dev_info = dev_entry->value; dev_entry = dev_entry->next; - states = g_malloc0(sizeof(BlkGroupSnapshotStates)); + states = g_malloc0(sizeof(BlkTransactionStates)); QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); - states->old_bs = bdrv_find(dev_info->device); + switch (dev_info->kind) { + case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: + device = dev_info->blockdev_snapshot_sync->device; + if (!dev_info->blockdev_snapshot_sync->has_mode) { + dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file; + if (dev_info->blockdev_snapshot_sync->has_format) { + format = dev_info->blockdev_snapshot_sync->format; + } + mode = dev_info->blockdev_snapshot_sync->mode; + break; + default: + abort(); + } + drv = bdrv_find_format(format); + if (!drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + goto delete_and_fail; + } + + states->old_bs = bdrv_find(device); if (!states->old_bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, dev_info->device); + error_set(errp, QERR_DEVICE_NOT_FOUND, device); goto delete_and_fail; } if (bdrv_in_use(states->old_bs)) { - error_set(errp, QERR_DEVICE_IN_USE, dev_info->device); + error_set(errp, QERR_DEVICE_IN_USE, device); goto delete_and_fail; } @@ -775,43 +762,32 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list, } } - snapshot_file = dev_info->snapshot_file; - flags = states->old_bs->open_flags; - if (!dev_info->has_format) { - format = "qcow2"; - } else { - format = dev_info->format; - } - - drv = bdrv_find_format(format); - if (!drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - goto delete_and_fail; - } - - proto_drv = bdrv_find_protocol(snapshot_file); + proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); goto delete_and_fail; } /* create new image w/backing file */ - ret = bdrv_img_create(snapshot_file, format, - states->old_bs->filename, - drv->format_name, NULL, -1, flags); - if (ret) { - error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); - goto delete_and_fail; + if (mode != NEW_IMAGE_MODE_EXISTING) { + ret = bdrv_img_create(new_image_file, format, + states->old_bs->filename, + states->old_bs->drv->format_name, + NULL, -1, flags); + if (ret) { + error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); + goto delete_and_fail; + } } /* We will manually add the backing_hd field to the bs later */ states->new_bs = bdrv_new(""); - ret = bdrv_open(states->new_bs, snapshot_file, + ret = bdrv_open(states->new_bs, new_image_file, flags | BDRV_O_NO_BACKING, drv); if (ret != 0) { - error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); + error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); goto delete_and_fail; } } @@ -838,7 +814,7 @@ delete_and_fail: } } exit: - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { + QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) { g_free(states); } return; diff --git a/configure b/configure index 39d2b54519..ddb3e398fa 100755 --- a/configure +++ b/configure @@ -194,6 +194,7 @@ opengl="" zlib="yes" guest_agent="yes" libiscsi="" +coroutine="" # parse CC options first for opt do @@ -784,6 +785,8 @@ for opt do ;; --with-pkgversion=*) pkgversion=" ($optarg)" ;; + --with-coroutine=*) coroutine="$optarg" + ;; --disable-docs) docs="no" ;; --enable-docs) docs="yes" @@ -1110,6 +1113,8 @@ echo " --disable-usb-redir disable usb network redirection support" echo " --enable-usb-redir enable usb network redirection support" echo " --disable-guest-agent disable building of the QEMU Guest Agent" echo " --enable-guest-agent enable building of the QEMU Guest Agent" +echo " --with-coroutine=BACKEND coroutine backend. Supported options:" +echo " gthread, ucontext, sigaltstack, windows" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2715,21 +2720,38 @@ EOF fi ########################################## -# check if we have makecontext -# (and that it's not a glibc stub which always returns -1) +# check and set a backend for coroutine -ucontext_coroutine=no -if test "$darwin" != "yes"; then - cat > $TMPC << EOF +# default is ucontext, but always fallback to gthread +# windows autodetected by make +if test "$coroutine" = "" -o "$coroutine" = "ucontext"; then + if test "$darwin" != "yes"; then + cat > $TMPC << EOF #include <ucontext.h> #ifdef __stub_makecontext #error Ignoring glibc stub makecontext which will always fail #endif int main(void) { makecontext(0, 0, 0); return 0; } EOF - if compile_prog "" "" ; then - ucontext_coroutine=yes + if compile_prog "" "" ; then + coroutine_backend=ucontext + else + coroutine_backend=gthread + fi + else + echo "Silently falling back into gthread backend under darwin" fi +elif test "$coroutine" = "gthread" ; then + coroutine_backend=gthread +elif test "$coroutine" = "windows" ; then + coroutine_backend=windows +elif test "$coroutine" = "sigaltstack" ; then + coroutine_backend=sigaltstack +else + echo + echo "Error: unknown coroutine backend $coroutine" + echo + exit 1 fi ########################################## @@ -2931,6 +2953,7 @@ echo "usb net redir $usb_redir" echo "OpenGL support $opengl" echo "libiscsi support $libiscsi" echo "build guest agent $guest_agent" +echo "coroutine backend $coroutine_backend" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -3246,8 +3269,10 @@ if test "$rbd" = "yes" ; then echo "CONFIG_RBD=y" >> $config_host_mak fi -if test "$ucontext_coroutine" = "yes" ; then +if test "$coroutine_backend" = "ucontext" ; then echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak +elif test "$coroutine_backend" = "sigaltstack" ; then + echo "CONFIG_SIGALTSTACK_COROUTINE=y" >> $config_host_mak fi if test "$open_by_handle_at" = "yes" ; then diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c new file mode 100644 index 0000000000..7ff2d3379e --- /dev/null +++ b/coroutine-sigaltstack.c @@ -0,0 +1,334 @@ +/* + * sigaltstack coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com> + * Copyright (C) 2012 Alex Barcelo <abarcelo@ac.upc.edu> +** This file is partly based on pth_mctx.c, from the GNU Portable Threads +** Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include <stdlib.h> +#include <setjmp.h> +#include <stdint.h> +#include <pthread.h> +#include <signal.h> +#include "qemu-common.h" +#include "qemu-coroutine-int.h" + +enum { + /* Maximum free pool size prevents holding too many freed coroutines */ + POOL_MAX_SIZE = 64, +}; + +/** Free list to speed up creation */ +static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool); +static unsigned int pool_size; + +typedef struct { + Coroutine base; + void *stack; + jmp_buf env; +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +typedef struct { + /** Currently executing coroutine */ + Coroutine *current; + + /** The default coroutine */ + CoroutineUContext leader; + + /** Information for the signal handler (trampoline) */ + jmp_buf tr_reenter; + volatile sig_atomic_t tr_called; + void *tr_handler; +} CoroutineThreadState; + +static pthread_key_t thread_state_key; + +static CoroutineThreadState *coroutine_get_thread_state(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + if (!s) { + s = g_malloc0(sizeof(*s)); + s->current = &s->leader.base; + pthread_setspecific(thread_state_key, s); + } + return s; +} + +static void qemu_coroutine_thread_cleanup(void *opaque) +{ + CoroutineThreadState *s = opaque; + + g_free(s); +} + +static void __attribute__((destructor)) coroutine_cleanup(void) +{ + Coroutine *co; + Coroutine *tmp; + + QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) { + g_free(DO_UPCAST(CoroutineUContext, base, co)->stack); + g_free(co); + } +} + +static void __attribute__((constructor)) coroutine_init(void) +{ + int ret; + + ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); + if (ret != 0) { + fprintf(stderr, "unable to create leader key: %s\n", strerror(errno)); + abort(); + } +} + +/* "boot" function + * This is what starts the coroutine, is called from the trampoline + * (from the signal handler when it is not signal handling, read ahead + * for more information). + */ +static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +{ + /* Initialize longjmp environment and switch back the caller */ + if (!setjmp(self->env)) { + longjmp(*(jmp_buf *)co->entry_arg, 1); + } + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +/* + * This is used as the signal handler. This is called with the brand new stack + * (thanks to sigaltstack). We have to return, given that this is a signal + * handler and the sigmask and some other things are changed. + */ +static void coroutine_trampoline(int signal) +{ + CoroutineUContext *self; + Coroutine *co; + CoroutineThreadState *coTS; + + /* Get the thread specific information */ + coTS = coroutine_get_thread_state(); + self = coTS->tr_handler; + coTS->tr_called = 1; + co = &self->base; + + /* + * Here we have to do a bit of a ping pong between the caller, given that + * this is a signal handler and we have to do a return "soon". Then the + * caller can reestablish everything and do a longjmp here again. + */ + if (!setjmp(coTS->tr_reenter)) { + return; + } + + /* + * Ok, the caller has longjmp'ed back to us, so now prepare + * us for the real machine state switching. We have to jump + * into another function here to get a new stack context for + * the auto variables (which have to be auto-variables + * because the start of the thread happens later). Else with + * PIC (i.e. Position Independent Code which is used when PTH + * is built as a shared library) most platforms would + * horrible core dump as experience showed. + */ + coroutine_bootstrap(self, co); +} + +static Coroutine *coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineUContext *co; + CoroutineThreadState *coTS; + struct sigaction sa; + struct sigaction osa; + struct sigaltstack ss; + struct sigaltstack oss; + sigset_t sigs; + sigset_t osigs; + jmp_buf old_env; + + /* The way to manipulate stack is with the sigaltstack function. We + * prepare a stack, with it delivering a signal to ourselves and then + * put setjmp/longjmp where needed. + * This has been done keeping coroutine-ucontext as a model and with the + * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics + * of the coroutines and see pth_mctx.c (from the pth project) for the + * sigaltstack way of manipulating stacks. + */ + + co = g_malloc0(sizeof(*co)); + co->stack = g_malloc(stack_size); + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ + + coTS = coroutine_get_thread_state(); + coTS->tr_handler = co; + + /* + * Preserve the SIGUSR2 signal state, block SIGUSR2, + * and establish our signal handler. The signal will + * later transfer control onto the signal stack. + */ + sigemptyset(&sigs); + sigaddset(&sigs, SIGUSR2); + pthread_sigmask(SIG_BLOCK, &sigs, &osigs); + sa.sa_handler = coroutine_trampoline; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK; + if (sigaction(SIGUSR2, &sa, &osa) != 0) { + abort(); + } + + /* + * Set the new stack. + */ + ss.ss_sp = co->stack; + ss.ss_size = stack_size; + ss.ss_flags = 0; + if (sigaltstack(&ss, &oss) < 0) { + abort(); + } + + /* + * Now transfer control onto the signal stack and set it up. + * It will return immediately via "return" after the setjmp() + * was performed. Be careful here with race conditions. The + * signal can be delivered the first time sigsuspend() is + * called. + */ + coTS->tr_called = 0; + kill(getpid(), SIGUSR2); + sigfillset(&sigs); + sigdelset(&sigs, SIGUSR2); + while (!coTS->tr_called) { + sigsuspend(&sigs); + } + + /* + * Inform the system that we are back off the signal stack by + * removing the alternative signal stack. Be careful here: It + * first has to be disabled, before it can be removed. + */ + sigaltstack(NULL, &ss); + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, NULL) < 0) { + abort(); + } + sigaltstack(NULL, &ss); + if (!(oss.ss_flags & SS_DISABLE)) { + sigaltstack(&oss, NULL); + } + + /* + * Restore the old SIGUSR2 signal handler and mask + */ + sigaction(SIGUSR2, &osa, NULL); + pthread_sigmask(SIG_SETMASK, &osigs, NULL); + + /* + * Now enter the trampoline again, but this time not as a signal + * handler. Instead we jump into it directly. The functionally + * redundant ping-pong pointer arithmentic is neccessary to avoid + * type-conversion warnings related to the `volatile' qualifier and + * the fact that `jmp_buf' usually is an array type. + */ + if (!setjmp(old_env)) { + longjmp(coTS->tr_reenter, 1); + } + + /* + * Ok, we returned again, so now we're finished + */ + + return &co->base; +} + +Coroutine *qemu_coroutine_new(void) +{ + Coroutine *co; + + co = QSLIST_FIRST(&pool); + if (co) { + QSLIST_REMOVE_HEAD(&pool, pool_next); + pool_size--; + } else { + co = coroutine_new(); + } + return co; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + + if (pool_size < POOL_MAX_SIZE) { + QSLIST_INSERT_HEAD(&pool, &co->base, pool_next); + co->base.caller = NULL; + pool_size++; + return; + } + + g_free(co->stack); + g_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); + CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineThreadState *s = coroutine_get_thread_state(); + int ret; + + s->current = to_; + + ret = setjmp(from->env); + if (ret == 0) { + longjmp(to->env, action); + } + return ret; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineThreadState *s = coroutine_get_thread_state(); + + return s->current; +} + +bool qemu_in_coroutine(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + return s && s->current->caller; +} + diff --git a/hmp-commands.hx b/hmp-commands.hx index ed88877230..6980214a1a 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -882,14 +882,17 @@ ETEXI { .name = "snapshot_blkdev", - .args_type = "device:B,snapshot-file:s?,format:s?", - .params = "device [new-image-file] [format]", + .args_type = "reuse:-n,device:B,snapshot-file:s?,format:s?", + .params = "[-n] device [new-image-file] [format]", .help = "initiates a live snapshot\n\t\t\t" "of device. If a new image file is specified, the\n\t\t\t" "new image file will become the new root image.\n\t\t\t" "If format is specified, the snapshot file will\n\t\t\t" "be created in that format. Otherwise the\n\t\t\t" - "snapshot will be internal! (currently unsupported)", + "snapshot will be internal! (currently unsupported).\n\t\t\t" + "The default format is qcow2. The -n flag requests QEMU\n\t\t\t" + "to reuse the image found in new-image-file, instead of\n\t\t\t" + "recreating it from scratch.", .mhandler.cmd = hmp_snapshot_blkdev, }, diff --git a/hmp.c b/hmp.c index 3a54455574..290c43d03a 100644 --- a/hmp.c +++ b/hmp.c @@ -692,6 +692,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) const char *device = qdict_get_str(qdict, "device"); const char *filename = qdict_get_try_str(qdict, "snapshot-file"); const char *format = qdict_get_try_str(qdict, "format"); + int reuse = qdict_get_try_bool(qdict, "reuse", 0); + enum NewImageMode mode; Error *errp = NULL; if (!filename) { @@ -702,7 +704,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) return; } - qmp_blockdev_snapshot_sync(device, filename, !!format, format, &errp); + mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; + qmp_blockdev_snapshot_sync(device, filename, !!format, format, + true, mode, &errp); hmp_handle_error(mon, &errp); } diff --git a/qapi-schema-test.json b/qapi-schema-test.json index 2b38919001..8c7f9f79f4 100644 --- a/qapi-schema-test.json +++ b/qapi-schema-test.json @@ -22,6 +22,16 @@ 'dict2': { 'userdef1': 'UserDefOne', 'string2': 'str' }, '*dict3': { 'userdef2': 'UserDefOne', 'string3': 'str' } } } } +# for testing unions +{ 'type': 'UserDefA', + 'data': { 'boolean': 'bool' } } + +{ 'type': 'UserDefB', + 'data': { 'integer': 'int' } } + +{ 'union': 'UserDefUnion', + 'data': { 'a' : 'UserDefA', 'b' : 'UserDefB' } } + # testing commands { 'command': 'user_def_cmd', 'data': {} } { 'command': 'user_def_cmd1', 'data': {'ud1a': 'UserDefOne'} } diff --git a/qapi-schema.json b/qapi-schema.json index dd9e0e5434..04fa84fbde 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1120,27 +1120,58 @@ { 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }} ## -# @SnapshotDev +# @NewImageMode +# +# An enumeration that tells QEMU how to set the backing file path in +# a new image file. +# +# @existing: QEMU should look for an existing image file. +# +# @absolute-paths: QEMU should create a new image with absolute paths +# for the backing file. +# +# Since: 1.1 +## +{ 'enum': 'NewImageMode' + 'data': [ 'existing', 'absolute-paths' ] } + +## +# @BlockdevSnapshot # # @device: the name of the device to generate the snapshot from. # # @snapshot-file: the target of the new image. A new file will be created. # # @format: #optional the format of the snapshot image, default is 'qcow2'. +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. ## -{ 'type': 'SnapshotDev', - 'data': {'device': 'str', 'snapshot-file': 'str', '*format': 'str' } } +{ 'type': 'BlockdevSnapshot', + 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', + '*mode': 'NewImageMode' } } ## -# @blockdev-group-snapshot-sync +# @BlockdevAction # -# Generates a synchronous snapshot of a group of one or more block devices, -# as atomically as possible. If the snapshot of any device in the group -# fails, then the entire group snapshot will be abandoned and the -# appropriate error returned. +# A discriminated record of operations that can be performed with +# @transaction. +## +{ 'union': 'BlockdevAction', + 'data': { + 'blockdev-snapshot-sync': 'BlockdevSnapshot', + } } + +## +# @transaction +# +# Atomically operate on a group of one or more block devices. If +# any operation fails, then the entire set of actions will be +# abandoned and the appropriate error returned. The only operation +# supported is currently blockdev-snapshot-sync. # # List of: -# @SnapshotDev: information needed for the device snapshot +# @BlockdevAction: information needed for the device snapshot # # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound @@ -1149,13 +1180,14 @@ # If @snapshot-file can't be opened, OpenFileFailed # If @format is invalid, InvalidBlockFormat # -# Note: The group snapshot attempt returns failure on the first snapshot -# device failure. Therefore, there will be only one device or snapshot file -# returned in an error condition, and subsequent devices will not have been -# attempted. +# Note: The transaction aborts on the first failure. Therefore, there will +# be only one device or snapshot file returned in an error condition, and +# subsequent actions will not have been attempted. +# +# Since 1.1 ## -{ 'command': 'blockdev-group-snapshot-sync', - 'data': { 'devlist': [ 'SnapshotDev' ] } } +{ 'command': 'transaction', + 'data': { 'actions': [ 'BlockdevAction' ] } } ## # @blockdev-snapshot-sync @@ -1170,21 +1202,19 @@ # # @format: #optional the format of the snapshot image, default is 'qcow2'. # +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound # If @snapshot-file can't be opened, OpenFileFailed # If @format is invalid, InvalidBlockFormat # -# Notes: One of the last steps taken by this command is to close the current -# image being used by @device and open the @snapshot-file one. If that -# fails, the command will try to reopen the original image file. If -# that also fails OpenFileFailed will be returned and the guest may get -# unexpected errors. -# # Since 0.14.0 ## { 'command': 'blockdev-snapshot-sync', - 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } } + 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', + '*mode': 'NewImageMode'} } ## # @human-monitor-command: diff --git a/qemu-img.c b/qemu-img.c index 8df35648e9..0e48b35296 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1614,7 +1614,7 @@ static int img_resize(int argc, char **argv) printf("Image resized.\n"); break; case -ENOTSUP: - error_report("This image format does not support resize"); + error_report("This image does not support resize"); break; case -EACCES: error_report("Image is read-only"); diff --git a/qmp-commands.hx b/qmp-commands.hx index 0c9bfac20d..dfe8a5b40b 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -687,41 +687,55 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_job_cancel, }, { - .name = "blockdev-group-snapshot-sync", - .args_type = "devlist:O", - .params = "device:B,snapshot-file:s,format:s?", - .mhandler.cmd_new = qmp_marshal_input_blockdev_group_snapshot_sync, + .name = "transaction", + .args_type = "actions:O", + .mhandler.cmd_new = qmp_marshal_input_transaction, }, SQMP -blockdev-group-snapshot-sync ----------------------- +transaction +----------- -Synchronous snapshot of one or more block devices. A list array input -is accepted, that contains the device and snapshot file information for -each device in group. The default format, if not specified, is qcow2. +Atomically operate on one or more block devices. The only supported +operation for now is snapshotting. If there is any failure performing +any of the operations, all snapshots for the group are abandoned, and +the original disks pre-snapshot attempt are used. -If there is any failure creating or opening a new snapshot, all snapshots -for the group are abandoned, and the original disks pre-snapshot attempt -are used. +A list of dictionaries is accepted, that contains the actions to be performed. +For snapshots this is the device, the file to use for the new snapshot, +and the format. The default format, if not specified, is qcow2. +Each new snapshot defaults to being created by QEMU (wiping any +contents if the file already exists), but it is also possible to reuse +an externally-created file. In the latter case, you should ensure that +the new image file has the same contents as the current one; QEMU cannot +perform any meaningful check. Typically this is achieved by using the +current image file as the backing file for the new image. Arguments: -devlist array: - - "device": device name to snapshot (json-string) - - "snapshot-file": name of new image file (json-string) - - "format": format of new image (json-string, optional) +actions array: + - "type": the operation to perform. The only supported + value is "blockdev-snapshot-sync". (json-string) + - "data": a dictionary. The contents depend on the value + of "type". When "type" is "blockdev-snapshot-sync": + - "device": device name to snapshot (json-string) + - "snapshot-file": name of new image file (json-string) + - "format": format of new image (json-string, optional) + - "mode": whether and how QEMU should create the snapshot file + (NewImageMode, optional, default "absolute-paths") Example: --> { "execute": "blockdev-group-snapshot-sync", "arguments": - { "devlist": [{ "device": "ide-hd0", - "snapshot-file": "/some/place/my-image", - "format": "qcow2" }, - { "device": "ide-hd1", - "snapshot-file": "/some/place/my-image2", - "format": "qcow2" }] } } +-> { "execute": "transaction", + "arguments": { "actions": [ + { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0", + "snapshot-file": "/some/place/my-image", + "format": "qcow2" } }, + { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1", + "snapshot-file": "/some/place/my-image2", + "mode": "existing", + "format": "qcow2" } } ] } } <- { "return": {} } EQMP @@ -746,6 +760,8 @@ Arguments: - "device": device name to snapshot (json-string) - "snapshot-file": name of new image file (json-string) +- "mode": whether and how QEMU should create the snapshot file + (NewImageMode, optional, default "absolute-paths") - "format": format of new image (json-string, optional) Example: diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py index b56225bdaa..727fb77266 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -117,6 +117,7 @@ struct %(name)s { %(name)sKind kind; union { + void *data; ''', name=name) @@ -269,6 +270,7 @@ for expr in exprs: elif expr.has_key('union'): ret += generate_fwd_struct(expr['union'], expr['data']) + "\n" ret += generate_enum('%sKind' % expr['union'], expr['data'].keys()) + fdef.write(generate_enum_lookup('%sKind' % expr['union'], expr['data'].keys())) else: continue fdecl.write(ret) @@ -283,6 +285,10 @@ for expr in exprs: fdef.write(generate_type_cleanup(expr['type']) + "\n") elif expr.has_key('union'): ret += generate_union(expr['union'], expr['data']) + ret += generate_type_cleanup_decl(expr['union'] + "List") + fdef.write(generate_type_cleanup(expr['union'] + "List") + "\n") + ret += generate_type_cleanup_decl(expr['union']) + fdef.write(generate_type_cleanup(expr['union']) + "\n") else: continue fdecl.write(ret) diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py index 5160d83c4f..54117d4d2b 100644 --- a/scripts/qapi-visit.py +++ b/scripts/qapi-visit.py @@ -110,10 +110,38 @@ def generate_visit_union(name, members): void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error **errp) { -} + Error *err = NULL; + + visit_start_struct(m, (void **)obj, "%(name)s", name, sizeof(%(name)s), &err); + visit_type_%(name)sKind(m, &(*obj)->kind, "type", &err); + if (err) { + error_propagate(errp, err); + goto end; + } + switch ((*obj)->kind) { ''', name=name) + for key in members: + ret += mcgen(''' + case %(abbrev)s_KIND_%(enum)s: + visit_type_%(c_type)s(m, &(*obj)->%(c_name)s, "data", errp); + break; +''', + abbrev = de_camel_case(name).upper(), + enum = de_camel_case(key).upper(), + c_type=members[key], + c_name=c_var(key)) + + ret += mcgen(''' + default: + abort(); + } +end: + visit_end_struct(m, errp); +} +''') + return ret def generate_declaration(name, members, genlist=True): @@ -242,6 +270,7 @@ for expr in exprs: fdecl.write(ret) elif expr.has_key('union'): ret = generate_visit_union(expr['union'], expr['data']) + ret += generate_visit_list(expr['union'], expr['data']) fdef.write(ret) ret = generate_decl_enum('%sKind' % expr['union'], expr['data'].keys()) diff --git a/test-coroutine.c b/test-coroutine.c index bf9f3e91b5..e5d14eb696 100644 --- a/test-coroutine.c +++ b/test-coroutine.c @@ -177,6 +177,32 @@ static void perf_lifecycle(void) g_test_message("Lifecycle %u iterations: %f s\n", max, duration); } +static void perf_nesting(void) +{ + unsigned int i, maxcycles, maxnesting; + double duration; + + maxcycles = 100000000; + maxnesting = 20000; + Coroutine *root; + NestData nd = { + .n_enter = 0, + .n_return = 0, + .max = maxnesting, + }; + + g_test_timer_start(); + for (i = 0; i < maxcycles; i++) { + root = qemu_coroutine_create(nest); + qemu_coroutine_enter(root, &nd); + } + duration = g_test_timer_elapsed(); + + g_test_message("Nesting %u iterations of %u depth each: %f s\n", + maxcycles, maxnesting, duration); +} + + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -187,6 +213,7 @@ int main(int argc, char **argv) g_test_add_func("/basic/in_coroutine", test_in_coroutine); if (g_test_perf()) { g_test_add_func("/perf/lifecycle", perf_lifecycle); + g_test_add_func("/perf/nesting", perf_nesting); } return g_test_run(); } diff --git a/test-qmp-input-visitor.c b/test-qmp-input-visitor.c index 926db5cb91..1996e49576 100644 --- a/test-qmp-input-visitor.c +++ b/test-qmp-input-visitor.c @@ -234,6 +234,22 @@ static void test_visitor_in_list(TestInputVisitorData *data, qapi_free_UserDefOneList(head); } +static void test_visitor_in_union(TestInputVisitorData *data, + const void *unused) +{ + Visitor *v; + Error *err = NULL; + UserDefUnion *tmp; + + v = visitor_input_test_init(data, "{ 'type': 'b', 'data' : { 'integer': 42 } }"); + + visit_type_UserDefUnion(v, &tmp, NULL, &err); + g_assert(err == NULL); + g_assert_cmpint(tmp->kind, ==, USER_DEF_UNION_KIND_B); + g_assert_cmpint(tmp->b->integer, ==, 42); + qapi_free_UserDefUnion(tmp); +} + static void input_visitor_test_add(const char *testpath, TestInputVisitorData *data, void (*test_func)(TestInputVisitorData *data, const void *user_data)) @@ -264,6 +280,8 @@ int main(int argc, char **argv) &in_visitor_data, test_visitor_in_struct_nested); input_visitor_test_add("/visitor/input/list", &in_visitor_data, test_visitor_in_list); + input_visitor_test_add("/visitor/input/union", + &in_visitor_data, test_visitor_in_union); g_test_run(); diff --git a/test-qmp-output-visitor.c b/test-qmp-output-visitor.c index 5452cd43bc..4d6c4d4420 100644 --- a/test-qmp-output-visitor.c +++ b/test-qmp-output-visitor.c @@ -380,6 +380,38 @@ static void test_visitor_out_list_qapi_free(TestOutputVisitorData *data, qapi_free_UserDefNestedList(head); } +static void test_visitor_out_union(TestOutputVisitorData *data, + const void *unused) +{ + QObject *arg, *qvalue; + QDict *qdict, *value; + + Error *err = NULL; + + UserDefUnion *tmp = g_malloc0(sizeof(UserDefUnion)); + tmp->kind = USER_DEF_UNION_KIND_A; + tmp->a = g_malloc0(sizeof(UserDefA)); + tmp->a->boolean = true; + + visit_type_UserDefUnion(data->ov, &tmp, NULL, &err); + g_assert(err == NULL); + arg = qmp_output_get_qobject(data->qov); + + g_assert(qobject_type(arg) == QTYPE_QDICT); + qdict = qobject_to_qdict(arg); + + g_assert_cmpstr(qdict_get_str(qdict, "type"), ==, "a"); + + qvalue = qdict_get(qdict, "data"); + g_assert(data != NULL); + g_assert(qobject_type(qvalue) == QTYPE_QDICT); + value = qobject_to_qdict(qvalue); + g_assert_cmpint(qdict_get_bool(value, "boolean"), ==, true); + + qapi_free_UserDefUnion(tmp); + QDECREF(qdict); +} + static void output_visitor_test_add(const char *testpath, TestOutputVisitorData *data, void (*test_func)(TestOutputVisitorData *data, const void *user_data)) @@ -416,6 +448,8 @@ int main(int argc, char **argv) &out_visitor_data, test_visitor_out_list); output_visitor_test_add("/visitor/output/list-qapi-free", &out_visitor_data, test_visitor_out_list_qapi_free); + output_visitor_test_add("/visitor/output/union", + &out_visitor_data, test_visitor_out_union); g_test_run(); diff --git a/tests/Makefile b/tests/Makefile index 74b29dc076..c78ade122e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,6 +1,9 @@ +export SRC_PATH + CHECKS = check-qdict check-qfloat check-qint check-qstring check-qlist CHECKS += check-qjson test-qmp-output-visitor test-qmp-input-visitor CHECKS += test-string-input-visitor test-string-output-visitor test-coroutine +CHECKS += $(SRC_PATH)/tests/qemu-iotests-quick.sh check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS) @@ -42,6 +45,13 @@ test-qmp-input-visitor: test-qmp-input-visitor.o $(qobject-obj-y) $(qapi-obj-y) test-qmp-commands.o: $(addprefix $(qapi-dir)/, test-qapi-types.c test-qapi-types.h test-qapi-visit.c test-qapi-visit.h test-qmp-marshal.c test-qmp-commands.h) $(qapi-obj-y) test-qmp-commands: test-qmp-commands.o $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y) $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o $(qapi-dir)/test-qmp-marshal.o module.o -.PHONY: check +$(SRC_PATH)/tests/qemu-iotests-quick.sh: qemu-img qemu-io + + +.PHONY: check check-block + check: $(CHECKS) $(call quiet-command, gtester $(CHECKS), " CHECK") + +check-block: + $(call quiet-command, $(SHELL) $(SRC_PATH)/tests/check-block.sh , " CHECK") diff --git a/tests/check-block.sh b/tests/check-block.sh new file mode 100755 index 0000000000..b9d9c6a9f6 --- /dev/null +++ b/tests/check-block.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +export QEMU_PROG="$(pwd)/x86_64-softmmu/qemu-system-x86_64" +export QEMU_IMG_PROG="$(pwd)/qemu-img" +export QEMU_IO_PROG="$(pwd)/qemu-io" + +if [ ! -x $QEMU_PROG ]; then + echo "'make check-block' requires qemu-system-x86_64" + exit 1 +fi + +cd $SRC_PATH/tests/qemu-iotests + +ret=0 +./check -T -nocache -raw || ret=1 +./check -T -nocache -qcow2 || ret=1 +./check -T -nocache -qed|| ret=1 +./check -T -nocache -vmdk|| ret=1 +./check -T -nocache -vpc || ret=1 + +exit $ret diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh new file mode 100755 index 0000000000..cf90de0b8b --- /dev/null +++ b/tests/qemu-iotests-quick.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +# We don't know which of the system emulator binaries there is (or if there is +# any at all), so the 'quick' group doesn't contain any tests that require +# running qemu proper. Assign a fake binary name so that qemu-iotests doesn't +# complain about the missing binary. +export QEMU_PROG="this_should_be_unused" + +export QEMU_IMG_PROG="$(pwd)/qemu-img" +export QEMU_IO_PROG="$(pwd)/qemu-io" + +cd $SRC_PATH/tests/qemu-iotests + +ret=0 +./check -T -nocache -qcow2 -g quick || ret=1 + +exit $ret diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index fcf869d36e..b549f10f17 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -8,32 +8,32 @@ # test-group association ... one line per test # 001 rw auto -002 rw auto +002 rw auto quick 003 rw auto -004 rw auto +004 rw auto quick 005 img auto 006 img auto 007 snapshot auto 008 rw auto 009 rw auto 010 rw auto -011 rw auto -012 auto +011 rw auto quick +012 auto quick 013 rw auto 014 rw auto 015 rw snapshot auto -016 rw auto -017 rw backing auto +016 rw auto quick +017 rw backing auto quick 018 rw backing auto -019 rw backing auto -020 rw backing auto +019 rw backing auto quick +020 rw backing auto quick 021 io auto 022 rw snapshot auto 023 rw auto -024 rw backing auto -025 rw auto +024 rw backing auto quick +025 rw auto quick 026 rw blkdbg auto -027 rw auto +027 rw auto quick 028 rw backing auto -029 rw auto +029 rw auto quick 030 rw auto diff --git a/trace-events b/trace-events index dfe28ed42b..74bb92af4f 100644 --- a/trace-events +++ b/trace-events @@ -312,6 +312,31 @@ scsi_request_sense(int target, int lun, int tag) "target %d lun %d tag %d" # vl.c vm_state_notify(int running, int reason) "running %d reason %d" +# block/qcow2.c +qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d" +qcow2_writev_done_req(void *co, int ret) "co %p ret %d" +qcow2_writev_start_part(void *co) "co %p" +qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d" +qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64 + +qcow2_alloc_clusters_offset(void *co, uint64_t offset, int n_start, int n_end) "co %p offet %" PRIx64 " n_start %d n_end %d" +qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offet %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d" +qcow2_cluster_alloc_phys(void *co) "co %p" +qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d" + +qcow2_l2_allocate(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_get_empty(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_write_l2(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_write_l1(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_done(void *bs, int l1_index, int ret) "bs %p l1_index %d ret %d" + +qcow2_cache_get(void *co, int c, uint64_t offset, bool read_from_disk) "co %p is_l2_cache %d offset %" PRIx64 " read_from_disk %d" +qcow2_cache_get_replace_entry(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_get_read(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d" +qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d" + # block/qed-l2-cache.c qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p" qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d"