diff --git a/block.c b/block.c index e176c6f3bc..ea4956d6c7 100644 --- a/block.c +++ b/block.c @@ -2669,7 +2669,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, goto out; } - total_sectors = len >> BDRV_SECTOR_BITS; + total_sectors = (len + BDRV_SECTOR_SIZE - 1) >> BDRV_SECTOR_BITS; max_nb_sectors = MAX(0, total_sectors - sector_num); if (max_nb_sectors > 0) { ret = drv->bdrv_co_readv(bs, sector_num, diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 7d144205c3..5e8a7794f4 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -192,7 +192,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) /* The snapshot list position has not yet been updated, so these clusters * must indeed be completely free */ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset, - s->snapshots_size); + snapshots_size); if (ret < 0) { return ret; } diff --git a/blockdev.c b/blockdev.c index 80605a2bac..8aa66a949c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -443,7 +443,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts, if (qemu_opt_get_bool(opts, "cache.direct", false)) { bdrv_flags |= BDRV_O_NOCACHE; } - if (qemu_opt_get_bool(opts, "cache.no-flush", true)) { + if (qemu_opt_get_bool(opts, "cache.no-flush", false)) { bdrv_flags |= BDRV_O_NO_FLUSH; } diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index e2f55cc946..49a23c33f7 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -460,9 +460,9 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running, static void virtio_blk_reset(VirtIODevice *vdev) { -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE VirtIOBlock *s = VIRTIO_BLK(vdev); +#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE if (s->dataplane) { virtio_blk_data_plane_stop(s->dataplane); } @@ -473,6 +473,7 @@ static void virtio_blk_reset(VirtIODevice *vdev) * are per-device request lists. */ bdrv_drain_all(); + bdrv_set_enable_write_cache(s->bs, s->original_wce); } /* coalesce internal state, copy to pci i/o region 0 @@ -564,7 +565,25 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) } features = vdev->guest_features; - bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE))); + + /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send + * cache flushes. Thus, the "auto writethrough" behavior is never + * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. + * Leaving it enabled would break the following sequence: + * + * Guest started with "-drive cache=writethrough" + * Guest sets status to 0 + * Guest sets DRIVER bit in status field + * Guest reads host features (WCE=0, CONFIG_WCE=1) + * Guest writes guest features (WCE=0, CONFIG_WCE=1) + * Guest writes 1 to the WCE configuration field (writeback mode) + * Guest sets DRIVER_OK bit in status field + * + * s->bs would erroneously be placed in writethrough mode. + */ + if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) { + bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE))); + } } static void virtio_blk_save(QEMUFile *f, void *opaque) @@ -674,6 +693,7 @@ static int virtio_blk_device_init(VirtIODevice *vdev) } blkconf_serial(&blk->conf, &blk->serial); + s->original_wce = bdrv_enable_write_cache(blk->conf.bs); if (blkconf_geometry(&blk->conf, NULL, 65535, 255, 255) < 0) { return -1; } diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index b87cf490b1..41885da1a0 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -123,6 +123,7 @@ typedef struct VirtIOBlock { BlockConf *conf; VirtIOBlkConf blk; unsigned short sector_mask; + bool original_wce; VMChangeStateEntry *change; #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE Notifier migration_state_notifier; diff --git a/include/qemu/timer.h b/include/qemu/timer.h index e4934dd61b..b58903bef5 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -115,6 +115,10 @@ static inline int64_t qemu_clock_get_us(QEMUClockType type) * Determines whether a clock's default timer list * has timers attached * + * Note that this function should not be used when other threads also access + * the timer list. The return value may be outdated by the time it is acted + * upon. + * * Returns: true if the clock's default timer list * has timers attached */ @@ -271,6 +275,10 @@ void timerlist_free(QEMUTimerList *timer_list); * * Determine whether a timer list has active timers * + * Note that this function should not be used when other threads also access + * the timer list. The return value may be outdated by the time it is acted + * upon. + * * Returns: true if the timer list has timers. */ bool timerlist_has_timers(QEMUTimerList *timer_list); @@ -512,6 +520,9 @@ void timer_free(QEMUTimer *ts); * @ts: the timer * * Delete a timer from the active list. + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. */ void timer_del(QEMUTimer *ts); @@ -521,6 +532,9 @@ void timer_del(QEMUTimer *ts); * @expire_time: the expiry time in nanoseconds * * Modify a timer to expire at @expire_time + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. */ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time); @@ -531,6 +545,9 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time); * * Modify a timer to expiry at @expire_time, taking into * account the scale associated with the timer. + * + * This function is thread-safe but the timer and its timer list must not be + * freed while this function is running. */ void timer_mod(QEMUTimer *ts, int64_t expire_timer); diff --git a/libcacard/Makefile b/libcacard/Makefile index 47827a0eb8..4d15da49b8 100644 --- a/libcacard/Makefile +++ b/libcacard/Makefile @@ -4,7 +4,8 @@ TOOLS += vscclient$(EXESUF) # objects linked into a shared library, built with libtool with -fPIC if required libcacard-obj-y = $(stub-obj-y) $(libcacard-y) -libcacard-obj-y += util/osdep.o util/cutils.o util/qemu-timer-common.o util/error.o +libcacard-obj-y += util/osdep.o util/cutils.o util/qemu-timer-common.o +libcacard-obj-y += util/error.o util/qemu-error.o libcacard-obj-$(CONFIG_WIN32) += util/oslib-win32.o util/qemu-thread-win32.o libcacard-obj-$(CONFIG_POSIX) += util/oslib-posix.o util/qemu-thread-posix.o libcacard-obj-y += $(filter trace/%, $(util-obj-y)) diff --git a/qemu-timer.c b/qemu-timer.c index 95ff47fef3..6b62e88669 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -66,6 +66,7 @@ QEMUClock qemu_clocks[QEMU_CLOCK_MAX]; struct QEMUTimerList { QEMUClock *clock; + QemuMutex active_timers_lock; QEMUTimer *active_timers; QLIST_ENTRY(QEMUTimerList) list; QEMUTimerListNotifyCB *notify_cb; @@ -101,6 +102,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type, timer_list->clock = clock; timer_list->notify_cb = cb; timer_list->notify_opaque = opaque; + qemu_mutex_init(&timer_list->active_timers_lock); QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list); return timer_list; } @@ -111,6 +113,7 @@ void timerlist_free(QEMUTimerList *timer_list) if (timer_list->clock) { QLIST_REMOVE(timer_list, list); } + qemu_mutex_destroy(&timer_list->active_timers_lock); g_free(timer_list); } @@ -163,9 +166,17 @@ bool qemu_clock_has_timers(QEMUClockType type) bool timerlist_expired(QEMUTimerList *timer_list) { - return (timer_list->active_timers && - timer_list->active_timers->expire_time < - qemu_clock_get_ns(timer_list->clock->type)); + int64_t expire_time; + + qemu_mutex_lock(&timer_list->active_timers_lock); + if (!timer_list->active_timers) { + qemu_mutex_unlock(&timer_list->active_timers_lock); + return false; + } + expire_time = timer_list->active_timers->expire_time; + qemu_mutex_unlock(&timer_list->active_timers_lock); + + return expire_time < qemu_clock_get_ns(timer_list->clock->type); } bool qemu_clock_expired(QEMUClockType type) @@ -182,13 +193,25 @@ bool qemu_clock_expired(QEMUClockType type) int64_t timerlist_deadline_ns(QEMUTimerList *timer_list) { int64_t delta; + int64_t expire_time; - if (!timer_list->clock->enabled || !timer_list->active_timers) { + if (!timer_list->clock->enabled) { return -1; } - delta = timer_list->active_timers->expire_time - - qemu_clock_get_ns(timer_list->clock->type); + /* The active timers list may be modified before the caller uses our return + * value but ->notify_cb() is called when the deadline changes. Therefore + * the caller should notice the change and there is no race condition. + */ + qemu_mutex_lock(&timer_list->active_timers_lock); + if (!timer_list->active_timers) { + qemu_mutex_unlock(&timer_list->active_timers_lock); + return -1; + } + expire_time = timer_list->active_timers->expire_time; + qemu_mutex_unlock(&timer_list->active_timers_lock); + + delta = expire_time - qemu_clock_get_ns(timer_list->clock->type); if (delta <= 0) { return 0; @@ -289,6 +312,7 @@ void timer_init(QEMUTimer *ts, ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->expire_time = -1; } void timer_free(QEMUTimer *ts) @@ -296,14 +320,12 @@ void timer_free(QEMUTimer *ts) g_free(ts); } -/* stop a timer, but do not dealloc it */ -void timer_del(QEMUTimer *ts) +static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) { QEMUTimer **pt, *t; - /* NOTE: this code must be signal safe because - timer_expired() can be called from a signal. */ - pt = &ts->timer_list->active_timers; + ts->expire_time = -1; + pt = &timer_list->active_timers; for(;;) { t = *pt; if (!t) @@ -316,18 +338,28 @@ void timer_del(QEMUTimer *ts) } } +/* stop a timer, but do not dealloc it */ +void timer_del(QEMUTimer *ts) +{ + QEMUTimerList *timer_list = ts->timer_list; + + qemu_mutex_lock(&timer_list->active_timers_lock); + timer_del_locked(timer_list, ts); + qemu_mutex_unlock(&timer_list->active_timers_lock); +} + /* modify the current timer so that it will be fired when current_time >= expire_time. The corresponding callback will be called. */ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time) { + QEMUTimerList *timer_list = ts->timer_list; QEMUTimer **pt, *t; - timer_del(ts); + qemu_mutex_lock(&timer_list->active_timers_lock); + timer_del_locked(timer_list, ts); /* add the timer in the sorted list */ - /* NOTE: this code must be signal safe because - timer_expired() can be called from a signal. */ - pt = &ts->timer_list->active_timers; + pt = &timer_list->active_timers; for(;;) { t = *pt; if (!timer_expired_ns(t, expire_time)) { @@ -335,15 +367,16 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time) } pt = &t->next; } - ts->expire_time = expire_time; + ts->expire_time = MAX(expire_time, 0); ts->next = *pt; *pt = ts; + qemu_mutex_unlock(&timer_list->active_timers_lock); /* Rearm if necessary */ - if (pt == &ts->timer_list->active_timers) { + if (pt == &timer_list->active_timers) { /* Interrupt execution to force deadline recalculation. */ - qemu_clock_warp(ts->timer_list->clock->type); - timerlist_notify(ts->timer_list); + qemu_clock_warp(timer_list->clock->type); + timerlist_notify(timer_list); } } @@ -354,13 +387,7 @@ void timer_mod(QEMUTimer *ts, int64_t expire_time) bool timer_pending(QEMUTimer *ts) { - QEMUTimer *t; - for (t = ts->timer_list->active_timers; t != NULL; t = t->next) { - if (t == ts) { - return true; - } - } - return false; + return ts->expire_time >= 0; } bool timer_expired(QEMUTimer *timer_head, int64_t current_time) @@ -373,23 +400,32 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) QEMUTimer *ts; int64_t current_time; bool progress = false; - + QEMUTimerCB *cb; + void *opaque; + if (!timer_list->clock->enabled) { return progress; } current_time = qemu_clock_get_ns(timer_list->clock->type); for(;;) { + qemu_mutex_lock(&timer_list->active_timers_lock); ts = timer_list->active_timers; if (!timer_expired_ns(ts, current_time)) { + qemu_mutex_unlock(&timer_list->active_timers_lock); break; } + /* remove timer from the list before calling the callback */ timer_list->active_timers = ts->next; ts->next = NULL; + ts->expire_time = -1; + cb = ts->cb; + opaque = ts->opaque; + qemu_mutex_unlock(&timer_list->active_timers_lock); /* run the callback (the timer list can be modified) */ - ts->cb(ts->opaque); + cb(opaque); progress = true; } return progress; diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c index 39be046ec7..15a885e882 100644 --- a/tests/test-coroutine.c +++ b/tests/test-coroutine.c @@ -182,17 +182,17 @@ static void perf_nesting(void) unsigned int i, maxcycles, maxnesting; double duration; - maxcycles = 100000000; + maxcycles = 10000; maxnesting = 1000; Coroutine *root; - NestData nd = { - .n_enter = 0, - .n_return = 0, - .max = maxnesting, - }; g_test_timer_start(); for (i = 0; i < maxcycles; i++) { + NestData nd = { + .n_enter = 0, + .n_return = 0, + .max = maxnesting, + }; root = qemu_coroutine_create(nest); qemu_coroutine_enter(root, &nd); } @@ -202,6 +202,38 @@ static void perf_nesting(void) maxcycles, maxnesting, duration); } +/* + * Yield benchmark + */ + +static void coroutine_fn yield_loop(void *opaque) +{ + unsigned int *counter = opaque; + + while ((*counter) > 0) { + (*counter)--; + qemu_coroutine_yield(); + } +} + +static void perf_yield(void) +{ + unsigned int i, maxcycles; + double duration; + + maxcycles = 100000000; + i = maxcycles; + Coroutine *coroutine = qemu_coroutine_create(yield_loop); + + g_test_timer_start(); + while (i > 0) { + qemu_coroutine_enter(coroutine, &i); + } + duration = g_test_timer_elapsed(); + + g_test_message("Yield %u iterations: %f s\n", + maxcycles, duration); +} int main(int argc, char **argv) { @@ -214,6 +246,7 @@ int main(int argc, char **argv) if (g_test_perf()) { g_test_add_func("/perf/lifecycle", perf_lifecycle); g_test_add_func("/perf/nesting", perf_nesting); + g_test_add_func("/perf/yield", perf_yield); } return g_test_run(); } diff --git a/util/osdep.c b/util/osdep.c index 685c8ae889..62072b4be3 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -207,6 +207,13 @@ int qemu_open(const char *name, int flags, ...) } #endif +#ifdef O_DIRECT + if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) { + error_report("file system may not support O_DIRECT"); + errno = EINVAL; /* in case it was clobbered */ + } +#endif /* O_DIRECT */ + return ret; }