block: add event when disk usage exceeds threshold
Managing applications, like oVirt (http://www.ovirt.org), make extensive
use of thin-provisioned disk images.
To let the guest run smoothly and not be unnecessarily paused, oVirt sets
a disk usage threshold (so called 'high water mark') based on the occupation
of the device, and automatically extends the image once the threshold
is reached or exceeded.
In order to detect the crossing of the threshold, oVirt has no choice but
to aggressively poll the QEMU monitor using the query-blockstats command.
This leads to unnecessary system load, and is made even worse at scale:
deployments with hundreds of VMs are no longer rare.
To fix this, this patch adds:
* A new monitor command `block-set-write-threshold', to set a mark for
a given block device.
* A new event `BLOCK_WRITE_THRESHOLD', to report if a block device
usage exceeds the threshold.
* A new `write_threshold' field into the `BlockDeviceInfo' structure,
to report the configured threshold.
This will allow the managing application to use smarter and more
efficient monitoring, greatly reducing the need of polling.
[Updated qemu-iotests 067 output to add the new 'write_threshold'
property. --Stefan]
[Changed g_assert_false() to !g_assert() to fix the build on older glib
versions. --Kevin]
Signed-off-by: Francesco Romani <fromani@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1421068273-692-1-git-send-email-fromani@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-01-12 14:11:13 +01:00
|
|
|
/*
|
|
|
|
* QEMU System Emulator block write threshold notification
|
|
|
|
*
|
|
|
|
* Copyright Red Hat, Inc. 2014
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Francesco Romani <fromani@redhat.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
|
|
|
|
* See the COPYING.LIB file in the top-level directory.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "block/block_int.h"
|
2015-09-01 14:48:02 +01:00
|
|
|
#include "qemu/coroutine.h"
|
block: add event when disk usage exceeds threshold
Managing applications, like oVirt (http://www.ovirt.org), make extensive
use of thin-provisioned disk images.
To let the guest run smoothly and not be unnecessarily paused, oVirt sets
a disk usage threshold (so called 'high water mark') based on the occupation
of the device, and automatically extends the image once the threshold
is reached or exceeded.
In order to detect the crossing of the threshold, oVirt has no choice but
to aggressively poll the QEMU monitor using the query-blockstats command.
This leads to unnecessary system load, and is made even worse at scale:
deployments with hundreds of VMs are no longer rare.
To fix this, this patch adds:
* A new monitor command `block-set-write-threshold', to set a mark for
a given block device.
* A new event `BLOCK_WRITE_THRESHOLD', to report if a block device
usage exceeds the threshold.
* A new `write_threshold' field into the `BlockDeviceInfo' structure,
to report the configured threshold.
This will allow the managing application to use smarter and more
efficient monitoring, greatly reducing the need of polling.
[Updated qemu-iotests 067 output to add the new 'write_threshold'
property. --Stefan]
[Changed g_assert_false() to !g_assert() to fix the build on older glib
versions. --Kevin]
Signed-off-by: Francesco Romani <fromani@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1421068273-692-1-git-send-email-fromani@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-01-12 14:11:13 +01:00
|
|
|
#include "block/write-threshold.h"
|
|
|
|
#include "qemu/notify.h"
|
|
|
|
#include "qapi-event.h"
|
|
|
|
#include "qmp-commands.h"
|
|
|
|
|
|
|
|
|
|
|
|
uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bs->write_threshold_offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bs->write_threshold_offset > 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Turn off write threshold tracking on @bs: unhook the notifier and
 * reset the stored offset to 0. Does nothing if no threshold is set.
 */
static void write_threshold_disable(BlockDriverState *bs)
{
    if (!bdrv_write_threshold_is_set(bs)) {
        return;
    }

    notifier_with_return_remove(&bs->write_threshold_notifier);
    bs->write_threshold_offset = 0;
}
|
|
|
|
|
|
|
|
uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
|
|
|
|
const BdrvTrackedRequest *req)
|
|
|
|
{
|
|
|
|
if (bdrv_write_threshold_is_set(bs)) {
|
|
|
|
if (req->offset > bs->write_threshold_offset) {
|
|
|
|
return (req->offset - bs->write_threshold_offset) + req->bytes;
|
|
|
|
}
|
|
|
|
if ((req->offset + req->bytes) > bs->write_threshold_offset) {
|
|
|
|
return (req->offset + req->bytes) - bs->write_threshold_offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Before-write notifier callback. @opaque is the tracked request about
 * to be written.
 *
 * If the request goes past the configured threshold, emit the
 * BLOCK_WRITE_THRESHOLD event once and then disable the threshold.
 * Always returns 0.
 */
static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
                                            void *opaque)
{
    BdrvTrackedRequest *req = opaque;
    BlockDriverState *bs = req->bs;
    uint64_t exceeded = bdrv_write_threshold_exceeded(bs, req);

    if (exceeded == 0) {
        return 0; /* should always let other notifiers run */
    }

    qapi_event_send_block_write_threshold(bs->node_name,
                                          exceeded,
                                          bs->write_threshold_offset,
                                          &error_abort);

    /* autodisable to avoid flooding the monitor */
    write_threshold_disable(bs);

    return 0; /* should always let other notifiers run */
}
|
|
|
|
|
|
|
|
/*
 * Install before_write_notify() as the callback of the write threshold
 * notifier of @bs and add that notifier to the before-write list of @bs.
 */
static void write_threshold_register_notifier(BlockDriverState *bs)
{
    NotifierWithReturn *threshold_notifier = &bs->write_threshold_notifier;

    threshold_notifier->notify = before_write_notify;
    notifier_with_return_list_add(&bs->before_write_notifiers,
                                  threshold_notifier);
}
|
|
|
|
|
|
|
|
/*
 * Store @threshold_bytes as the write threshold of @bs.
 *
 * The parameter is unsigned, matching the type the threshold is passed
 * in and reported with elsewhere in this file, so that the value is not
 * squeezed through a signed type on its way to the field.
 */
static void write_threshold_update(BlockDriverState *bs,
                                   uint64_t threshold_bytes)
{
    bs->write_threshold_offset = threshold_bytes;
}
|
|
|
|
|
|
|
|
/*
 * Configure the write threshold of @bs.
 *
 * A positive @threshold_bytes arms the threshold (or updates it when
 * already armed). A value of 0 disables an armed threshold and is
 * silently ignored when none is armed.
 */
void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
{
    bool already_set = bdrv_write_threshold_is_set(bs);

    if (threshold_bytes == 0) {
        /* a disable request with nothing armed is bogus: discard it */
        if (already_set) {
            write_threshold_disable(bs);
        }
        return;
    }

    /* register only on the first arming, to avoid multiple registration */
    if (!already_set) {
        write_threshold_register_notifier(bs);
    }
    write_threshold_update(bs, threshold_bytes);
}
|
|
|
|
|
|
|
|
void qmp_block_set_write_threshold(const char *node_name,
|
|
|
|
uint64_t threshold_bytes,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs;
|
|
|
|
AioContext *aio_context;
|
|
|
|
|
|
|
|
bs = bdrv_find_node(node_name);
|
|
|
|
if (!bs) {
|
2015-03-13 18:51:38 +01:00
|
|
|
error_setg(errp, "Device '%s' not found", node_name);
|
block: add event when disk usage exceeds threshold
Managing applications, like oVirt (http://www.ovirt.org), make extensive
use of thin-provisioned disk images.
To let the guest run smoothly and be not unnecessarily paused, oVirt sets
a disk usage threshold (so called 'high water mark') based on the occupation
of the device, and automatically extends the image once the threshold
is reached or exceeded.
In order to detect the crossing of the threshold, oVirt has no choice but
aggressively polling the QEMU monitor using the query-blockstats command.
This lead to unnecessary system load, and is made even worse under scale:
deployments with hundreds of VMs are no longer rare.
To fix this, this patch adds:
* A new monitor command `block-set-write-threshold', to set a mark for
a given block device.
* A new event `BLOCK_WRITE_THRESHOLD', to report if a block device
usage exceeds the threshold.
* A new `write_threshold' field into the `BlockDeviceInfo' structure,
to report the configured threshold.
This will allow the managing application to use smarter and more
efficient monitoring, greatly reducing the need of polling.
[Updated qemu-iotests 067 output to add the new 'write_threshold'
property. --Stefan]
[Changed g_assert_false() to !g_assert() to fix the build on older glib
versions. --Kevin]
Signed-off-by: Francesco Romani <fromani@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1421068273-692-1-git-send-email-fromani@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-01-12 14:11:13 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
aio_context = bdrv_get_aio_context(bs);
|
|
|
|
aio_context_acquire(aio_context);
|
|
|
|
|
|
|
|
bdrv_write_threshold_set(bs, threshold_bytes);
|
|
|
|
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
}
|