block: Make overlap range for serialisation dynamic
Copy on Read wants to serialise with all requests touching the same cluster, so wait_serialising_requests() rounded requests to cluster boundaries. Other users, such as alignment RMW (read-modify-write), have different requirements, though (they only need to serialise with requests touching the same sector), so make the overlap range dynamic.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
This commit is contained in:
parent
2dbafdc012
commit
7327145f63
53
block.c
53
block.c
|
@ -2231,6 +2231,8 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
|
||||||
.is_write = is_write,
|
.is_write = is_write,
|
||||||
.co = qemu_coroutine_self(),
|
.co = qemu_coroutine_self(),
|
||||||
.serialising = false,
|
.serialising = false,
|
||||||
|
.overlap_offset = offset,
|
||||||
|
.overlap_bytes = bytes,
|
||||||
};
|
};
|
||||||
|
|
||||||
qemu_co_queue_init(&req->wait_queue);
|
qemu_co_queue_init(&req->wait_queue);
|
||||||
|
@ -2238,12 +2240,19 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
|
||||||
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
|
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mark_request_serialising(BdrvTrackedRequest *req)
|
static void mark_request_serialising(BdrvTrackedRequest *req, size_t align)
|
||||||
{
|
{
|
||||||
|
int64_t overlap_offset = req->offset & ~(align - 1);
|
||||||
|
int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
|
||||||
|
- overlap_offset;
|
||||||
|
|
||||||
if (!req->serialising) {
|
if (!req->serialising) {
|
||||||
req->bs->serialising_in_flight++;
|
req->bs->serialising_in_flight++;
|
||||||
req->serialising = true;
|
req->serialising = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
|
||||||
|
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2267,20 +2276,16 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void round_bytes_to_clusters(BlockDriverState *bs,
|
static int bdrv_get_cluster_size(BlockDriverState *bs)
|
||||||
int64_t offset, unsigned int bytes,
|
|
||||||
int64_t *cluster_offset,
|
|
||||||
unsigned int *cluster_bytes)
|
|
||||||
{
|
{
|
||||||
BlockDriverInfo bdi;
|
BlockDriverInfo bdi;
|
||||||
|
int ret;
|
||||||
|
|
||||||
if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
|
ret = bdrv_get_info(bs, &bdi);
|
||||||
*cluster_offset = offset;
|
if (ret < 0 || bdi.cluster_size == 0) {
|
||||||
*cluster_bytes = bytes;
|
return bs->request_alignment;
|
||||||
} else {
|
} else {
|
||||||
*cluster_offset = QEMU_ALIGN_DOWN(offset, bdi.cluster_size);
|
return bdi.cluster_size;
|
||||||
*cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes,
|
|
||||||
bdi.cluster_size);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2288,11 +2293,11 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
|
||||||
int64_t offset, unsigned int bytes)
|
int64_t offset, unsigned int bytes)
|
||||||
{
|
{
|
||||||
/* aaaa bbbb */
|
/* aaaa bbbb */
|
||||||
if (offset >= req->offset + req->bytes) {
|
if (offset >= req->overlap_offset + req->overlap_bytes) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
/* bbbb aaaa */
|
/* bbbb aaaa */
|
||||||
if (req->offset >= offset + bytes) {
|
if (req->overlap_offset >= offset + bytes) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -2302,30 +2307,21 @@ static void coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
|
||||||
{
|
{
|
||||||
BlockDriverState *bs = self->bs;
|
BlockDriverState *bs = self->bs;
|
||||||
BdrvTrackedRequest *req;
|
BdrvTrackedRequest *req;
|
||||||
int64_t cluster_offset;
|
|
||||||
unsigned int cluster_bytes;
|
|
||||||
bool retry;
|
bool retry;
|
||||||
|
|
||||||
if (!bs->serialising_in_flight) {
|
if (!bs->serialising_in_flight) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we touch the same cluster it counts as an overlap. This guarantees
|
|
||||||
* that allocating writes will be serialized and not race with each other
|
|
||||||
* for the same cluster. For example, in copy-on-read it ensures that the
|
|
||||||
* CoR read and write operations are atomic and guest writes cannot
|
|
||||||
* interleave between them.
|
|
||||||
*/
|
|
||||||
round_bytes_to_clusters(bs, self->offset, self->bytes,
|
|
||||||
&cluster_offset, &cluster_bytes);
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
retry = false;
|
retry = false;
|
||||||
QLIST_FOREACH(req, &bs->tracked_requests, list) {
|
QLIST_FOREACH(req, &bs->tracked_requests, list) {
|
||||||
if (req == self || (!req->serialising && !self->serialising)) {
|
if (req == self || (!req->serialising && !self->serialising)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
|
if (tracked_request_overlaps(req, self->overlap_offset,
|
||||||
|
self->overlap_bytes))
|
||||||
|
{
|
||||||
/* Hitting this means there was a reentrant request, for
|
/* Hitting this means there was a reentrant request, for
|
||||||
* example, a block driver issuing nested requests. This must
|
* example, a block driver issuing nested requests. This must
|
||||||
* never happen since it means deadlock.
|
* never happen since it means deadlock.
|
||||||
|
@ -2941,7 +2937,12 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
|
||||||
|
|
||||||
/* Handle Copy on Read and associated serialisation */
|
/* Handle Copy on Read and associated serialisation */
|
||||||
if (flags & BDRV_REQ_COPY_ON_READ) {
|
if (flags & BDRV_REQ_COPY_ON_READ) {
|
||||||
mark_request_serialising(req);
|
/* If we touch the same cluster it counts as an overlap. This
|
||||||
|
* guarantees that allocating writes will be serialized and not race
|
||||||
|
* with each other for the same cluster. For example, in copy-on-read
|
||||||
|
* it ensures that the CoR read and write operations are atomic and
|
||||||
|
* guest writes cannot interleave between them. */
|
||||||
|
mark_request_serialising(req, bdrv_get_cluster_size(bs));
|
||||||
}
|
}
|
||||||
|
|
||||||
wait_serialising_requests(req);
|
wait_serialising_requests(req);
|
||||||
|
|
|
@ -60,7 +60,11 @@ typedef struct BdrvTrackedRequest {
|
||||||
int64_t offset;
|
int64_t offset;
|
||||||
unsigned int bytes;
|
unsigned int bytes;
|
||||||
bool is_write;
|
bool is_write;
|
||||||
|
|
||||||
bool serialising;
|
bool serialising;
|
||||||
|
int64_t overlap_offset;
|
||||||
|
unsigned int overlap_bytes;
|
||||||
|
|
||||||
QLIST_ENTRY(BdrvTrackedRequest) list;
|
QLIST_ENTRY(BdrvTrackedRequest) list;
|
||||||
Coroutine *co; /* owner, used for deadlock detection */
|
Coroutine *co; /* owner, used for deadlock detection */
|
||||||
CoQueue wait_queue; /* coroutines blocked on this request */
|
CoQueue wait_queue; /* coroutines blocked on this request */
|
||||||
|
|
Loading…
Reference in New Issue