Expand cache= option and use write-through caching by default

This patch changes the cache= option to accept none, writeback, or writethrough
to control the host page cache behavior.  By default, writethrough caching is
now used which internally is implemented by using O_DSYNC to open the disk
images.  When using -snapshot, writeback is used by default since data integrity
is not at all an issue.

cache=none has the same behavior as cache=off previously.  The latter syntax is
still supported but now deprecated.  I also cleaned up the O_DIRECT
implementation to avoid many of the #ifdefs.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5485 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
aliguori 2008-10-14 14:42:54 +00:00
parent eeb438c1b8
commit 9f7965c7e9
7 changed files with 59 additions and 38 deletions

View File

@ -73,6 +73,11 @@
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#endif
/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
#ifndef O_DIRECT
#define O_DIRECT O_DSYNC
#endif
#define FTYPE_FILE 0
#define FTYPE_CD 1
#define FTYPE_FD 2
@ -101,9 +106,7 @@ typedef struct BDRVRawState {
int fd_got_error;
int fd_media_changed;
#endif
#if defined(O_DIRECT)
uint8_t* aligned_buf;
#endif
} BDRVRawState;
static int posix_aio_init(void);
@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
}
if (flags & BDRV_O_CREAT)
open_flags |= O_CREAT | O_TRUNC;
#ifdef O_DIRECT
if (flags & BDRV_O_DIRECT)
/* Use O_DSYNC for write-through caching, no flags for write-back caching,
* and O_DIRECT for no caching. */
if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
#endif
else if (!(flags & BDRV_O_CACHE_WB))
open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
s->fd = fd;
for (i = 0; i < RAW_FD_POOL_SIZE; i++)
s->fd_pool[i] = -1;
#if defined(O_DIRECT)
s->aligned_buf = NULL;
if (flags & BDRV_O_DIRECT) {
if ((flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
ret = -errno;
@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
return ret;
}
}
#endif
return 0;
}
@ -281,7 +285,6 @@ label__raw_write__success:
}
#if defined(O_DIRECT)
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}
#else
#define raw_pread raw_pread_aligned
#define raw_pwrite raw_pwrite_aligned
#endif
#ifdef CONFIG_AIO
/***********************************************************/
/* Unix AIO using POSIX AIO */
@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
qemu_bh_schedule(bh);
return &acb->common;
}
#endif
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
qemu_bh_schedule(bh);
return &acb->common;
}
#endif
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs)
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
#if defined(O_DIRECT)
if (s->aligned_buf != NULL)
qemu_free(s->aligned_buf);
#endif
}
raw_close_fd_pool(s);
}
@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
open_flags |= O_RDONLY;
bs->read_only = 1;
}
#ifdef O_DIRECT
if (flags & BDRV_O_DIRECT)
/* Use O_DSYNC for write-through caching, no flags for write-back caching,
* and O_DIRECT for no caching. */
if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
#endif
else if (!(flags & BDRV_O_CACHE_WB))
open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
#if defined(__linux__)

View File

@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
#else
overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
if (flags & BDRV_O_DIRECT)
if ((flags & BDRV_O_NOCACHE))
overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
else if (!(flags & BDRV_O_CACHE_WB))
overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);
@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
#else
overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
if (flags & BDRV_O_DIRECT)
if ((flags & BDRV_O_NOCACHE))
overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
else if (!(flags & BDRV_O_CACHE_WB))
overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);

View File

@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
/* Note: for compatibility, we open disk image files as RDWR, and
RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT);
open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
bs->read_only = 1;
}
if (ret < 0) {
@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
}
path_combine(backing_filename, sizeof(backing_filename),
filename, bs->backing_file);
if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0)
goto fail;
}

View File

@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo {
use a disk image format on top of
it (default for
bdrv_file_open()) */
#define BDRV_O_DIRECT 0x0020
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
void bdrv_info(void);
void bdrv_info_stats(void);

View File

@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}.
@item snapshot=@var{snapshot}
@var{snapshot} is "on" or "off" and enables or disables snapshot mode for the given drive (see @option{-snapshot}).
@item cache=@var{cache}
@var{cache} is "on" or "off" and allows to disable host cache to access data.
@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data.
@item format=@var{format}
Specify which disk @var{format} will be used rather than detecting
the format. Can be used to specify format=raw to avoid interpreting
an untrusted format header.
@end table
By default, writethrough caching is used for all block devices. This means that
the host page cache will be used to read and write data but write notification
will be sent to the guest only when the data has been reported as written by
the storage subsystem.
Writeback caching will report data writes as completed as soon as the data is
present in the host page cache. This is safe as long as you trust your host.
If your host crashes or loses power, then the guest may experience data
corruption. When using the @option{-snapshot} option, writeback caching is
used by default.
The host page cache can be avoided entirely with @option{cache=none}. This will
attempt to do disk IO directly to the guest's memory. QEMU may still perform
an internal copy of the data.
Instead of @option{-cdrom} you can use:
@example
qemu -drive file=file,index=2,media=cdrom

View File

@ -232,7 +232,7 @@ int main(int argc, char **argv)
flags |= BDRV_O_SNAPSHOT;
break;
case 'n':
flags |= BDRV_O_DIRECT;
flags |= BDRV_O_NOCACHE;
break;
case 'b':
bindto = optarg;

18
vl.c
View File

@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot,
}
if (get_param_value(buf, sizeof(buf), "cache", str)) {
if (!strcmp(buf, "off"))
if (!strcmp(buf, "off") || !strcmp(buf, "none"))
cache = 0;
else if (!strcmp(buf, "on"))
else if (!strcmp(buf, "writethrough"))
cache = 1;
else if (!strcmp(buf, "writeback"))
cache = 2;
else {
fprintf(stderr, "qemu: invalid cache option\n");
return -1;
@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot,
if (!file[0])
return 0;
bdrv_flags = 0;
if (snapshot)
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
if (!cache)
bdrv_flags |= BDRV_O_DIRECT;
cache = 2; /* always use write-back with snapshot */
}
if (cache == 0) /* no caching */
bdrv_flags |= BDRV_O_NOCACHE;
else if (cache == 2) /* write-back */
bdrv_flags |= BDRV_O_CACHE_WB;
if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) {
fprintf(stderr, "qemu: could not open disk image %s\n",
file);
@ -8145,7 +8151,7 @@ static void help(int exitcode)
"-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
" [,cache=on|off][,format=f]\n"
" [,cache=writethrough|writeback|none][,format=f]\n"
" use 'file' as a drive image\n"
"-mtdblock file use 'file' as on-board Flash memory image\n"
"-sd file use 'file' as SecureDigital card image\n"