diff --git a/block-raw-posix.c b/block-raw-posix.c index 83a358cd4d..4c04dbf8da 100644 --- a/block-raw-posix.c +++ b/block-raw-posix.c @@ -73,6 +73,11 @@ #define DEBUG_BLOCK_PRINT(formatCstr, args...) #endif +/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */ +#ifndef O_DIRECT +#define O_DIRECT O_DSYNC +#endif + #define FTYPE_FILE 0 #define FTYPE_CD 1 #define FTYPE_FD 2 @@ -101,9 +106,7 @@ typedef struct BDRVRawState { int fd_got_error; int fd_media_changed; #endif -#if defined(O_DIRECT) uint8_t* aligned_buf; -#endif } BDRVRawState; static int posix_aio_init(void); @@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) } if (flags & BDRV_O_CREAT) open_flags |= O_CREAT | O_TRUNC; -#ifdef O_DIRECT - if (flags & BDRV_O_DIRECT) + + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. */ + if ((flags & BDRV_O_NOCACHE)) open_flags |= O_DIRECT; -#endif + else if (!(flags & BDRV_O_CACHE_WB)) + open_flags |= O_DSYNC; s->type = FTYPE_FILE; @@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) s->fd = fd; for (i = 0; i < RAW_FD_POOL_SIZE; i++) s->fd_pool[i] = -1; -#if defined(O_DIRECT) s->aligned_buf = NULL; - if (flags & BDRV_O_DIRECT) { + if ((flags & BDRV_O_NOCACHE)) { s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE); if (s->aligned_buf == NULL) { ret = -errno; @@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) return ret; } } -#endif return 0; } @@ -281,7 +285,6 @@ label__raw_write__success: } -#if defined(O_DIRECT) /* * offset and count are in bytes and possibly not aligned. 
For files opened * with O_DIRECT, necessary alignments are ensured before calling @@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, return raw_pwrite_aligned(bs, offset, buf, count) + sum; } -#else -#define raw_pread raw_pread_aligned -#define raw_pwrite raw_pwrite_aligned -#endif - - #ifdef CONFIG_AIO /***********************************************************/ /* Unix AIO using POSIX AIO */ @@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, * If O_DIRECT is used and the buffer is not aligned fall back * to synchronous IO. */ -#if defined(O_DIRECT) BDRVRawState *s = bs->opaque; if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { @@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, qemu_bh_schedule(bh); return &acb->common; } -#endif acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); if (!acb) @@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, * If O_DIRECT is used and the buffer is not aligned fall back * to synchronous IO. */ -#if defined(O_DIRECT) BDRVRawState *s = bs->opaque; if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { @@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, qemu_bh_schedule(bh); return &acb->common; } -#endif acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); if (!acb) @@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs) if (s->fd >= 0) { close(s->fd); s->fd = -1; -#if defined(O_DIRECT) if (s->aligned_buf != NULL) qemu_free(s->aligned_buf); -#endif } raw_close_fd_pool(s); } @@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) open_flags |= O_RDONLY; bs->read_only = 1; } -#ifdef O_DIRECT - if (flags & BDRV_O_DIRECT) + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. 
*/ + if ((flags & BDRV_O_NOCACHE)) open_flags |= O_DIRECT; -#endif + else if (!(flags & BDRV_O_CACHE_WB)) + open_flags |= O_DSYNC; s->type = FTYPE_FILE; #if defined(__linux__) diff --git a/block-raw-win32.c b/block-raw-win32.c index fd4a9e3a4c..892f2d1e5a 100644 --- a/block-raw-win32.c +++ b/block-raw-win32.c @@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) #else overlapped = FILE_ATTRIBUTE_NORMAL; #endif - if (flags & BDRV_O_DIRECT) + if ((flags & BDRV_O_NOCACHE)) overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; + else if (!(flags & BDRV_O_CACHE_WB)) + overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, create_flags, overlapped, NULL); @@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) #else overlapped = FILE_ATTRIBUTE_NORMAL; #endif - if (flags & BDRV_O_DIRECT) + if ((flags & BDRV_O_NOCACHE)) overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; + else if (!(flags & BDRV_O_CACHE_WB)) + overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, create_flags, overlapped, NULL); diff --git a/block.c b/block.c index 5d708baada..48229cde7b 100644 --- a/block.c +++ b/block.c @@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, /* Note: for compatibility, we open disk image files as RDWR, and RDONLY as fallback */ if (!(flags & BDRV_O_FILE)) - open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT); + open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK); else open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); ret = drv->bdrv_open(bs, filename, open_flags); if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) { - ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY); + ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR); bs->read_only = 1; } if (ret < 0) { @@ -427,7 +427,7 @@ int 
bdrv_open2(BlockDriverState *bs, const char *filename, int flags, } path_combine(backing_filename, sizeof(backing_filename), filename, bs->backing_file); - if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0) + if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0) goto fail; } diff --git a/block.h b/block.h index f0129130be..72c1c24bb4 100644 --- a/block.h +++ b/block.h @@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo { use a disk image format on top of it (default for bdrv_file_open()) */ -#define BDRV_O_DIRECT 0x0020 +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ + +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) void bdrv_info(void); void bdrv_info_stats(void); diff --git a/qemu-doc.texi b/qemu-doc.texi index adf270b470..84021fb816 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}. @item snapshot=@var{snapshot} @var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}). @item cache=@var{cache} -@var{cache} is "on" or "off" and allows to disable host cache to access data. +@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data. @item format=@var{format} Specify which disk @var{format} will be used rather than detecting the format. Can be used to specifiy format=raw to avoid interpreting an untrusted format header. @end table +By default, writethrough caching is used for all block devices. This means that +the host page cache will be used to read and write data but write notification +will be sent to the guest only when the data has been reported as written by +the storage subsystem. + +Writeback caching will report data writes as completed as soon as the data is +present in the host page cache. This is safe as long as you trust your host.
+If your host crashes or loses power, then the guest may experience data +corruption. When using the @option{-snapshot} option, writeback caching is +used by default. + +The host page cache can be avoided entirely with @option{cache=none}. This will +attempt to do disk IO directly to the guest's memory. QEMU may still perform +an internal copy of the data. + Instead of @option{-cdrom} you can use: @example qemu -drive file=file,index=2,media=cdrom diff --git a/qemu-nbd.c b/qemu-nbd.c index d5d5db73a0..fa618165c8 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -232,7 +232,7 @@ int main(int argc, char **argv) flags |= BDRV_O_SNAPSHOT; break; case 'n': - flags |= BDRV_O_DIRECT; + flags |= BDRV_O_NOCACHE; break; case 'b': bindto = optarg; diff --git a/vl.c b/vl.c index 97aca75179..c0e43ac030 100644 --- a/vl.c +++ b/vl.c @@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot, } if (get_param_value(buf, sizeof(buf), "cache", str)) { - if (!strcmp(buf, "off")) + if (!strcmp(buf, "off") || !strcmp(buf, "none")) cache = 0; - else if (!strcmp(buf, "on")) + else if (!strcmp(buf, "writethrough")) cache = 1; + else if (!strcmp(buf, "writeback")) + cache = 2; else { fprintf(stderr, "qemu: invalid cache option\n"); return -1; @@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot, if (!file[0]) return 0; bdrv_flags = 0; - if (snapshot) + if (snapshot) { bdrv_flags |= BDRV_O_SNAPSHOT; - if (!cache) - bdrv_flags |= BDRV_O_DIRECT; + cache = 2; /* always use write-back with snapshot */ + } + if (cache == 0) /* no caching */ + bdrv_flags |= BDRV_O_NOCACHE; + else if (cache == 2) /* write-back */ + bdrv_flags |= BDRV_O_CACHE_WB; if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) { fprintf(stderr, "qemu: could not open disk image %s\n", file); @@ -8145,7 +8151,7 @@ static void help(int exitcode) "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n" "-drive
[file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" - " [,cache=on|off][,format=f]\n" + " [,cache=writethrough|writeback|none][,format=f]\n" " use 'file' as a drive image\n" "-mtdblock file use 'file' as on-board Flash memory image\n" "-sd file use 'file' as SecureDigital card image\n"