From 33e3963e1b9298e01cadd738124f0e618b5b79f5 Mon Sep 17 00:00:00 2001
From: bellard
Date: Sun, 6 Jul 2003 17:15:21 +0000
Subject: [PATCH] added user mode Linux Copy On Write disk image support - added -snapshot support (initial patch by Rusty Russell)

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@309 c046a42c-6fe2-441c-8c8c-71466251a162
---
 Makefile  |   7 +-
 block.c   | 260 ++++++++++++++++++++++++++++++++++++++++++++++++------
 vl.c      |  59 +++++++++----
 vl.h      |  18 +++-
 vlmkcow.c | 142 +++++++++++++++++++++++++++++
 5 files changed, 442 insertions(+), 44 deletions(-)
 create mode 100644 vlmkcow.c

diff --git a/Makefile b/Makefile
index 0fa0fa303d..ef6aef2851 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,7 @@ else
 LDFLAGS+=-Wl,-shared
 endif
 ifeq ($(TARGET_ARCH), i386)
-PROGS+=vl
+PROGS+=vl vlmkcow
 endif
 endif

@@ -141,7 +141,10 @@ endif

 # must use static linking to avoid leaving stuff in virtual address space
 vl: vl.o block.o libqemu.a
-	$(CC) -static -Wl,-T,i386-vl.ld -o $@ $^ $(LIBS)
+	$(CC) -pg -static -Wl,-T,i386-vl.ld -o $@ $^ $(LIBS)
+
+vlmkcow: vlmkcow.o
+	$(CC) -o $@ $^ $(LIBS)

 depend: $(SRCS)
 	$(CC) -MM $(CFLAGS) $^ 1>.depend
diff --git a/block.c b/block.c
index c687ff7928..3e1ae0da21 100644
--- a/block.c
+++ b/block.c
@@ -38,76 +38,286 @@
 #include
 #include
 #include
+#include

 #include "vl.h"

+#define NO_THUNK_TYPE_SIZE
+#include "thunk.h"
+
 struct BlockDriverState {
-    int fd;
+    int fd; /* if -1, only COW mappings */
     int64_t total_sectors;
     int read_only;
+
+    uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */
+    uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */
+    int cow_bitmap_size;
+    int cow_fd;
+    int64_t cow_sectors_offset;
+    char filename[1024];
 };

-BlockDriverState *bdrv_open(const char *filename)
+BlockDriverState *bdrv_open(const char *filename, int snapshot)
 {
     BlockDriverState *bs;
-    int fd;
+    int fd, cow_fd;
     int64_t size;
+    char template[] = "/tmp/vl.XXXXXX";
+    struct cow_header_v2 cow_header;
+    struct stat st;

     bs = malloc(sizeof(BlockDriverState));
     if(!bs)
         return NULL;
     bs->read_only = 0;
-    fd = open(filename, O_RDWR);
+    bs->fd = -1;
+    bs->cow_fd = -1;
+    bs->cow_bitmap = bs->cow_bitmap_addr = NULL; /* bdrv_close() tests cow_bitmap_addr on the fail path */
+    snprintf(bs->filename, sizeof(bs->filename), "%s", filename); /* bounded copy, always terminated */
+
+    /* open standard HD image */
+    fd = open(filename, O_RDWR | O_LARGEFILE);
     if (fd < 0) {
-        fd = open(filename, O_RDONLY);
+        /* read only image on disk */
+        fd = open(filename, O_RDONLY | O_LARGEFILE);
         if (fd < 0) {
-            close(fd);
-            free(bs);
-            return NULL;
+            perror(filename);
+            goto fail;
         }
-        bs->read_only = 1;
+        if (!snapshot)
+            bs->read_only = 1;
     }
-    size = lseek64(fd, 0, SEEK_END);
-    bs->total_sectors = size / 512;
     bs->fd = fd;
+
+    /* see if it is a cow image */
+    if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) {
+        fprintf(stderr, "%s: could not read header\n", filename);
+        goto fail;
+    }
+    if (cow_header.magic == htonl(COW_MAGIC) &&
+        cow_header.version == htonl(COW_VERSION)) {
+        /* cow image found */
+        size = cow_header.size;
+#ifndef WORDS_BIGENDIAN
+        size = bswap64(size);
+#endif
+        bs->total_sectors = size / 512;
+
+        bs->cow_fd = fd;
+        bs->fd = -1;
+        if (cow_header.backing_file[0] != '\0') {
+            if (stat(cow_header.backing_file, &st) != 0) {
+                fprintf(stderr, "%s: could not find original disk image '%s'\n", filename, cow_header.backing_file);
+                goto fail;
+            }
+            if (st.st_mtime != ntohl(cow_header.mtime)) {
+                fprintf(stderr, "%s: original raw disk image '%s' does not match saved timestamp\n", filename, cow_header.backing_file);
+                goto fail;
+            }
+            fd = open(cow_header.backing_file, O_RDONLY | O_LARGEFILE);
+            if (fd < 0)
+                goto fail;
+            bs->fd = fd;
+        }
+        /* mmap the bitmap */
+        bs->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
+        bs->cow_bitmap_addr = mmap(get_mmap_addr(bs->cow_bitmap_size),
+                                   bs->cow_bitmap_size,
+                                   PROT_READ | PROT_WRITE,
+                                   MAP_SHARED, bs->cow_fd, 0);
+        if (bs->cow_bitmap_addr == MAP_FAILED)
+            goto fail;
+        bs->cow_bitmap = bs->cow_bitmap_addr + sizeof(cow_header);
+        bs->cow_sectors_offset = (bs->cow_bitmap_size + 511) & ~511;
+        snapshot = 0;
+    } else {
+        /* standard raw image */
+        size = lseek64(fd, 0, SEEK_END);
+        bs->total_sectors = size / 512;
+        bs->fd = fd;
+    }
+
+    if (snapshot) {
+        /* create a temporary COW file */
+        cow_fd = mkstemp(template);
+        if (cow_fd < 0)
+            goto fail;
+        bs->cow_fd = cow_fd;
+        unlink(template);
+
+        /* just need to allocate bitmap */
+        bs->cow_bitmap_size = (bs->total_sectors + 7) >> 3;
+        bs->cow_bitmap_addr = mmap(get_mmap_addr(bs->cow_bitmap_size),
+                                   bs->cow_bitmap_size,
+                                   PROT_READ | PROT_WRITE,
+                                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (bs->cow_bitmap_addr == MAP_FAILED)
+            goto fail;
+        bs->cow_bitmap = bs->cow_bitmap_addr;
+        bs->cow_sectors_offset = 0;
+    }
+
     return bs;
+ fail:
+    bdrv_close(bs);
+    return NULL;
 }

 void bdrv_close(BlockDriverState *bs)
 {
-    close(bs->fd);
+    /* we unmap the mapping so that it is written to the COW file */
+    if (bs->cow_bitmap_addr && bs->cow_bitmap_addr != MAP_FAILED)
+        munmap(bs->cow_bitmap_addr, bs->cow_bitmap_size);
+    if (bs->cow_fd >= 0)
+        close(bs->cow_fd);
+    if (bs->fd >= 0)
+        close(bs->fd);
     free(bs);
 }

+static inline void set_bit(uint8_t *bitmap, int64_t bitnum)
+{
+    bitmap[bitnum / 8] |= (1 << (bitnum%8));
+}
+
+static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum)
+{
+    return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
+}
+
+
+/* Return true if first block has been changed (ie. current version is
+ * in COW file). Set the number of continuous blocks for which that
+ * is true.
*/
+static int is_changed(uint8_t *bitmap,
+                      int64_t sector_num, int nb_sectors,
+                      int *num_same)
+{
+    int changed;
+
+    if (!bitmap || nb_sectors == 0) {
+        *num_same = nb_sectors;
+        return 0;
+    }
+
+    changed = is_bit_set(bitmap, sector_num);
+    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
+        if (is_bit_set(bitmap, sector_num + *num_same) != changed)
+            break;
+    }
+
+    return changed;
+}
+
+/* commit COW file into the raw image; return 0 if OK or if no COW bitmap is in use, -1 if error */
+int bdrv_commit(BlockDriverState *bs)
+{
+    int64_t i;
+    uint8_t *cow_bitmap;
+
+    if (!bs->cow_bitmap) {
+        fprintf(stderr, "Already committed to %s\n", bs->filename);
+        return 0;
+    }
+
+    if (bs->read_only) {
+        fprintf(stderr, "Can't commit to %s: read-only\n", bs->filename);
+        return -1;
+    }
+
+    cow_bitmap = bs->cow_bitmap;
+    for (i = 0; i < bs->total_sectors; i++) {
+        if (is_bit_set(cow_bitmap, i)) {
+            unsigned char sector[512];
+            if (bdrv_read(bs, i, sector, 1) != 0) {
+                fprintf(stderr, "Error reading sector %lli: aborting commit\n",
+                        (long long)i);
+                return -1;
+            }
+
+            /* Make bdrv_write write to real file for a moment. */
+            bs->cow_bitmap = NULL;
+            if (bdrv_write(bs, i, sector, 1) != 0) {
+                fprintf(stderr, "Error writing sector %lli: aborting commit\n",
+                        (long long)i);
+                bs->cow_bitmap = cow_bitmap;
+                return -1;
+            }
+            bs->cow_bitmap = cow_bitmap;
+        }
+    }
+    fprintf(stderr, "Committed snapshot to %s\n", bs->filename);
+    return 0;
+}
+
 /* return -1 if error */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors)
 {
-    int ret;
+    int ret, n, fd;
+    int64_t offset;
+
+    while (nb_sectors > 0) {
+        if (is_changed(bs->cow_bitmap, sector_num, nb_sectors, &n)) {
+            fd = bs->cow_fd;
+            offset = bs->cow_sectors_offset;
+        } else {
+            fd = bs->fd;
+            offset = 0;
+        }

-    lseek64(bs->fd, sector_num * 512, SEEK_SET);
-    ret = read(bs->fd, buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512)
-        return -1;
-    else
-        return 0;
+        if (fd < 0) {
+            /* no file, just return empty sectors */
+            memset(buf, 0, n * 512);
+        } else {
+            offset += sector_num * 512;
+            if (lseek64(fd, offset, SEEK_SET) == -1)
+                return -1; /* a failed seek is an error, as in bdrv_write */
+            ret = read(fd, buf, n * 512);
+            if (ret != n * 512)
+                return -1;
+        }
+        nb_sectors -= n;
+        sector_num += n;
+        buf += n * 512;
+    }
+    return 0;
 }

 /* return -1 if error */
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors)
 {
-    int ret;
+    int ret, fd, i;
+    int64_t offset, retl;

     if (bs->read_only)
         return -1;

-    lseek64(bs->fd, sector_num * 512, SEEK_SET);
-    ret = write(bs->fd, buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512)
+    if (bs->cow_bitmap) {
+        fd = bs->cow_fd;
+        offset = bs->cow_sectors_offset;
+    } else {
+        fd = bs->fd;
+        offset = 0;
+    }
+
+    offset += sector_num * 512;
+    retl = lseek64(fd, offset, SEEK_SET);
+    if (retl == -1) {
         return -1;
-    else
-        return 0;
+    }
+    ret = write(fd, buf, nb_sectors * 512);
+    if (ret != nb_sectors * 512) {
+        return -1;
+    }
+
+    if (bs->cow_bitmap) {
+        for (i = 0; i < nb_sectors; i++)
+            set_bit(bs->cow_bitmap, sector_num + i);
+    }
+    return 0;
 }

 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr)
diff --git a/vl.c b/vl.c
index 1683d59693..bb15ac4a91 100644
--- a/vl.c
+++ b/vl.c
@@ -63,6 +63,8 @@
 #define INITRD_LOAD_ADDR 0x00400000
 #define KERNEL_PARAMS_ADDR 0x00090000

+#define MAX_DISKS 2
+
 /* from plex86 (BSD license) */
 struct __attribute__ ((packed)) linux_params {
   // For 0x00..0x3f, see 'struct screen_info' in linux/include/linux/tty.h.
@@ -190,6 +192,7 @@ FILE *logfile = NULL;
 int loglevel;
 IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
 IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
+BlockDriverState *bs_table[MAX_DISKS];

 /***********************************************************/
 /* x86 io ports */
@@ -1265,6 +1268,7 @@ void term_print_help(void)
     printf("\n"
            "C-a h print this help\n"
            "C-a x exit emulatior\n"
+           "C-a s save disk data back to file (if -snapshot)\n"
            "C-a b send break (magic sysrq)\n"
            "C-a C-a send C-a\n"
            );
@@ -1282,6 +1286,15 @@ void serial_received_byte(SerialState *s, int ch)
         case 'x':
             exit(0);
             break;
+        case 's': /* commit every opened disk; bdrv_commit reports its own errors on stderr */
+            {
+                int i;
+                for (i = 0; i < MAX_DISKS; i++) {
+                    if (bs_table[i])
+                        bdrv_commit(bs_table[i]);
+                }
+            }
+            break;
         case 'b':
             /* send break */
             s->rbr = 0;
@@ -1976,8 +1989,6 @@ void ne2000_init(void)
 /* set to 1 set disable mult support */
 #define MAX_MULT_SECTORS 8

-#define MAX_DISKS 2
-
 struct IDEState;

 typedef void EndTransferFunc(struct IDEState *);
@@ -2009,7 +2020,6 @@ typedef struct IDEState {
     uint8_t io_buffer[MAX_MULT_SECTORS*512 + 4];
 } IDEState;

-BlockDriverState *bs_table[MAX_DISKS];
 IDEState ide_state[MAX_DISKS];

 static void padstr(char *str, const char *src, int len)
@@ -2513,6 +2523,16 @@ static void host_alarm_handler(int host_signum, siginfo_t *info,
     }
 }

+unsigned long mmap_addr = PHYS_RAM_BASE; /* next address handed out by get_mmap_addr() */
+
+void *get_mmap_addr(unsigned long size)
+{
+    unsigned long addr;
+    addr = mmap_addr;
+    mmap_addr += ((size + 4095) & ~4095) + 4096; /* round size up to 4096 and skip 4096 more bytes */
+    return (void *)addr;
+}
+
 /* main execution loop */

 CPUState *cpu_gdbstub_get_env(void *opaque)
@@ -2612,6 +2632,7 @@ void help(void)
            "-initrd file use 'file' as initial ram disk\n"
            "-hda file use 'file' as hard disk 0 image\n"
            "-hdb file use 'file' as hard disk 1 image\n"
+           "-snapshot write to temporary files instead of disk image files\n"
            "-m megs set virtual RAM size to megs MB\n"
            "-n script set network init script [default=%s]\n"
            "\n"
@@ -2630,12 +2651,14 @@ struct option long_options[] = {
     { "initrd", 1, NULL, 0, },
     { "hda", 1, NULL, 0, },
     { "hdb", 1, NULL, 0, },
+    { "snapshot", 0, NULL, 0, },
     { NULL, 0, NULL, 0 },
 };

 int main(int argc, char **argv)
 {
     int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, long_index;
+    int snapshot;
     struct linux_params *params;
     struct sigaction act;
     struct itimerval itv;
@@ -2652,6 +2675,7 @@ int main(int argc, char **argv)
     pstrcpy(network_script, sizeof(network_script), DEFAULT_NETWORK_SCRIPT);
     use_gdbstub = 0;
     gdbstub_port = DEFAULT_GDBSTUB_PORT;
+    snapshot = 0;
     for(;;) {
         c = getopt_long_only(argc, argv, "hm:dn:sp:", long_options, &long_index);
         if (c == -1)
@@ -2668,6 +2692,9 @@ int main(int argc, char **argv)
             case 2:
                 hd_filename[1] = optarg;
                 break;
+            case 3:
+                snapshot = 1;
+                break;
             }
             break;
         case 'h':
@@ -2711,18 +2738,6 @@ int main(int argc, char **argv)
         setvbuf(logfile, NULL, _IOLBF, 0);
     }

-    /* open the virtual block devices */
-    for(i = 0; i < MAX_DISKS; i++) {
-        if (hd_filename[i]) {
-            bs_table[i] = bdrv_open(hd_filename[i]);
-            if (!bs_table[i]) {
-                fprintf(stderr, "vl: could not open hard disk image '%s\n",
-                        hd_filename[i]);
-                exit(1);
-            }
-        }
-    }
-
     /* init network tun interface */
     net_init();

@@ -2744,7 +2759,7 @@ int main(int argc, char **argv)
     }
     ftruncate(phys_ram_fd, phys_ram_size);
     unlink(phys_ram_file);
-    phys_ram_base = mmap((void *)PHYS_RAM_BASE, phys_ram_size,
+    phys_ram_base = mmap(get_mmap_addr(phys_ram_size), phys_ram_size,
                          PROT_WRITE | PROT_READ, MAP_SHARED | MAP_FIXED,
                          phys_ram_fd, 0);
     if (phys_ram_base == MAP_FAILED) {
@@ -2752,6 +2767,18 @@ int main(int argc, char **argv)
         exit(1);
     }

+    /* open the virtual block devices (after the RAM mapping, since bdrv_open also calls get_mmap_addr) */
+    for(i = 0; i < MAX_DISKS; i++) {
+        if (hd_filename[i]) {
+            bs_table[i] = bdrv_open(hd_filename[i], snapshot);
+            if (!bs_table[i]) {
+                fprintf(stderr, "vl: could not open hard disk image '%s'\n",
+                        hd_filename[i]);
+                exit(1);
+            }
+        }
+    }
+
     /* now we can load the kernel */
     ret = load_kernel(argv[optind], phys_ram_base + KERNEL_LOAD_ADDR);
     if (ret < 0) {
diff --git a/vl.h b/vl.h
index 87b569d0f6..fa7d5da274 100644
--- a/vl.h
+++ b/vl.h
@@ -24,16 +24,32 @@
 #ifndef VL_H
 #define VL_H

+/* vl.c */
+void *get_mmap_addr(unsigned long size);
+
 /* block.c */
 typedef struct BlockDriverState BlockDriverState;

-BlockDriverState *bdrv_open(const char *filename);
+BlockDriverState *bdrv_open(const char *filename, int snapshot);
 void bdrv_close(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors);
 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr);
+int bdrv_commit(BlockDriverState *bs);

+/* user mode linux compatible COW file; integer header fields are stored big endian */
+#define COW_MAGIC 0x4f4f4f4d /* MOOO */
+#define COW_VERSION 2
+
+struct cow_header_v2 {
+    uint32_t magic;
+    uint32_t version;
+    char backing_file[1024];
+    int32_t mtime;
+    uint64_t size;
+    uint32_t sectorsize;
+};

 #endif /* VL_H */
diff --git a/vlmkcow.c b/vlmkcow.c
new file mode 100644
index 0000000000..435ed21d2d
--- /dev/null
+++ b/vlmkcow.c
@@ -0,0 +1,142 @@
+/*
+ * create a COW disk image
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice
and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "vl.h"
+
+#define NO_THUNK_TYPE_SIZE
+#include "thunk.h"
+
+/* Write a COW header to cow_fd (sized from image_filename if non NULL, else from
+   image_sectors) and extend the file over the bitmap. Return 0 if OK, -1 if error. */
+int cow_create(int cow_fd, const char *image_filename,
+               int64_t image_sectors)
+{
+    struct cow_header_v2 cow_header;
+    int fd;
+    struct stat st;
+
+    memset(&cow_header, 0, sizeof(cow_header));
+    cow_header.magic = htonl(COW_MAGIC);
+    cow_header.version = htonl(COW_VERSION);
+    if (image_filename) {
+        fd = open(image_filename, O_RDONLY);
+        if (fd < 0) {
+            perror(image_filename);
+            return -1;
+        }
+        image_sectors = lseek64(fd, 0, SEEK_END);
+        if (fstat(fd, &st) != 0) {
+            close(fd);
+            return -1;
+        }
+        close(fd);
+        image_sectors /= 512;
+        cow_header.mtime = htonl(st.st_mtime);
+        realpath(image_filename, cow_header.backing_file);
+    }
+    cow_header.sectorsize = htonl(512);
+    cow_header.size = image_sectors * 512;
+#ifndef WORDS_BIGENDIAN
+    cow_header.size = bswap64(cow_header.size);
+#endif
+    if (write(cow_fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header))
+        return -1;
+    /* resize to include at least all the bitmap */
+    if (ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3)) != 0)
+        return -1;
+    lseek(cow_fd, 0, SEEK_SET);
+    return 0;
+}
+
+void help(void)
+{
+    printf("usage vlmkcow [-h] [-f disk_image] cow_image [cow_size]\n"
+           "Create a Copy On Write disk image from an optional raw disk image\n"
+           "\n"
+           "-f disk_image set the raw disk image name\n"
+           "cow_image the created cow_image\n"
+           "cow_size the create cow_image size in MB if no raw disk image is used\n"
+           "\n"
+           "Once the cow_image is created from a raw disk image, you must not modify the original raw disk image\n"
+           );
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{
+    const char *image_filename = NULL, *cow_filename;
+    int cow_fd, c, nb_args;
+    int64_t image_size = 0;
+
+    for(;;) {
+        c = getopt(argc, argv, "hf:");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        case 'f':
+            image_filename = optarg;
+            break;
+        }
+    }
+    nb_args = image_filename ? 1 : 2; /* cow_size is required only without -f */
+    if (optind + nb_args != argc)
+        help();
+
+    cow_filename = argv[optind];
+    if (nb_args == 2)
+        image_size = (int64_t)atoi(argv[optind + 1]) * 2 * 1024; /* MB -> 512 byte sectors */
+
+    cow_fd = open(cow_filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
+    if (cow_fd < 0) {
+        perror(cow_filename);
+        return -1;
+    }
+    if (cow_create(cow_fd, image_filename, image_size) < 0) {
+        fprintf(stderr, "%s: error while formating\n", cow_filename);
+        exit(1);
+    }
+    close(cow_fd);
+    return 0;
+}