2006-08-05 21:14:20 +00:00
|
|
|
/*
|
|
|
|
* Block driver for the QCOW version 2 format
|
2007-09-16 21:08:06 +00:00
|
|
|
*
|
2006-08-05 21:14:20 +00:00
|
|
|
* Copyright (c) 2004-2006 Fabrice Bellard
|
2007-09-16 21:08:06 +00:00
|
|
|
*
|
2006-08-05 21:14:20 +00:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
2016-01-18 18:01:42 +00:00
|
|
|
#include "qemu/osdep.h"
|
2012-12-17 18:19:44 +01:00
|
|
|
#include "block/block_int.h"
|
2016-03-08 15:57:05 +01:00
|
|
|
#include "sysemu/block-backend.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/module.h"
|
2006-08-05 21:14:20 +00:00
|
|
|
#include <zlib.h>
|
2009-05-28 16:07:04 +02:00
|
|
|
#include "block/qcow2.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/error-report.h"
|
2012-12-17 18:19:43 +01:00
|
|
|
#include "qapi/qmp/qerror.h"
|
2013-03-18 13:08:10 +01:00
|
|
|
#include "qapi/qmp/qbool.h"
|
2014-09-05 16:07:16 +02:00
|
|
|
#include "qapi/qmp/types.h"
|
|
|
|
#include "qapi-event.h"
|
2012-03-01 18:36:21 +01:00
|
|
|
#include "trace.h"
|
2014-06-05 17:20:59 +08:00
|
|
|
#include "qemu/option_int.h"
|
2016-03-20 19:16:19 +02:00
|
|
|
#include "qemu/cutils.h"
|
2016-03-15 17:22:36 +01:00
|
|
|
#include "qemu/bswap.h"
|
2017-06-23 17:24:10 +01:00
|
|
|
#include "qapi/opts-visitor.h"
|
|
|
|
#include "qapi-visit.h"
|
|
|
|
#include "block/crypto.h"
|
2006-08-05 21:14:20 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
Differences with QCOW:
|
|
|
|
|
|
|
|
- Support for multiple incremental snapshots.
|
|
|
|
- Memory management by reference counts.
|
|
|
|
- Clusters which have a reference count of one have the bit
|
|
|
|
QCOW_OFLAG_COPIED to optimize write performance.
|
2007-09-16 21:08:06 +00:00
|
|
|
- Size of compressed clusters is stored in sectors to reduce bit usage
|
2006-08-05 21:14:20 +00:00
|
|
|
in the cluster offsets.
|
|
|
|
- Support for storing additional data (such as the VM state) in the
|
2007-09-17 08:09:54 +00:00
|
|
|
snapshots.
|
2006-08-05 21:14:20 +00:00
|
|
|
- If a backing store is used, the cluster size is not constrained
|
|
|
|
(could be backported to QCOW).
|
|
|
|
- L2 tables always have a size of one cluster.
|
|
|
|
*/
|
|
|
|
|
2009-03-28 17:55:06 +00:00
|
|
|
|
|
|
|
/*
 * Fixed-size header that precedes the payload of every qcow2 header
 * extension. It is read raw from the image file; both fields are converted
 * from big-endian by qcow2_read_extensions() before use.
 */
typedef struct {
    uint32_t magic;     /* extension type, one of QCOW2_EXT_MAGIC_* */
    uint32_t len;       /* payload length in bytes, not counting this header */
} QEMU_PACKED QCowExtension;
|
2012-09-20 15:13:28 -04:00
|
|
|
|
2010-12-17 16:02:39 +01:00
|
|
|
/* Magic numbers identifying the type of a header extension. */
#define QCOW2_EXT_MAGIC_END             0           /* ends the extension list */
#define QCOW2_EXT_MAGIC_BACKING_FORMAT  0xE2792ACA  /* backing file format name */
#define QCOW2_EXT_MAGIC_FEATURE_TABLE   0x6803f857  /* feature name table */
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER   0x0537be77  /* encryption header location */
#define QCOW2_EXT_MAGIC_BITMAPS         0x23852875  /* bitmap directory location */
|
2009-03-28 17:55:06 +00:00
|
|
|
|
2010-12-17 16:02:39 +01:00
|
|
|
static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
|
2006-08-05 21:14:20 +00:00
|
|
|
{
|
|
|
|
const QCowHeader *cow_header = (const void *)buf;
|
2007-09-17 08:09:54 +00:00
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
if (buf_size >= sizeof(QCowHeader) &&
|
|
|
|
be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
|
2011-12-15 12:20:58 +01:00
|
|
|
be32_to_cpu(cow_header->version) >= 2)
|
2006-08-05 21:14:20 +00:00
|
|
|
return 100;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-03-28 17:55:06 +00:00
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
/*
 * Read callback handed to the crypto layer: fetch @buflen bytes of the
 * encryption header, starting @offset bytes into the header area described
 * by s->crypto_header, into @buf.
 *
 * @block is unused; @opaque is the qcow2 BlockDriverState.
 *
 * Returns the (non-negative) result of bdrv_pread() on success. On failure
 * sets @errp and returns -1.
 */
static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
                                          uint8_t *buf, size_t buflen,
                                          void *opaque, Error **errp)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    ssize_t ret;

    /*
     * Written so that it cannot wrap around: "offset + buflen" could
     * overflow and make an out-of-range request look valid.
     */
    if (offset > s->crypto_header.length ||
        buflen > s->crypto_header.length - offset) {
        error_setg(errp, "Request for data outside of extension header");
        return -1;
    }

    ret = bdrv_pread(bs->file,
                     s->crypto_header.offset + offset, buf, buflen);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read encryption header");
        return -1;
    }
    return ret;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Init callback handed to the crypto layer: allocate clusters for an
 * encryption header of @headerlen bytes, record their location in
 * s->crypto_header and zero the tail of the allocation that the header
 * itself does not cover.
 *
 * @block is unused; @opaque is the qcow2 BlockDriverState.
 *
 * Returns the (non-negative) result of bdrv_pwrite_zeroes() on success.
 * On failure sets @errp and returns -1.
 */
static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
                                          void *opaque, Error **errp)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    int64_t hdr_offset;
    int64_t alloc_len;
    int64_t ret;

    hdr_offset = qcow2_alloc_clusters(bs, headerlen);
    if (hdr_offset < 0) {
        error_setg_errno(errp, -hdr_offset,
                         "Cannot allocate cluster for LUKS header size %zu",
                         headerlen);
        return -1;
    }

    s->crypto_header.length = headerlen;
    s->crypto_header.offset = hdr_offset;

    /* Zero fill remaining space in cluster so it has predictable
     * content in case of future spec changes */
    alloc_len = size_to_clusters(s, headerlen) * s->cluster_size;
    ret = bdrv_pwrite_zeroes(bs->file,
                             hdr_offset + headerlen,
                             alloc_len - headerlen, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not zero fill encryption header");
        return -1;
    }

    return ret;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Write callback handed to the crypto layer: store @buflen bytes from @buf
 * into the encryption header, starting @offset bytes into the header area
 * described by s->crypto_header.
 *
 * @block is unused; @opaque is the qcow2 BlockDriverState.
 *
 * Returns the (non-negative) result of bdrv_pwrite() on success. On failure
 * sets @errp and returns -1.
 */
static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
                                           const uint8_t *buf, size_t buflen,
                                           void *opaque, Error **errp)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    ssize_t ret;

    /*
     * Written so that it cannot wrap around: "offset + buflen" could
     * overflow and make an out-of-range request look valid.
     */
    if (offset > s->crypto_header.length ||
        buflen > s->crypto_header.length - offset) {
        error_setg(errp, "Request for data outside of extension header");
        return -1;
    }

    ret = bdrv_pwrite(bs->file,
                      s->crypto_header.offset + offset, buf, buflen);
    if (ret < 0) {
        /* This is the write path; the message used to say "read" */
        error_setg_errno(errp, -ret, "Could not write encryption header");
        return -1;
    }
    return ret;
}
|
|
|
|
|
|
|
|
|
2009-03-28 17:55:06 +00:00
|
|
|
/*
|
|
|
|
* read qcow2 extension and fill bs
|
|
|
|
* start reading from start_offset
|
|
|
|
* finish reading upon magic of value 0 or when end_offset reached
|
|
|
|
* unknown magic is skipped (future extension this version knows nothing about)
|
|
|
|
* return 0 upon success, non-0 otherwise
|
|
|
|
*/
|
2010-12-17 16:02:39 +01:00
|
|
|
static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
|
2013-09-05 09:40:43 +02:00
|
|
|
uint64_t end_offset, void **p_feature_table,
|
2017-06-28 15:05:08 +03:00
|
|
|
int flags, bool *need_update_header,
|
|
|
|
Error **errp)
|
2009-03-28 17:55:06 +00:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2009-03-28 17:55:06 +00:00
|
|
|
QCowExtension ext;
|
|
|
|
uint64_t offset;
|
2012-02-02 14:52:08 +01:00
|
|
|
int ret;
|
2017-06-28 15:05:08 +03:00
|
|
|
Qcow2BitmapHeaderExt bitmaps_ext;
|
|
|
|
|
|
|
|
if (need_update_header != NULL) {
|
|
|
|
*need_update_header = false;
|
|
|
|
}
|
2009-03-28 17:55:06 +00:00
|
|
|
|
|
|
|
#ifdef DEBUG_EXT
|
2010-12-17 16:02:39 +01:00
|
|
|
printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
|
2009-03-28 17:55:06 +00:00
|
|
|
#endif
|
|
|
|
offset = start_offset;
|
|
|
|
while (offset < end_offset) {
|
|
|
|
|
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
/* Sanity check */
|
|
|
|
if (offset > s->cluster_size)
|
2010-12-17 16:02:39 +01:00
|
|
|
printf("qcow2_read_extension: suspicious offset %lu\n", offset);
|
2009-03-28 17:55:06 +00:00
|
|
|
|
2011-11-22 18:06:25 +08:00
|
|
|
printf("attempting to read extended header in offset %lu\n", offset);
|
2009-03-28 17:55:06 +00:00
|
|
|
#endif
|
|
|
|
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
|
2013-09-05 09:40:43 +02:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
|
|
|
|
"pread fail from offset %" PRIu64, offset);
|
2009-03-28 17:55:06 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
be32_to_cpus(&ext.magic);
|
|
|
|
be32_to_cpus(&ext.len);
|
|
|
|
offset += sizeof(ext);
|
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
printf("ext.magic = 0x%x\n", ext.magic);
|
|
|
|
#endif
|
2014-11-25 18:12:40 +01:00
|
|
|
if (offset > end_offset || ext.len > end_offset - offset) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg(errp, "Header extension too large");
|
2012-02-22 12:37:13 +01:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2009-03-28 17:55:06 +00:00
|
|
|
switch (ext.magic) {
|
2010-12-17 16:02:39 +01:00
|
|
|
case QCOW2_EXT_MAGIC_END:
|
2009-03-28 17:55:06 +00:00
|
|
|
return 0;
|
2009-03-28 17:55:14 +00:00
|
|
|
|
2010-12-17 16:02:39 +01:00
|
|
|
case QCOW2_EXT_MAGIC_BACKING_FORMAT:
|
2009-03-28 17:55:14 +00:00
|
|
|
if (ext.len >= sizeof(bs->backing_format)) {
|
2014-04-29 19:03:12 +02:00
|
|
|
error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
|
|
|
|
" too large (>=%zu)", ext.len,
|
|
|
|
sizeof(bs->backing_format));
|
2009-03-28 17:55:14 +00:00
|
|
|
return 2;
|
|
|
|
}
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
|
2013-09-05 09:40:43 +02:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
|
|
|
|
"Could not read format name");
|
2009-03-28 17:55:14 +00:00
|
|
|
return 3;
|
2013-09-05 09:40:43 +02:00
|
|
|
}
|
2009-03-28 17:55:14 +00:00
|
|
|
bs->backing_format[ext.len] = '\0';
|
2015-04-07 15:03:16 +02:00
|
|
|
s->image_backing_format = g_strdup(bs->backing_format);
|
2009-03-28 17:55:14 +00:00
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
printf("Qcow2: Got format extension %s\n", bs->backing_format);
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
|
2012-04-12 15:20:27 +02:00
|
|
|
case QCOW2_EXT_MAGIC_FEATURE_TABLE:
|
|
|
|
if (p_feature_table != NULL) {
|
|
|
|
void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
|
2012-04-12 15:20:27 +02:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
|
|
|
|
"Could not read table");
|
2012-04-12 15:20:27 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
*p_feature_table = feature_table;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
|
|
|
|
unsigned int cflags = 0;
|
|
|
|
if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
|
|
|
|
error_setg(errp, "CRYPTO header extension only "
|
|
|
|
"expected with LUKS encryption method");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
|
|
|
|
error_setg(errp, "CRYPTO header extension size %u, "
|
|
|
|
"but expected size %zu", ext.len,
|
|
|
|
sizeof(Qcow2CryptoHeaderExtension));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Unable to read CRYPTO header extension");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
be64_to_cpus(&s->crypto_header.offset);
|
|
|
|
be64_to_cpus(&s->crypto_header.length);
|
|
|
|
|
|
|
|
if ((s->crypto_header.offset % s->cluster_size) != 0) {
|
|
|
|
error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
|
|
|
|
"not a multiple of cluster size '%u'",
|
|
|
|
s->crypto_header.offset, s->cluster_size);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & BDRV_O_NO_IO) {
|
|
|
|
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
|
|
|
}
|
2017-06-23 17:24:17 +01:00
|
|
|
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
qcow2_crypto_hdr_read_func,
|
|
|
|
bs, cflags, errp);
|
|
|
|
if (!s->crypto) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} break;
|
|
|
|
|
2017-06-28 15:05:08 +03:00
|
|
|
case QCOW2_EXT_MAGIC_BITMAPS:
|
|
|
|
if (ext.len != sizeof(bitmaps_ext)) {
|
|
|
|
error_setg_errno(errp, -ret, "bitmaps_ext: "
|
|
|
|
"Invalid extension length");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
|
2017-09-11 12:52:46 -07:00
|
|
|
warn_report("a program lacking bitmap support "
|
|
|
|
"modified this file, so all bitmaps are now "
|
|
|
|
"considered inconsistent");
|
|
|
|
error_printf("Some clusters may be leaked, "
|
|
|
|
"run 'qemu-img check -r' on the image "
|
2017-06-28 15:05:08 +03:00
|
|
|
"file to fix.");
|
|
|
|
if (need_update_header != NULL) {
|
|
|
|
/* Updating is needed to drop invalid bitmap extension. */
|
|
|
|
*need_update_header = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "bitmaps_ext: "
|
|
|
|
"Could not read ext header");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bitmaps_ext.reserved32 != 0) {
|
|
|
|
error_setg_errno(errp, -ret, "bitmaps_ext: "
|
|
|
|
"Reserved field is not zero");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
be32_to_cpus(&bitmaps_ext.nb_bitmaps);
|
|
|
|
be64_to_cpus(&bitmaps_ext.bitmap_directory_size);
|
|
|
|
be64_to_cpus(&bitmaps_ext.bitmap_directory_offset);
|
|
|
|
|
|
|
|
if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
|
|
|
|
error_setg(errp,
|
|
|
|
"bitmaps_ext: Image has %" PRIu32 " bitmaps, "
|
|
|
|
"exceeding the QEMU supported maximum of %d",
|
|
|
|
bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bitmaps_ext.nb_bitmaps == 0) {
|
|
|
|
error_setg(errp, "found bitmaps extension with zero bitmaps");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
|
|
|
|
error_setg(errp, "bitmaps_ext: "
|
|
|
|
"invalid bitmap directory offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bitmaps_ext.bitmap_directory_size >
|
|
|
|
QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
|
|
|
|
error_setg(errp, "bitmaps_ext: "
|
|
|
|
"bitmap directory size (%" PRIu64 ") exceeds "
|
|
|
|
"the maximum supported size (%d)",
|
|
|
|
bitmaps_ext.bitmap_directory_size,
|
|
|
|
QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
|
|
|
|
s->bitmap_directory_offset =
|
|
|
|
bitmaps_ext.bitmap_directory_offset;
|
|
|
|
s->bitmap_directory_size =
|
|
|
|
bitmaps_ext.bitmap_directory_size;
|
|
|
|
|
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
printf("Qcow2: Got bitmaps extension: "
|
|
|
|
"offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
|
|
|
|
s->bitmap_directory_offset, s->nb_bitmaps);
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
|
2009-03-28 17:55:06 +00:00
|
|
|
default:
|
2012-02-02 14:52:08 +01:00
|
|
|
/* unknown magic - save it in case we need to rewrite the header */
|
|
|
|
{
|
|
|
|
Qcow2UnknownHeaderExtension *uext;
|
|
|
|
|
|
|
|
uext = g_malloc0(sizeof(*uext) + ext.len);
|
|
|
|
uext->magic = ext.magic;
|
|
|
|
uext->len = ext.len;
|
|
|
|
QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
|
|
|
|
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
|
2012-02-02 14:52:08 +01:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "ERROR: unknown extension: "
|
|
|
|
"Could not read data");
|
2012-02-02 14:52:08 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
2009-03-28 17:55:06 +00:00
|
|
|
break;
|
|
|
|
}
|
2012-02-22 12:31:47 +01:00
|
|
|
|
|
|
|
offset += ((ext.len + 7) & ~7);
|
2009-03-28 17:55:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-02-02 14:52:08 +01:00
|
|
|
/*
 * Unlink and free every entry of s->unknown_header_ext, the list of header
 * extensions that were saved because their magic was not recognised.
 */
static void cleanup_unknown_header_ext(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2UnknownHeaderExtension *entry;
    Qcow2UnknownHeaderExtension *following;

    /* The _SAFE variant is required because entries are removed while
     * the list is being walked */
    QLIST_FOREACH_SAFE(entry, &s->unknown_header_ext, next, following) {
        QLIST_REMOVE(entry, next);
        g_free(entry);
    }
}
|
2009-03-28 17:55:06 +00:00
|
|
|
|
2016-03-16 19:54:33 +01:00
|
|
|
/*
 * Set @errp to a message listing the incompatible features in @mask.
 *
 * @table: feature name table, terminated by an entry whose name is empty;
 *         may be NULL
 * @mask:  bit mask of the unsupported incompatible features
 *
 * Every bit of @mask that has a matching "incompatible" entry in @table is
 * reported by name. Whatever bits are left over are reported together as
 * one hexadecimal number.
 */
static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
                                       uint64_t mask)
{
    char *features = g_strdup("");
    char *old;

    for (; table && table->name[0] != '\0'; table++) {
        uint64_t feature_bit;

        if (table->type != QCOW2_FEAT_TYPE_INCOMPATIBLE) {
            continue;
        }

        /*
         * Shifting a 64-bit value by 64 or more is undefined behaviour, and
         * such a bit number cannot name a bit of @mask anyway. Skip these
         * entries rather than trusting the table (presumably it comes from
         * the image file -- confirm against the caller).
         */
        if (table->bit >= 64) {
            continue;
        }

        feature_bit = 1ULL << table->bit;
        if (!(mask & feature_bit)) {
            continue;
        }

        /* "%.46s" bounds the read in case the name is not NUL-terminated */
        old = features;
        features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
                                   table->name);
        g_free(old);
        mask &= ~feature_bit;
    }

    /* Bits for which the table offered no name */
    if (mask) {
        old = features;
        features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
                                   old, *old ? ", " : "", mask);
        g_free(old);
    }

    error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
    g_free(features);
}
|
|
|
|
|
2012-07-27 09:05:22 +01:00
|
|
|
/*
|
|
|
|
* Sets the dirty bit and flushes afterwards if necessary.
|
|
|
|
*
|
|
|
|
* The incompatible_features bit is only set if the image file header was
|
|
|
|
* updated successfully. Therefore it is not required to check the return
|
|
|
|
* value of this function.
|
|
|
|
*/
|
2012-12-07 18:08:47 +01:00
|
|
|
int qcow2_mark_dirty(BlockDriverState *bs)
|
2012-07-27 09:05:22 +01:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-07-27 09:05:22 +01:00
|
|
|
uint64_t val;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
assert(s->qcow_version >= 3);
|
|
|
|
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
|
|
|
|
return 0; /* already dirty */
|
|
|
|
}
|
|
|
|
|
|
|
|
val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
|
2016-06-20 20:09:15 +02:00
|
|
|
ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
|
2012-07-27 09:05:22 +01:00
|
|
|
&val, sizeof(val));
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
2015-06-16 14:19:22 +02:00
|
|
|
ret = bdrv_flush(bs->file->bs);
|
2012-07-27 09:05:22 +01:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Only treat image as dirty if the header was updated successfully */
|
|
|
|
s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-07-27 09:05:19 +01:00
|
|
|
/*
|
|
|
|
* Clears the dirty bit and flushes before if necessary. Only call this
|
|
|
|
* function when there are no pending requests, it does not guard against
|
|
|
|
* concurrent requests dirtying the image.
|
|
|
|
*/
|
|
|
|
static int qcow2_mark_clean(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-07-27 09:05:19 +01:00
|
|
|
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
|
2014-04-03 13:47:50 +02:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
|
|
|
|
|
|
|
|
ret = bdrv_flush(bs);
|
2012-07-27 09:05:19 +01:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return qcow2_update_header(bs);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-08-30 14:34:24 +02:00
|
|
|
/*
|
|
|
|
* Marks the image as corrupt.
|
|
|
|
*/
|
|
|
|
int qcow2_mark_corrupt(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-08-30 14:34:24 +02:00
|
|
|
|
|
|
|
s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
|
|
|
|
return qcow2_update_header(bs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
|
|
|
|
* before if necessary.
|
|
|
|
*/
|
|
|
|
int qcow2_mark_consistent(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-08-30 14:34:24 +02:00
|
|
|
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
|
|
|
|
int ret = bdrv_flush(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
|
|
|
|
return qcow2_update_header(bs);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-08-09 13:05:55 +01:00
|
|
|
static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
|
|
|
|
BdrvCheckMode fix)
|
|
|
|
{
|
|
|
|
int ret = qcow2_check_refcounts(bs, result, fix);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fix && result->check_errors == 0 && result->corruptions == 0) {
|
2013-08-30 14:34:30 +02:00
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return qcow2_mark_consistent(bs);
|
2012-08-09 13:05:55 +01:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-03-26 13:05:44 +01:00
|
|
|
/*
 * Sanity-check the location of a table described by header fields.
 *
 * @offset:    start of the table
 * @entries:   number of table entries
 * @entry_len: size of one entry in bytes (must not be 0, it is used as a
 *             divisor)
 *
 * Returns 0 if the table size and its end offset both stay within
 * INT64_MAX and @offset is cluster aligned, -EINVAL otherwise.
 */
static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
                                 uint64_t entries, size_t entry_len)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t table_bytes;

    /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
     * because values will be passed to qemu functions taking int64_t. */
    if (entries > INT64_MAX / entry_len) {
        return -EINVAL;
    }
    table_bytes = entries * entry_len;

    /* offset + table_bytes must not exceed INT64_MAX either */
    if (INT64_MAX - table_bytes < offset) {
        return -EINVAL;
    }

    /* Tables must be cluster aligned */
    return offset_into_cluster(s, offset) != 0 ? -EINVAL : 0;
}
|
|
|
|
|
2013-03-15 10:35:08 +01:00
|
|
|
static QemuOptsList qcow2_runtime_opts = {
|
|
|
|
.name = "qcow2",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
|
|
|
|
.desc = {
|
|
|
|
{
|
2013-07-17 14:45:34 +02:00
|
|
|
.name = QCOW2_OPT_LAZY_REFCOUNTS,
|
2013-03-15 10:35:08 +01:00
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Postpone refcount updates",
|
|
|
|
},
|
2013-06-19 13:44:19 +02:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_DISCARD_REQUEST,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Pass guest discard requests to the layer below",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_DISCARD_SNAPSHOT,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Generate discard requests when snapshot related space "
|
|
|
|
"is freed",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_DISCARD_OTHER,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Generate discard requests when other clusters are freed",
|
|
|
|
},
|
2013-10-10 11:09:25 +02:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "Selects which overlap checks to perform from a range of "
|
|
|
|
"templates (none, constant, cached, all)",
|
|
|
|
},
|
2014-08-20 19:59:35 +02:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_TEMPLATE,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "Selects which overlap checks to perform from a range of "
|
|
|
|
"templates (none, constant, cached, all)",
|
|
|
|
},
|
2013-10-10 11:09:25 +02:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the main qcow2 header",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the active L1 table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into an active L2 table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the refcount table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into a refcount block",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the snapshot table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into an inactive L1 table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into an inactive L2 table",
|
|
|
|
},
|
2014-08-18 22:07:33 +02:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_CACHE_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Maximum combined metadata (L2 tables and refcount blocks) "
|
|
|
|
"cache size",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_L2_CACHE_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Maximum L2 table cache size",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Maximum refcount block cache size",
|
|
|
|
},
|
2015-08-04 15:14:40 +03:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
|
|
|
|
.type = QEMU_OPT_NUMBER,
|
|
|
|
.help = "Clean unused cache entries after this time (in seconds)",
|
|
|
|
},
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
|
|
|
|
"ID of secret providing qcow2 AES key or LUKS passphrase"),
|
2013-03-15 10:35:08 +01:00
|
|
|
{ /* end of list */ }
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
2013-10-10 11:09:26 +02:00
|
|
|
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
|
|
|
|
[QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER,
|
|
|
|
[QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1,
|
|
|
|
[QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2,
|
|
|
|
[QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
|
|
|
|
[QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
|
|
|
|
[QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
|
|
|
|
[QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1,
|
|
|
|
[QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
|
|
|
|
};
|
|
|
|
|
2015-08-04 15:14:40 +03:00
|
|
|
static void cache_clean_timer_cb(void *opaque)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs = opaque;
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-08-04 15:14:40 +03:00
|
|
|
qcow2_cache_clean_unused(bs, s->l2_table_cache);
|
|
|
|
qcow2_cache_clean_unused(bs, s->refcount_block_cache);
|
|
|
|
timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
|
|
|
(int64_t) s->cache_clean_interval * 1000);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Creates the cache cleanup timer in @context and arms it for the first
 * run.  Does nothing when cache_clean_interval is 0.
 */
static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
{
    BDRVQcow2State *state = bs->opaque;
    int64_t first_run_ms;

    if (state->cache_clean_interval == 0) {
        return;
    }

    state->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
                                             SCALE_MS, cache_clean_timer_cb,
                                             bs);

    /* the interval is kept in seconds, the timer runs in milliseconds */
    first_run_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                   (int64_t) state->cache_clean_interval * 1000;
    timer_mod(state->cache_clean_timer, first_run_ms);
}
|
|
|
|
|
|
|
|
/*
 * Stops and frees the cache cleanup timer, if there is one, and resets the
 * pointer to NULL so that a later call is a no-op.
 */
static void cache_clean_timer_del(BlockDriverState *bs)
{
    BDRVQcow2State *state = bs->opaque;

    if (!state->cache_clean_timer) {
        return;
    }

    timer_del(state->cache_clean_timer);
    timer_free(state->cache_clean_timer);
    state->cache_clean_timer = NULL;
}
|
|
|
|
|
|
|
|
/* AioContext detach hook: tears down the cache cleanup timer. */
static void qcow2_detach_aio_context(BlockDriverState *bs)
{
    cache_clean_timer_del(bs);
}
|
|
|
|
|
|
|
|
static void qcow2_attach_aio_context(BlockDriverState *bs,
|
|
|
|
AioContext *new_context)
|
|
|
|
{
|
|
|
|
cache_clean_timer_init(bs, new_context);
|
|
|
|
}
|
|
|
|
|
2015-06-01 18:09:19 +02:00
|
|
|
/*
 * Determines the byte sizes of the L2 table cache and of the refcount block
 * cache from the cache size options in @opts and stores them in
 * @l2_cache_size and @refcount_cache_size.
 *
 * With the combined cache-size option set:
 *  - setting both individual sizes as well is an error;
 *  - an individual size larger than the combined size is an error;
 *  - if one individual size is set, the other one gets the remainder;
 *  - if none is set, the combined size is split so that the L2 cache gets
 *    DEFAULT_L2_REFCOUNT_SIZE_RATIO parts for each part of the refcount
 *    cache.
 *
 * Without the combined option, a missing individual size is derived from
 * the other one through DEFAULT_L2_REFCOUNT_SIZE_RATIO; if neither is set,
 * the L2 cache size defaults to the larger of DEFAULT_L2_CACHE_BYTE_SIZE
 * and DEFAULT_L2_CACHE_CLUSTERS clusters.
 *
 * On an invalid combination an error is stored in @errp and the function
 * returns early; the output values must not be relied upon in that case.
 */
static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
                             uint64_t *l2_cache_size,
                             uint64_t *refcount_cache_size, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t combined_cache_size;
    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;

    /* qemu_opt_get() yields NULL for options that were not specified */
    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
    refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);

    combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
    *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0);
    *refcount_cache_size = qemu_opt_get_size(opts,
                                             QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);

    if (combined_cache_size_set) {
        if (l2_cache_size_set && refcount_cache_size_set) {
            error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
                       " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
                       "at the same time");
            return;
        } else if (*l2_cache_size > combined_cache_size) {
            error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
                       QCOW2_OPT_CACHE_SIZE);
            return;
        } else if (*refcount_cache_size > combined_cache_size) {
            error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
                       QCOW2_OPT_CACHE_SIZE);
            return;
        }

        /* The subtractions cannot wrap: both sizes were bounded above */
        if (l2_cache_size_set) {
            *refcount_cache_size = combined_cache_size - *l2_cache_size;
        } else if (refcount_cache_size_set) {
            *l2_cache_size = combined_cache_size - *refcount_cache_size;
        } else {
            *refcount_cache_size = combined_cache_size
                                 / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
            *l2_cache_size = combined_cache_size - *refcount_cache_size;
        }
    } else {
        if (!l2_cache_size_set && !refcount_cache_size_set) {
            *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
                                 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
                                 * s->cluster_size);
            *refcount_cache_size = *l2_cache_size
                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
        } else if (!l2_cache_size_set) {
            *l2_cache_size = *refcount_cache_size
                           * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
        } else if (!refcount_cache_size_set) {
            *refcount_cache_size = *l2_cache_size
                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
        }
        /* both individual sizes given: use them as they are */
    }
}
|
|
|
|
|
2015-04-16 16:16:02 +02:00
|
|
|
typedef struct Qcow2ReopenState {
|
|
|
|
Qcow2Cache *l2_table_cache;
|
|
|
|
Qcow2Cache *refcount_block_cache;
|
|
|
|
bool use_lazy_refcounts;
|
|
|
|
int overlap_check;
|
|
|
|
bool discard_passthrough[QCOW2_DISCARD_MAX];
|
|
|
|
uint64_t cache_clean_interval;
|
2017-06-23 17:24:10 +01:00
|
|
|
QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
|
2015-04-16 16:16:02 +02:00
|
|
|
} Qcow2ReopenState;
|
|
|
|
|
|
|
|
static int qcow2_update_options_prepare(BlockDriverState *bs,
|
|
|
|
Qcow2ReopenState *r,
|
|
|
|
QDict *options, int flags,
|
|
|
|
Error **errp)
|
2015-04-16 11:29:27 +02:00
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-04-16 11:44:26 +02:00
|
|
|
QemuOpts *opts = NULL;
|
2015-04-16 11:29:27 +02:00
|
|
|
const char *opt_overlap_check, *opt_overlap_check_template;
|
|
|
|
int overlap_check_template = 0;
|
2015-04-16 11:44:26 +02:00
|
|
|
uint64_t l2_cache_size, refcount_cache_size;
|
2015-04-16 11:29:27 +02:00
|
|
|
int i;
|
2017-06-23 17:24:10 +01:00
|
|
|
const char *encryptfmt;
|
|
|
|
QDict *encryptopts = NULL;
|
2015-04-16 11:44:26 +02:00
|
|
|
Error *local_err = NULL;
|
2015-04-16 11:29:27 +02:00
|
|
|
int ret;
|
|
|
|
|
2017-06-23 17:24:10 +01:00
|
|
|
qdict_extract_subqdict(options, &encryptopts, "encrypt.");
|
|
|
|
encryptfmt = qdict_get_try_str(encryptopts, "format");
|
|
|
|
|
2015-04-16 11:44:26 +02:00
|
|
|
opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
|
|
|
|
qemu_opts_absorb_qdict(opts, options, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get L2 table/refcount block cache size from command line options */
|
|
|
|
read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
|
|
|
|
&local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
l2_cache_size /= s->cluster_size;
|
|
|
|
if (l2_cache_size < MIN_L2_CACHE_SIZE) {
|
|
|
|
l2_cache_size = MIN_L2_CACHE_SIZE;
|
|
|
|
}
|
|
|
|
if (l2_cache_size > INT_MAX) {
|
|
|
|
error_setg(errp, "L2 cache size too big");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
refcount_cache_size /= s->cluster_size;
|
|
|
|
if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
|
|
|
|
refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
|
|
|
|
}
|
|
|
|
if (refcount_cache_size > INT_MAX) {
|
|
|
|
error_setg(errp, "Refcount cache size too big");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 13:42:27 +02:00
|
|
|
/* alloc new L2 table/refcount block cache, flush old one */
|
|
|
|
if (s->l2_table_cache) {
|
|
|
|
ret = qcow2_cache_flush(bs, s->l2_table_cache);
|
|
|
|
if (ret) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->refcount_block_cache) {
|
|
|
|
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
|
|
|
|
if (ret) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to flush the refcount block cache");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-16 16:16:02 +02:00
|
|
|
r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
|
|
|
|
r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
|
|
|
|
if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
|
2015-04-16 11:44:26 +02:00
|
|
|
error_setg(errp, "Could not allocate metadata caches");
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* New interval for cache cleanup timer */
|
2015-04-16 16:16:02 +02:00
|
|
|
r->cache_clean_interval =
|
2015-04-16 13:42:27 +02:00
|
|
|
qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
|
|
|
|
s->cache_clean_interval);
|
2016-11-25 13:27:44 +02:00
|
|
|
#ifndef CONFIG_LINUX
|
|
|
|
if (r->cache_clean_interval != 0) {
|
|
|
|
error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
|
|
|
|
" not supported on this host");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
#endif
|
2015-04-16 16:16:02 +02:00
|
|
|
if (r->cache_clean_interval > UINT_MAX) {
|
2015-04-16 11:44:26 +02:00
|
|
|
error_setg(errp, "Cache clean interval too big");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 13:42:27 +02:00
|
|
|
/* lazy-refcounts; flush if going from enabled to disabled */
|
2015-04-16 16:16:02 +02:00
|
|
|
r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
|
2015-04-16 11:29:27 +02:00
|
|
|
(s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
|
2015-04-16 16:16:02 +02:00
|
|
|
if (r->use_lazy_refcounts && s->qcow_version < 3) {
|
2015-04-16 13:11:39 +02:00
|
|
|
error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
|
|
|
|
"qemu 1.1 compatibility level");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
2015-04-16 11:29:27 +02:00
|
|
|
|
2015-04-16 13:42:27 +02:00
|
|
|
if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
|
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-16 13:11:39 +02:00
|
|
|
/* Overlap check options */
|
2015-04-16 11:29:27 +02:00
|
|
|
opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
|
|
|
|
opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
|
|
|
|
if (opt_overlap_check_template && opt_overlap_check &&
|
|
|
|
strcmp(opt_overlap_check_template, opt_overlap_check))
|
|
|
|
{
|
|
|
|
error_setg(errp, "Conflicting values for qcow2 options '"
|
|
|
|
QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
|
|
|
|
"' ('%s')", opt_overlap_check, opt_overlap_check_template);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
if (!opt_overlap_check) {
|
|
|
|
opt_overlap_check = opt_overlap_check_template ?: "cached";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(opt_overlap_check, "none")) {
|
|
|
|
overlap_check_template = 0;
|
|
|
|
} else if (!strcmp(opt_overlap_check, "constant")) {
|
|
|
|
overlap_check_template = QCOW2_OL_CONSTANT;
|
|
|
|
} else if (!strcmp(opt_overlap_check, "cached")) {
|
|
|
|
overlap_check_template = QCOW2_OL_CACHED;
|
|
|
|
} else if (!strcmp(opt_overlap_check, "all")) {
|
|
|
|
overlap_check_template = QCOW2_OL_ALL;
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Unsupported value '%s' for qcow2 option "
|
|
|
|
"'overlap-check'. Allowed are any of the following: "
|
|
|
|
"none, constant, cached, all", opt_overlap_check);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 16:16:02 +02:00
|
|
|
r->overlap_check = 0;
|
2015-04-16 11:29:27 +02:00
|
|
|
for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
|
|
|
|
/* overlap-check defines a template bitmask, but every flag may be
|
|
|
|
* overwritten through the associated boolean option */
|
2015-04-16 16:16:02 +02:00
|
|
|
r->overlap_check |=
|
2015-04-16 11:29:27 +02:00
|
|
|
qemu_opt_get_bool(opts, overlap_bool_option_names[i],
|
|
|
|
overlap_check_template & (1 << i)) << i;
|
|
|
|
}
|
|
|
|
|
2015-04-16 16:16:02 +02:00
|
|
|
r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
|
|
|
|
r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
|
|
|
|
r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
|
2015-04-16 13:11:39 +02:00
|
|
|
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
|
|
|
|
flags & BDRV_O_UNMAP);
|
2015-04-16 16:16:02 +02:00
|
|
|
r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
|
2015-04-16 13:11:39 +02:00
|
|
|
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
|
2015-04-16 16:16:02 +02:00
|
|
|
r->discard_passthrough[QCOW2_DISCARD_OTHER] =
|
2015-04-16 13:11:39 +02:00
|
|
|
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
|
|
|
|
|
2017-06-23 17:24:10 +01:00
|
|
|
switch (s->crypt_method_header) {
|
|
|
|
case QCOW_CRYPT_NONE:
|
|
|
|
if (encryptfmt) {
|
|
|
|
error_setg(errp, "No encryption in image header, but options "
|
|
|
|
"specified format '%s'", encryptfmt);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case QCOW_CRYPT_AES:
|
|
|
|
if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Header reported 'aes' encryption format but "
|
|
|
|
"options specify '%s'", encryptfmt);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
qdict_del(encryptopts, "format");
|
|
|
|
r->crypto_opts = block_crypto_open_opts_init(
|
|
|
|
Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
|
|
|
|
break;
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
case QCOW_CRYPT_LUKS:
|
|
|
|
if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Header reported 'luks' encryption format but "
|
|
|
|
"options specify '%s'", encryptfmt);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
qdict_del(encryptopts, "format");
|
|
|
|
r->crypto_opts = block_crypto_open_opts_init(
|
|
|
|
Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp);
|
|
|
|
break;
|
|
|
|
|
2017-06-23 17:24:10 +01:00
|
|
|
default:
|
|
|
|
error_setg(errp, "Unsupported encryption method %d",
|
|
|
|
s->crypt_method_header);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 11:29:27 +02:00
|
|
|
ret = 0;
|
|
|
|
fail:
|
2017-06-23 17:24:10 +01:00
|
|
|
QDECREF(encryptopts);
|
2015-04-16 11:44:26 +02:00
|
|
|
qemu_opts_del(opts);
|
|
|
|
opts = NULL;
|
2015-04-16 16:16:02 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_update_options_commit(BlockDriverState *bs,
|
|
|
|
Qcow2ReopenState *r)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int i;
|
|
|
|
|
2015-04-16 13:42:27 +02:00
|
|
|
if (s->l2_table_cache) {
|
|
|
|
qcow2_cache_destroy(bs, s->l2_table_cache);
|
|
|
|
}
|
|
|
|
if (s->refcount_block_cache) {
|
|
|
|
qcow2_cache_destroy(bs, s->refcount_block_cache);
|
|
|
|
}
|
2015-04-16 16:16:02 +02:00
|
|
|
s->l2_table_cache = r->l2_table_cache;
|
|
|
|
s->refcount_block_cache = r->refcount_block_cache;
|
|
|
|
|
|
|
|
s->overlap_check = r->overlap_check;
|
|
|
|
s->use_lazy_refcounts = r->use_lazy_refcounts;
|
|
|
|
|
|
|
|
for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
|
|
|
|
s->discard_passthrough[i] = r->discard_passthrough[i];
|
|
|
|
}
|
|
|
|
|
2015-04-16 13:42:27 +02:00
|
|
|
if (s->cache_clean_interval != r->cache_clean_interval) {
|
|
|
|
cache_clean_timer_del(bs);
|
|
|
|
s->cache_clean_interval = r->cache_clean_interval;
|
|
|
|
cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
|
|
|
|
}
|
2017-06-23 17:24:10 +01:00
|
|
|
|
|
|
|
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
|
|
|
|
s->crypto_opts = r->crypto_opts;
|
2015-04-16 16:16:02 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_update_options_abort(BlockDriverState *bs,
|
|
|
|
Qcow2ReopenState *r)
|
|
|
|
{
|
|
|
|
if (r->l2_table_cache) {
|
|
|
|
qcow2_cache_destroy(bs, r->l2_table_cache);
|
|
|
|
}
|
|
|
|
if (r->refcount_block_cache) {
|
|
|
|
qcow2_cache_destroy(bs, r->refcount_block_cache);
|
|
|
|
}
|
2017-06-23 17:24:10 +01:00
|
|
|
qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
|
2015-04-16 16:16:02 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Parses @options and applies them to @bs in one step: the prepared
 * settings are committed if preparation succeeded and discarded otherwise.
 *
 * Returns the result of qcow2_update_options_prepare().
 */
static int qcow2_update_options(BlockDriverState *bs, QDict *options,
                                int flags, Error **errp)
{
    Qcow2ReopenState reopen_state = {};
    int ret;

    ret = qcow2_update_options_prepare(bs, &reopen_state, options, flags,
                                       errp);
    if (ret < 0) {
        qcow2_update_options_abort(bs, &reopen_state);
    } else {
        qcow2_update_options_commit(bs, &reopen_state);
    }

    return ret;
}
|
|
|
|
|
2016-12-16 18:52:37 +01:00
|
|
|
static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
|
|
|
|
Error **errp)
|
2006-08-05 21:14:20 +00:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2014-03-26 13:05:47 +01:00
|
|
|
unsigned int len, i;
|
|
|
|
int ret = 0;
|
2006-08-05 21:14:20 +00:00
|
|
|
QCowHeader header;
|
2013-03-15 10:35:08 +01:00
|
|
|
Error *local_err = NULL;
|
2009-03-28 17:55:06 +00:00
|
|
|
uint64_t ext_end;
|
2013-05-14 16:14:33 +02:00
|
|
|
uint64_t l1_vm_state_index;
|
2017-06-28 15:05:08 +03:00
|
|
|
bool update_header = false;
|
2006-08-05 21:14:20 +00:00
|
|
|
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
|
2010-12-17 16:02:40 +01:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not read qcow2 header");
|
2006-08-05 21:14:20 +00:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
be32_to_cpus(&header.magic);
|
|
|
|
be32_to_cpus(&header.version);
|
|
|
|
be64_to_cpus(&header.backing_file_offset);
|
|
|
|
be32_to_cpus(&header.backing_file_size);
|
|
|
|
be64_to_cpus(&header.size);
|
|
|
|
be32_to_cpus(&header.cluster_bits);
|
|
|
|
be32_to_cpus(&header.crypt_method);
|
|
|
|
be64_to_cpus(&header.l1_table_offset);
|
|
|
|
be32_to_cpus(&header.l1_size);
|
|
|
|
be64_to_cpus(&header.refcount_table_offset);
|
|
|
|
be32_to_cpus(&header.refcount_table_clusters);
|
|
|
|
be64_to_cpus(&header.snapshots_offset);
|
|
|
|
be32_to_cpus(&header.nb_snapshots);
|
2007-09-17 08:09:54 +00:00
|
|
|
|
2011-02-09 11:11:07 +01:00
|
|
|
if (header.magic != QCOW_MAGIC) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg(errp, "Image is not in qcow2 format");
|
2014-02-17 14:44:06 +01:00
|
|
|
ret = -EINVAL;
|
2006-08-05 21:14:20 +00:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2011-12-15 12:20:58 +01:00
|
|
|
if (header.version < 2 || header.version > 3) {
|
2016-03-16 19:54:33 +01:00
|
|
|
error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
|
2011-12-15 12:20:58 +01:00
|
|
|
ret = -ENOTSUP;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->qcow_version = header.version;
|
|
|
|
|
2014-03-26 13:05:41 +01:00
|
|
|
/* Initialise cluster size */
|
|
|
|
if (header.cluster_bits < MIN_CLUSTER_BITS ||
|
|
|
|
header.cluster_bits > MAX_CLUSTER_BITS) {
|
2014-04-29 19:03:12 +02:00
|
|
|
error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
|
|
|
|
header.cluster_bits);
|
2014-03-26 13:05:41 +01:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->cluster_bits = header.cluster_bits;
|
|
|
|
s->cluster_size = 1 << s->cluster_bits;
|
2017-10-09 17:38:56 +02:00
|
|
|
s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS);
|
2014-03-26 13:05:41 +01:00
|
|
|
|
2011-12-15 12:20:58 +01:00
|
|
|
/* Initialise version 3 header fields */
|
|
|
|
if (header.version == 2) {
|
|
|
|
header.incompatible_features = 0;
|
|
|
|
header.compatible_features = 0;
|
|
|
|
header.autoclear_features = 0;
|
|
|
|
header.refcount_order = 4;
|
|
|
|
header.header_length = 72;
|
|
|
|
} else {
|
|
|
|
be64_to_cpus(&header.incompatible_features);
|
|
|
|
be64_to_cpus(&header.compatible_features);
|
|
|
|
be64_to_cpus(&header.autoclear_features);
|
|
|
|
be32_to_cpus(&header.refcount_order);
|
|
|
|
be32_to_cpus(&header.header_length);
|
2014-03-26 13:05:41 +01:00
|
|
|
|
|
|
|
if (header.header_length < 104) {
|
|
|
|
error_setg(errp, "qcow2 header too short");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (header.header_length > s->cluster_size) {
|
|
|
|
error_setg(errp, "qcow2 header exceeds cluster size");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2011-12-15 12:20:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (header.header_length > sizeof(header)) {
|
|
|
|
s->unknown_header_fields_size = header.header_length - sizeof(header);
|
|
|
|
s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
|
2011-12-15 12:20:58 +01:00
|
|
|
s->unknown_header_fields_size);
|
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
|
|
|
|
"fields");
|
2011-12-15 12:20:58 +01:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-26 13:05:42 +01:00
|
|
|
if (header.backing_file_offset > s->cluster_size) {
|
|
|
|
error_setg(errp, "Invalid backing file offset");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-04-12 15:20:27 +02:00
|
|
|
if (header.backing_file_offset) {
|
|
|
|
ext_end = header.backing_file_offset;
|
|
|
|
} else {
|
|
|
|
ext_end = 1 << header.cluster_bits;
|
|
|
|
}
|
|
|
|
|
2011-12-15 12:20:58 +01:00
|
|
|
/* Handle feature bits */
|
|
|
|
s->incompatible_features = header.incompatible_features;
|
|
|
|
s->compatible_features = header.compatible_features;
|
|
|
|
s->autoclear_features = header.autoclear_features;
|
|
|
|
|
2012-07-27 09:05:19 +01:00
|
|
|
if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
|
2012-04-12 15:20:27 +02:00
|
|
|
void *feature_table = NULL;
|
|
|
|
qcow2_read_extensions(bs, header.header_length, ext_end,
|
2017-06-28 15:05:08 +03:00
|
|
|
&feature_table, flags, NULL, NULL);
|
2016-03-16 19:54:33 +01:00
|
|
|
report_unsupported_feature(errp, feature_table,
|
2012-07-27 09:05:19 +01:00
|
|
|
s->incompatible_features &
|
|
|
|
~QCOW2_INCOMPAT_MASK);
|
2011-12-15 12:20:58 +01:00
|
|
|
ret = -ENOTSUP;
|
2014-03-28 23:08:58 +05:30
|
|
|
g_free(feature_table);
|
2011-12-15 12:20:58 +01:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2013-08-30 14:34:24 +02:00
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
|
|
|
|
/* Corrupt images may not be written to unless they are being repaired
|
|
|
|
*/
|
|
|
|
if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
|
|
|
|
"read/write");
|
2013-08-30 14:34:24 +02:00
|
|
|
ret = -EACCES;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-15 12:20:58 +01:00
|
|
|
/* Check support for various header values */
|
2015-02-10 15:28:52 -05:00
|
|
|
if (header.refcount_order > 6) {
|
|
|
|
error_setg(errp, "Reference count entry width too large; may not "
|
|
|
|
"exceed 64 bits");
|
|
|
|
ret = -EINVAL;
|
2011-02-09 11:11:07 +01:00
|
|
|
goto fail;
|
|
|
|
}
|
2013-09-03 10:09:53 +02:00
|
|
|
s->refcount_order = header.refcount_order;
|
2015-02-10 15:28:43 -05:00
|
|
|
s->refcount_bits = 1 << s->refcount_order;
|
|
|
|
s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
|
|
|
|
s->refcount_max += s->refcount_max - 1;
|
2011-12-15 12:20:58 +01:00
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
s->crypt_method_header = header.crypt_method;
|
2010-12-17 16:02:40 +01:00
|
|
|
if (s->crypt_method_header) {
|
2016-03-21 14:11:48 +00:00
|
|
|
if (bdrv_uses_whitelist() &&
|
|
|
|
s->crypt_method_header == QCOW_CRYPT_AES) {
|
block: drop support for using qcow[2] encryption with system emulators
Back in the 2.3.0 release we declared qcow[2] encryption as
deprecated, warning people that it would be removed in a future
release.
commit a1f688f4152e65260b94f37543521ceff8bfebe4
Author: Markus Armbruster <armbru@redhat.com>
Date: Fri Mar 13 21:09:40 2015 +0100
block: Deprecate QCOW/QCOW2 encryption
The code still exists today, but by a (happy?) accident we entirely
broke the ability to use qcow[2] encryption in the system emulators
in the 2.4.0 release due to
commit 8336aafae1451d54c81dd2b187b45f7c45d2428e
Author: Daniel P. Berrange <berrange@redhat.com>
Date: Tue May 12 17:09:18 2015 +0100
qcow2/qcow: protect against uninitialized encryption key
This commit was designed to prevent future coding bugs which
might cause QEMU to read/write data on an encrypted block
device in plain text mode before a decryption key is set.
It turns out this preventative measure was a little too good,
because we already had a long standing bug where QEMU read
encrypted data in plain text mode during system emulator
startup, in order to guess disk geometry:
Thread 10 (Thread 0x7fffd3fff700 (LWP 30373)):
#0 0x00007fffe90b1a28 in raise () at /lib64/libc.so.6
#1 0x00007fffe90b362a in abort () at /lib64/libc.so.6
#2 0x00007fffe90aa227 in __assert_fail_base () at /lib64/libc.so.6
#3 0x00007fffe90aa2d2 in () at /lib64/libc.so.6
#4 0x000055555587ae19 in qcow2_co_readv (bs=0x5555562accb0, sector_num=0, remaining_sectors=1, qiov=0x7fffffffd260) at block/qcow2.c:1229
#5 0x000055555589b60d in bdrv_aligned_preadv (bs=bs@entry=0x5555562accb0, req=req@entry=0x7fffd3ffea50, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=512, qiov=qiov@entry=0x7fffffffd260, flags=0) at block/io.c:908
#6 0x000055555589b8bc in bdrv_co_do_preadv (bs=0x5555562accb0, offset=0, bytes=512, qiov=0x7fffffffd260, flags=<optimized out>) at block/io.c:999
#7 0x000055555589c375 in bdrv_rw_co_entry (opaque=0x7fffffffd210) at block/io.c:544
#8 0x000055555586933b in coroutine_thread (opaque=0x555557876310) at coroutine-gthread.c:134
#9 0x00007ffff64e1835 in g_thread_proxy (data=0x5555562b5590) at gthread.c:778
#10 0x00007ffff6bb760a in start_thread () at /lib64/libpthread.so.0
#11 0x00007fffe917f59d in clone () at /lib64/libc.so.6
Thread 1 (Thread 0x7ffff7ecab40 (LWP 30343)):
#0 0x00007fffe91797a9 in syscall () at /lib64/libc.so.6
#1 0x00007ffff64ff87f in g_cond_wait (cond=cond@entry=0x555555e085f0 <coroutine_cond>, mutex=mutex@entry=0x555555e08600 <coroutine_lock>) at gthread-posix.c:1397
#2 0x00005555558692c3 in qemu_coroutine_switch (co=<optimized out>) at coroutine-gthread.c:117
#3 0x00005555558692c3 in qemu_coroutine_switch (from_=0x5555562b5e30, to_=to_@entry=0x555557876310, action=action@entry=COROUTINE_ENTER) at coroutine-gthread.c:175
#4 0x0000555555868a90 in qemu_coroutine_enter (co=0x555557876310, opaque=0x0) at qemu-coroutine.c:116
#5 0x0000555555859b84 in thread_pool_completion_bh (opaque=0x7fffd40010e0) at thread-pool.c:187
#6 0x0000555555859514 in aio_bh_poll (ctx=ctx@entry=0x5555562953b0) at async.c:85
#7 0x0000555555864d10 in aio_dispatch (ctx=ctx@entry=0x5555562953b0) at aio-posix.c:135
#8 0x0000555555864f75 in aio_poll (ctx=ctx@entry=0x5555562953b0, blocking=blocking@entry=true) at aio-posix.c:291
#9 0x000055555589c40d in bdrv_prwv_co (bs=bs@entry=0x5555562accb0, offset=offset@entry=0, qiov=qiov@entry=0x7fffffffd260, is_write=is_write@entry=false, flags=flags@entry=(unknown: 0)) at block/io.c:591
#10 0x000055555589c503 in bdrv_rw_co (bs=bs@entry=0x5555562accb0, sector_num=sector_num@entry=0, buf=buf@entry=0x7fffffffd2e0 "\321,", nb_sectors=nb_sectors@entry=21845, is_write=is_write@entry=false, flags=flags@entry=(unknown: 0)) at block/io.c:614
#11 0x000055555589c562 in bdrv_read_unthrottled (nb_sectors=21845, buf=0x7fffffffd2e0 "\321,", sector_num=0, bs=0x5555562accb0) at block/io.c:622
#12 0x000055555589c562 in bdrv_read_unthrottled (bs=0x5555562accb0, sector_num=sector_num@entry=0, buf=buf@entry=0x7fffffffd2e0 "\321,", nb_sectors=nb_sectors@entry=21845) at block/io.c:634
nb_sectors@entry=1) at block/block-backend.c:504
#14 0x0000555555752e9f in guess_disk_lchs (blk=blk@entry=0x5555562a5290, pcylinders=pcylinders@entry=0x7fffffffd52c, pheads=pheads@entry=0x7fffffffd530, psectors=psectors@entry=0x7fffffffd534) at hw/block/hd-geometry.c:68
#15 0x0000555555752ff7 in hd_geometry_guess (blk=0x5555562a5290, pcyls=pcyls@entry=0x555557875d1c, pheads=pheads@entry=0x555557875d20, psecs=psecs@entry=0x555557875d24, ptrans=ptrans@entry=0x555557875d28) at hw/block/hd-geometry.c:133
#16 0x0000555555752b87 in blkconf_geometry (conf=conf@entry=0x555557875d00, ptrans=ptrans@entry=0x555557875d28, cyls_max=cyls_max@entry=65536, heads_max=heads_max@entry=16, secs_max=secs_max@entry=255, errp=errp@entry=0x7fffffffd5e0) at hw/block/block.c:71
#17 0x0000555555799bc4 in ide_dev_initfn (dev=0x555557875c80, kind=IDE_HD) at hw/ide/qdev.c:174
#18 0x0000555555768394 in device_realize (dev=0x555557875c80, errp=0x7fffffffd640) at hw/core/qdev.c:247
#19 0x0000555555769a81 in device_set_realized (obj=0x555557875c80, value=<optimized out>, errp=0x7fffffffd730) at hw/core/qdev.c:1058
#20 0x00005555558240ce in property_set_bool (obj=0x555557875c80, v=<optimized out>, opaque=0x555557875de0, name=<optimized out>, errp=0x7fffffffd730)
at qom/object.c:1514
#21 0x0000555555826c87 in object_property_set_qobject (obj=obj@entry=0x555557875c80, value=value@entry=0x55555784bcb0, name=name@entry=0x55555591cb3d "realized", errp=errp@entry=0x7fffffffd730) at qom/qom-qobject.c:24
#22 0x0000555555825760 in object_property_set_bool (obj=obj@entry=0x555557875c80, value=value@entry=true, name=name@entry=0x55555591cb3d "realized", errp=errp@entry=0x7fffffffd730) at qom/object.c:905
#23 0x000055555576897b in qdev_init_nofail (dev=dev@entry=0x555557875c80) at hw/core/qdev.c:380
#24 0x0000555555799ead in ide_create_drive (bus=bus@entry=0x555557629630, unit=unit@entry=0, drive=0x5555562b77e0) at hw/ide/qdev.c:122
#25 0x000055555579a746 in pci_ide_create_devs (dev=dev@entry=0x555557628db0, hd_table=hd_table@entry=0x7fffffffd830) at hw/ide/pci.c:440
#26 0x000055555579b165 in pci_piix3_ide_init (bus=<optimized out>, hd_table=0x7fffffffd830, devfn=<optimized out>) at hw/ide/piix.c:218
#27 0x000055555568ca55 in pc_init1 (machine=0x5555562960a0, pci_enabled=1, kvmclock_enabled=<optimized out>) at /home/berrange/src/virt/qemu/hw/i386/pc_piix.c:256
#28 0x0000555555603ab2 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4249
So the safety net is correctly preventing QEMU reading cipher
text as if it were plain text, during startup and aborting QEMU
to avoid bad usage of this data.
For added fun this bug only happens if the encrypted qcow2
file happens to have data written to the first cluster,
otherwise the cluster won't be allocated and so qcow2 would
not try the decryption routines at all, just return all 0's.
That no one even noticed, let alone reported, this bug that
has shipped in 2.4.0, 2.5.0 and 2.6.0 shows that the number
of actual users of encrypted qcow2 is approximately zero.
So rather than fix the crash, and backport it to stable
releases, just go ahead with what we have warned users about
and disable any use of qcow2 encryption in the system
emulators. qemu-img/qemu-io/qemu-nbd are still able to access
qcow2 encrypted images for the sake of data conversion.
In the future, qcow2 will gain support for the alternative
luks format, but when this happens it'll be using the
'-object secret' infrastructure for getting keys, which
avoids this problematic scenario entirely.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-06-13 12:30:09 +01:00
|
|
|
error_setg(errp,
|
|
|
|
"Use of AES-CBC encrypted qcow2 images is no longer "
|
|
|
|
"supported in system emulators");
|
|
|
|
error_append_hint(errp,
|
|
|
|
"You can use 'qemu-img convert' to convert your "
|
|
|
|
"image to an alternative supported format, such "
|
|
|
|
"as unencrypted qcow2, or raw with the LUKS "
|
|
|
|
"format instead.\n");
|
|
|
|
ret = -ENOSYS;
|
|
|
|
goto fail;
|
2016-03-21 14:11:48 +00:00
|
|
|
}
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
if (s->crypt_method_header == QCOW_CRYPT_AES) {
|
|
|
|
s->crypt_physical_offset = false;
|
|
|
|
} else {
|
|
|
|
/* Assuming LUKS and any future crypt methods we
|
|
|
|
* add will all use physical offsets, due to the
|
|
|
|
* fact that the alternative is insecure... */
|
|
|
|
s->crypt_physical_offset = true;
|
|
|
|
}
|
|
|
|
|
2016-06-23 16:37:26 -06:00
|
|
|
bs->encrypted = true;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2014-03-26 13:05:41 +01:00
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
|
|
|
|
s->l2_size = 1 << s->l2_bits;
|
2014-10-22 14:09:28 +02:00
|
|
|
/* 2^(s->refcount_order - 3) is the refcount width in bytes */
|
|
|
|
s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
|
|
|
|
s->refcount_block_size = 1 << s->refcount_block_bits;
|
2006-08-05 21:14:20 +00:00
|
|
|
bs->total_sectors = header.size / 512;
|
|
|
|
s->csize_shift = (62 - (s->cluster_bits - 8));
|
|
|
|
s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
|
|
|
|
s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
|
2014-03-26 13:05:43 +01:00
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
s->refcount_table_offset = header.refcount_table_offset;
|
2007-09-16 21:08:06 +00:00
|
|
|
s->refcount_table_size =
|
2006-08-05 21:14:20 +00:00
|
|
|
header.refcount_table_clusters << (s->cluster_bits - 3);
|
|
|
|
|
2014-03-26 13:05:50 +01:00
|
|
|
if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) {
|
2014-03-26 13:05:43 +01:00
|
|
|
error_setg(errp, "Reference count table too large");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2014-03-26 13:05:44 +01:00
|
|
|
ret = validate_table_offset(bs, s->refcount_table_offset,
|
|
|
|
s->refcount_table_size, sizeof(uint64_t));
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg(errp, "Invalid reference count table offset");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2014-03-26 13:05:45 +01:00
|
|
|
/* Snapshot table offset/length */
|
|
|
|
if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) {
|
|
|
|
error_setg(errp, "Too many snapshots");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = validate_table_offset(bs, header.snapshots_offset,
|
|
|
|
header.nb_snapshots,
|
|
|
|
sizeof(QCowSnapshotHeader));
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg(errp, "Invalid snapshot table offset");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
/* read the level 1 table */
|
2015-03-11 11:05:21 +08:00
|
|
|
if (header.l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
|
2014-03-26 13:05:46 +01:00
|
|
|
error_setg(errp, "Active L1 table too large");
|
|
|
|
ret = -EFBIG;
|
|
|
|
goto fail;
|
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
s->l1_size = header.l1_size;
|
2013-05-14 16:14:33 +02:00
|
|
|
|
|
|
|
l1_vm_state_index = size_to_l1(s, header.size);
|
|
|
|
if (l1_vm_state_index > INT_MAX) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg(errp, "Image is too big");
|
2013-05-14 16:14:33 +02:00
|
|
|
ret = -EFBIG;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
s->l1_vm_state_index = l1_vm_state_index;
|
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
/* the L1 table must contain at least enough entries to put
|
|
|
|
header.size bytes */
|
2010-12-17 16:02:40 +01:00
|
|
|
if (s->l1_size < s->l1_vm_state_index) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg(errp, "L1 table is too small");
|
2010-12-17 16:02:40 +01:00
|
|
|
ret = -EINVAL;
|
2006-08-05 21:14:20 +00:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2014-03-26 13:05:46 +01:00
|
|
|
|
|
|
|
ret = validate_table_offset(bs, header.l1_table_offset,
|
|
|
|
header.l1_size, sizeof(uint64_t));
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg(errp, "Invalid L1 table offset");
|
|
|
|
goto fail;
|
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
s->l1_table_offset = header.l1_table_offset;
|
2014-03-26 13:05:46 +01:00
|
|
|
|
|
|
|
|
2009-10-26 16:11:16 +01:00
|
|
|
if (s->l1_size > 0) {
|
2015-06-16 14:19:22 +02:00
|
|
|
s->l1_table = qemu_try_blockalign(bs->file->bs,
|
2009-10-26 16:11:16 +01:00
|
|
|
align_offset(s->l1_size * sizeof(uint64_t), 512));
|
2014-05-20 17:12:47 +02:00
|
|
|
if (s->l1_table == NULL) {
|
|
|
|
error_setg(errp, "Could not allocate L1 table");
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
|
2010-12-17 16:02:40 +01:00
|
|
|
s->l1_size * sizeof(uint64_t));
|
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not read L1 table");
|
2009-10-26 16:11:16 +01:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2009-10-26 16:11:16 +01:00
|
|
|
for(i = 0;i < s->l1_size; i++) {
|
|
|
|
be64_to_cpus(&s->l1_table[i]);
|
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
}
|
2011-01-10 17:17:28 +01:00
|
|
|
|
2015-04-16 11:44:26 +02:00
|
|
|
/* Parse driver-specific options */
|
|
|
|
ret = qcow2_update_options(bs, options, flags, errp);
|
2015-04-16 11:36:10 +02:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
s->cluster_cache_offset = -1;
|
2011-11-14 15:09:46 -06:00
|
|
|
s->flags = flags;
|
2007-09-17 08:09:54 +00:00
|
|
|
|
2010-12-17 16:02:40 +01:00
|
|
|
ret = qcow2_refcount_init(bs);
|
|
|
|
if (ret != 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not initialize refcount handling");
|
2006-08-05 21:14:20 +00:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
|
2009-09-12 07:36:22 +00:00
|
|
|
QLIST_INIT(&s->cluster_allocs);
|
2013-06-19 13:44:20 +02:00
|
|
|
QTAILQ_INIT(&s->discards);
|
2009-08-31 16:48:49 +02:00
|
|
|
|
2009-03-28 17:55:06 +00:00
|
|
|
/* read qcow2 extensions */
|
2013-09-05 09:40:43 +02:00
|
|
|
if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
|
2017-06-28 15:05:08 +03:00
|
|
|
flags, &update_header, &local_err)) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_propagate(errp, local_err);
|
2010-12-17 16:02:40 +01:00
|
|
|
ret = -EINVAL;
|
2009-03-28 17:55:06 +00:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2009-03-28 17:55:06 +00:00
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
/* qcow2_read_extension may have set up the crypto context
|
|
|
|
* if the crypt method needs a header region, some methods
|
|
|
|
* don't need header extensions, so must check here
|
|
|
|
*/
|
|
|
|
if (s->crypt_method_header && !s->crypto) {
|
|
|
|
if (s->crypt_method_header == QCOW_CRYPT_AES) {
|
|
|
|
unsigned int cflags = 0;
|
|
|
|
if (flags & BDRV_O_NO_IO) {
|
|
|
|
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
|
|
|
}
|
2017-06-23 17:24:17 +01:00
|
|
|
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
|
|
|
NULL, NULL, cflags, errp);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
if (!s->crypto) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
} else if (!(flags & BDRV_O_NO_IO)) {
|
|
|
|
error_setg(errp, "Missing CRYPTO header for crypt method %d",
|
|
|
|
s->crypt_method_header);
|
2017-06-23 17:24:10 +01:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
/* read the backing file name */
|
|
|
|
if (header.backing_file_offset != 0) {
|
|
|
|
len = header.backing_file_size;
|
2015-01-22 08:03:30 -05:00
|
|
|
if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
|
2015-01-27 08:33:55 -05:00
|
|
|
len >= sizeof(bs->backing_file)) {
|
2014-03-26 13:05:47 +01:00
|
|
|
error_setg(errp, "Backing file name too long");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2016-06-20 18:24:02 +02:00
|
|
|
ret = bdrv_pread(bs->file, header.backing_file_offset,
|
2010-12-17 16:02:40 +01:00
|
|
|
bs->backing_file, len);
|
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not read backing file name");
|
2006-08-05 21:14:20 +00:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
bs->backing_file[len] = '\0';
|
2015-04-07 15:03:16 +02:00
|
|
|
s->image_backing_file = g_strdup(bs->backing_file);
|
2006-08-05 21:14:20 +00:00
|
|
|
}
|
2011-11-16 11:43:28 +01:00
|
|
|
|
2014-03-26 13:06:04 +01:00
|
|
|
/* Internal snapshots */
|
|
|
|
s->snapshots_offset = header.snapshots_offset;
|
|
|
|
s->nb_snapshots = header.nb_snapshots;
|
|
|
|
|
2011-11-16 11:43:28 +01:00
|
|
|
ret = qcow2_read_snapshots(bs);
|
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not read snapshots");
|
2006-08-05 21:14:20 +00:00
|
|
|
goto fail;
|
2010-12-17 16:02:40 +01:00
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
|
2012-06-14 11:42:23 +01:00
|
|
|
/* Clear unknown autoclear feature bits */
|
2017-06-28 15:05:08 +03:00
|
|
|
update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
|
2017-06-28 15:05:11 +03:00
|
|
|
update_header =
|
|
|
|
update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
|
|
|
|
if (update_header) {
|
2017-06-28 15:05:08 +03:00
|
|
|
s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
|
2017-06-28 15:05:11 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (qcow2_load_autoloading_dirty_bitmaps(bs, &local_err)) {
|
|
|
|
update_header = false;
|
|
|
|
}
|
|
|
|
if (local_err != NULL) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (update_header) {
|
2012-06-14 11:42:23 +01:00
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not update qcow2 header");
|
2012-06-14 11:42:23 +01:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-30 17:42:09 +02:00
|
|
|
/* Initialise locks */
|
|
|
|
qemu_co_mutex_init(&s->lock);
|
2016-09-28 15:04:21 +08:00
|
|
|
bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
|
2011-06-30 17:42:09 +02:00
|
|
|
|
2012-07-27 09:05:19 +01:00
|
|
|
/* Repair image if dirty */
|
2016-01-13 15:56:06 +01:00
|
|
|
if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
|
2012-08-09 13:05:56 +01:00
|
|
|
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
|
2012-07-27 09:05:19 +01:00
|
|
|
BdrvCheckResult result = {0};
|
|
|
|
|
2014-10-22 14:09:29 +02:00
|
|
|
ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
|
2012-07-27 09:05:19 +01:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not repair dirty image");
|
2012-07-27 09:05:19 +01:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-05 21:14:20 +00:00
|
|
|
#ifdef DEBUG_ALLOC
|
2011-08-04 19:22:10 +02:00
|
|
|
{
|
|
|
|
BdrvCheckResult result = {0};
|
2012-06-15 16:41:07 +01:00
|
|
|
qcow2_check_refcounts(bs, &result, 0);
|
2011-08-04 19:22:10 +02:00
|
|
|
}
|
2006-08-05 21:14:20 +00:00
|
|
|
#endif
|
2010-12-17 16:02:40 +01:00
|
|
|
return ret;
|
2006-08-05 21:14:20 +00:00
|
|
|
|
|
|
|
fail:
|
2011-12-15 12:20:58 +01:00
|
|
|
g_free(s->unknown_header_fields);
|
2012-02-02 14:52:08 +01:00
|
|
|
cleanup_unknown_header_ext(bs);
|
2009-05-28 16:07:07 +02:00
|
|
|
qcow2_free_snapshots(bs);
|
|
|
|
qcow2_refcount_close(bs);
|
2014-05-20 17:12:47 +02:00
|
|
|
qemu_vfree(s->l1_table);
|
2013-08-30 14:34:26 +02:00
|
|
|
/* else pre-write overlap checks in cache_destroy may crash */
|
|
|
|
s->l1_table = NULL;
|
2015-08-04 15:14:40 +03:00
|
|
|
cache_clean_timer_del(bs);
|
2011-01-10 17:17:28 +01:00
|
|
|
if (s->l2_table_cache) {
|
|
|
|
qcow2_cache_destroy(bs, s->l2_table_cache);
|
|
|
|
}
|
2014-03-28 23:08:58 +05:30
|
|
|
if (s->refcount_block_cache) {
|
|
|
|
qcow2_cache_destroy(bs, s->refcount_block_cache);
|
|
|
|
}
|
2017-06-23 17:24:10 +01:00
|
|
|
qcrypto_block_free(s->crypto);
|
|
|
|
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
|
2010-12-17 16:02:40 +01:00
|
|
|
return ret;
|
2006-08-05 21:14:20 +00:00
|
|
|
}
|
|
|
|
|
2016-12-16 18:52:37 +01:00
|
|
|
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
|
|
|
|
false, errp);
|
|
|
|
if (!bs->file) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return qcow2_do_open(bs, options, flags, errp);
|
|
|
|
}
|
|
|
|
|
2014-07-16 17:48:16 +02:00
|
|
|
/*
 * Fill in the block limits of @bs that depend on the qcow2 state: write-zeroes
 * and discard alignment are set to the cluster size, and encrypted images
 * additionally get a request alignment of one sector.
 *
 * @errp is not used by this function.
 */
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;

    /* Zero-writes and discards are aligned to whole clusters */
    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
    bs->bl.pdiscard_alignment = s->cluster_size;

    if (bs->encrypted) {
        /* Encryption works on a sector granularity */
        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
    }
}
|
|
|
|
|
2012-09-20 15:13:28 -04:00
|
|
|
static int qcow2_reopen_prepare(BDRVReopenState *state,
|
|
|
|
BlockReopenQueue *queue, Error **errp)
|
|
|
|
{
|
2015-04-16 13:42:27 +02:00
|
|
|
Qcow2ReopenState *r;
|
2014-04-03 13:47:50 +02:00
|
|
|
int ret;
|
|
|
|
|
2015-04-16 13:42:27 +02:00
|
|
|
r = g_new0(Qcow2ReopenState, 1);
|
|
|
|
state->opaque = r;
|
|
|
|
|
|
|
|
ret = qcow2_update_options_prepare(state->bs, r, state->options,
|
|
|
|
state->flags, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We need to write out any unwritten data if we reopen read-only. */
|
2014-04-03 13:47:50 +02:00
|
|
|
if ((state->flags & BDRV_O_RDWR) == 0) {
|
2017-06-28 15:05:20 +03:00
|
|
|
ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2014-04-03 13:47:50 +02:00
|
|
|
ret = bdrv_flush(state->bs);
|
|
|
|
if (ret < 0) {
|
2015-04-16 13:42:27 +02:00
|
|
|
goto fail;
|
2014-04-03 13:47:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_mark_clean(state->bs);
|
|
|
|
if (ret < 0) {
|
2015-04-16 13:42:27 +02:00
|
|
|
goto fail;
|
2014-04-03 13:47:50 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-20 15:13:28 -04:00
|
|
|
return 0;
|
2015-04-16 13:42:27 +02:00
|
|
|
|
|
|
|
fail:
|
|
|
|
qcow2_update_options_abort(state->bs, r);
|
|
|
|
g_free(r);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Commit stage of a reopen: apply the option state stored in state->opaque
 * through qcow2_update_options_commit() and then free it.
 */
static void qcow2_reopen_commit(BDRVReopenState *state)
{
    void *reopen_state = state->opaque;

    qcow2_update_options_commit(state->bs, reopen_state);
    g_free(reopen_state);
}
|
|
|
|
|
|
|
|
/*
 * Abort stage of a reopen: discard the option state stored in state->opaque
 * through qcow2_update_options_abort() and then free it.
 */
static void qcow2_reopen_abort(BDRVReopenState *state)
{
    void *reopen_state = state->opaque;

    qcow2_update_options_abort(state->bs, reopen_state);
    g_free(reopen_state);
}
|
|
|
|
|
2015-11-16 15:34:59 +01:00
|
|
|
/*
 * Join the options of a reopen (@options) with the previously used ones
 * (@old_options).
 *
 * Some qcow2 options form groups in which a newly given option must replace
 * the old values of its related options instead of being combined with them:
 *
 *  - a new overlap-check option or template drops every old overlap-check
 *    option;
 *  - a new total cache size drops the old L2 and refcount cache sizes;
 *  - if, after joining, all three cache size options are present and the
 *    total size was not newly given, the old total size is dropped.
 *
 * @old_options is modified: overridden keys are deleted from it before it is
 * joined into @options.
 */
static void qcow2_join_options(QDict *options, QDict *old_options)
{
    bool has_new_overlap_template =
        qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
        qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
    bool has_new_total_cache_size =
        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
    bool has_all_cache_options;

    /* New overlap template overrides all old overlap options */
    if (has_new_overlap_template) {
        qdict_del(old_options, QCOW2_OPT_OVERLAP);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
    }

    /* New total cache size overrides all old options */
    if (has_new_total_cache_size) {
        qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
        qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
    }

    /*
     * NOTE(review): presumably this adds the remaining old entries to
     * @options without overwriting keys that are already present (third
     * argument false) -- confirm against qdict_join().
     */
    qdict_join(options, old_options, false);

    /*
     * If after merging all cache size options are set, an old total size is
     * overwritten. Do keep all options, however, if all three are new. The
     * resulting error message is what we want to happen.
     *
     * All three keys must be present for this to apply, hence the logical
     * AND: with a logical OR an inherited total cache size would be dropped
     * on every reopen that does not specify a new one.
     */
    has_all_cache_options =
        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) &&
        qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) &&
        qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);

    if (has_all_cache_options && !has_new_total_cache_size) {
        qdict_del(options, QCOW2_OPT_CACHE_SIZE);
    }
}
|
|
|
|
|
2013-09-04 19:00:28 +02:00
|
|
|
/*
 * Report the allocation status of the guest sectors starting at @sector_num.
 *
 * Up to @nb_sectors sectors (capped at INT_MAX bytes) are looked up with
 * qcow2_get_cluster_offset() while holding s->lock. *pnum receives the
 * number of sectors the lookup covered.
 *
 * The returned status is built as follows:
 *  - if the lookup yielded a non-zero cluster offset, the cluster is not
 *    compressed and no crypto context is set, BDRV_BLOCK_OFFSET_VALID and
 *    the offset (including the sector offset inside the cluster) are set
 *    and *file is pointed at bs->file->bs;
 *  - zero clusters (plain or allocated) set BDRV_BLOCK_ZERO;
 *  - any other cluster type except "unallocated" sets BDRV_BLOCK_DATA.
 *
 * A negative value from qcow2_get_cluster_offset() is returned unchanged.
 */
static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t host_offset;
    unsigned int len = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
    int64_t result = 0;
    int type;

    qemu_co_mutex_lock(&s->lock);
    type = qcow2_get_cluster_offset(bs, sector_num << BDRV_SECTOR_BITS, &len,
                                    &host_offset);
    qemu_co_mutex_unlock(&s->lock);
    if (type < 0) {
        return type;
    }

    /* The lookup may have shortened len; report it back in sectors */
    *pnum = len >> BDRV_SECTOR_BITS;

    if (host_offset != 0 && type != QCOW2_CLUSTER_COMPRESSED && !s->crypto) {
        int sector_in_cluster = sector_num & (s->cluster_sectors - 1);

        host_offset |= (sector_in_cluster << BDRV_SECTOR_BITS);
        *file = bs->file->bs;
        result |= BDRV_BLOCK_OFFSET_VALID | host_offset;
    }

    switch (type) {
    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_ZERO_ALLOC:
        result |= BDRV_BLOCK_ZERO;
        break;
    case QCOW2_CLUSTER_UNALLOCATED:
        /* neither zero nor data */
        break;
    default:
        result |= BDRV_BLOCK_DATA;
        break;
    }

    return result;
}
|
|
|
|
|
2006-08-06 13:34:04 +00:00
|
|
|
/* handle reading after the end of the backing file */
|
2010-09-13 18:08:52 +02:00
|
|
|
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
|
2016-05-31 16:13:07 +02:00
|
|
|
int64_t offset, int bytes)
|
2006-08-06 13:34:04 +00:00
|
|
|
{
|
2016-05-31 16:13:07 +02:00
|
|
|
uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
|
2006-08-06 13:34:04 +00:00
|
|
|
int n1;
|
2016-05-31 16:13:07 +02:00
|
|
|
|
|
|
|
if ((offset + bytes) <= bs_size) {
|
|
|
|
return bytes;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (offset >= bs_size) {
|
2006-08-06 13:34:04 +00:00
|
|
|
n1 = 0;
|
2016-05-31 16:13:07 +02:00
|
|
|
} else {
|
|
|
|
n1 = bs_size - offset;
|
|
|
|
}
|
2010-09-13 18:08:52 +02:00
|
|
|
|
2016-05-31 16:13:07 +02:00
|
|
|
qemu_iovec_memset(qiov, n1, 0, bytes - n1);
|
2010-09-13 18:08:52 +02:00
|
|
|
|
2006-08-06 13:34:04 +00:00
|
|
|
return n1;
|
|
|
|
}
|
|
|
|
|
2016-05-31 16:13:07 +02:00
|
|
|
/*
 * Read @bytes bytes starting at guest offset @offset into @qiov.
 *
 * The request is served chunk by chunk.  For every chunk the cluster type
 * reported by qcow2_get_cluster_offset() selects the data source:
 *   - unallocated: the backing file if there is one (zero-filled past its
 *     end by qcow2_backing_read1()), otherwise zeroes;
 *   - zero clusters: zeroes;
 *   - compressed: the cluster is decompressed and copied out of
 *     s->cluster_cache;
 *   - normal: read from bs->file; encrypted images are read into a
 *     contiguous bounce buffer, decrypted there and then copied to @qiov.
 *
 * s->lock is held throughout, except while a read from the backing file or
 * from bs->file is in flight.  @flags is not used by this function.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                                        uint64_t bytes, QEMUIOVector *qiov,
                                        int flags)
{
    BDRVQcow2State *s = bs->opaque;
    QEMUIOVector chunk_qiov;        /* slice of @qiov for the current chunk */
    uint8_t *bounce = NULL;         /* lazily allocated decryption buffer */
    uint64_t host_offset = 0;
    uint64_t done = 0;              /* bytes of @qiov already filled */
    unsigned int chunk;             /* size of the current chunk in bytes */
    int in_cluster, backing_len;
    int ret;

    qemu_iovec_init(&chunk_qiov, qiov->niov);

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {
        /* Size the next chunk; with encryption it must fit the bounce buffer */
        chunk = MIN(bytes, INT_MAX);
        if (s->crypto) {
            chunk = MIN(chunk, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
        }

        /*
         * The lookup receives &chunk and so may adjust the chunk size; its
         * non-negative return value is the cluster type switched on below.
         */
        ret = qcow2_get_cluster_offset(bs, offset, &chunk, &host_offset);
        if (ret < 0) {
            goto out;
        }

        in_cluster = offset_into_cluster(s, offset);

        qemu_iovec_reset(&chunk_qiov);
        qemu_iovec_concat(&chunk_qiov, qiov, done, chunk);

        switch (ret) {
        case QCOW2_CLUSTER_UNALLOCATED:
            if (!bs->backing) {
                /* No backing file: nothing to wait for, just return zeroes */
                qemu_iovec_memset(&chunk_qiov, 0, 0, chunk);
                break;
            }

            /* Read from the base image, limited to what it actually holds */
            backing_len = qcow2_backing_read1(bs->backing->bs, &chunk_qiov,
                                              offset, chunk);
            if (backing_len > 0) {
                QEMUIOVector backing_qiov;

                qemu_iovec_init(&backing_qiov, chunk_qiov.niov);
                qemu_iovec_concat(&backing_qiov, &chunk_qiov, 0, backing_len);

                BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                qemu_co_mutex_unlock(&s->lock);
                ret = bdrv_co_preadv(bs->backing, offset, backing_len,
                                     &backing_qiov, 0);
                qemu_co_mutex_lock(&s->lock);

                qemu_iovec_destroy(&backing_qiov);

                if (ret < 0) {
                    goto out;
                }
            }
            break;

        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_ZERO_ALLOC:
            qemu_iovec_memset(&chunk_qiov, 0, 0, chunk);
            break;

        case QCOW2_CLUSTER_COMPRESSED:
            /* add AIO support for compressed blocks ? */
            ret = qcow2_decompress_cluster(bs, host_offset);
            if (ret < 0) {
                goto out;
            }

            qemu_iovec_from_buf(&chunk_qiov, 0,
                                s->cluster_cache + in_cluster,
                                chunk);
            break;

        case QCOW2_CLUSTER_NORMAL:
            if ((host_offset & 511) != 0) {
                ret = -EIO;
                goto out;
            }

            if (bs->encrypted) {
                assert(s->crypto);

                /*
                 * Encrypted data is read into one contiguous temporary
                 * buffer so that it can be decrypted in place afterwards.
                 */
                if (!bounce) {
                    bounce = qemu_try_blockalign(bs->file->bs,
                                                 QCOW_MAX_CRYPT_CLUSTERS
                                                 * s->cluster_size);
                    if (bounce == NULL) {
                        ret = -ENOMEM;
                        goto out;
                    }
                }

                assert(chunk <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                /* Redirect the read from the guest buffers to the bounce */
                qemu_iovec_reset(&chunk_qiov);
                qemu_iovec_add(&chunk_qiov, bounce, chunk);
            }

            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
            qemu_co_mutex_unlock(&s->lock);
            ret = bdrv_co_preadv(bs->file,
                                 host_offset + in_cluster,
                                 chunk, &chunk_qiov, 0);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto out;
            }

            if (bs->encrypted) {
                uint64_t crypt_offset;

                assert(s->crypto);
                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
                assert((chunk & (BDRV_SECTOR_SIZE - 1)) == 0);

                /* The offset fed to the cipher is host- or guest-based */
                crypt_offset = s->crypt_physical_offset ?
                               host_offset + in_cluster :
                               offset;
                if (qcrypto_block_decrypt(s->crypto, crypt_offset,
                                          bounce, chunk, NULL) < 0) {
                    ret = -EIO;
                    goto out;
                }
                /* Hand the plaintext to the caller's buffers */
                qemu_iovec_from_buf(qiov, done, bounce, chunk);
            }
            break;

        default:
            g_assert_not_reached();
            ret = -EIO;
            goto out;
        }

        bytes -= chunk;
        offset += chunk;
        done += chunk;
    }
    ret = 0;

out:
    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&chunk_qiov);
    qemu_vfree(bounce);

    return ret;
}
|
|
|
|
|
2017-06-19 16:40:08 +03:00
|
|
|
/* Check if it's possible to merge a write request with the writing of
|
|
|
|
* the data from the COW regions */
|
|
|
|
static bool merge_cow(uint64_t offset, unsigned bytes,
|
|
|
|
QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
|
|
|
|
{
|
|
|
|
QCowL2Meta *m;
|
|
|
|
|
|
|
|
for (m = l2meta; m != NULL; m = m->next) {
|
|
|
|
/* If both COW regions are empty then there's nothing to merge */
|
|
|
|
if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The data (middle) region must be immediately after the
|
|
|
|
* start region */
|
|
|
|
if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The end region must be immediately after the data (middle)
|
|
|
|
* region */
|
|
|
|
if (m->offset + m->cow_end.offset != offset + bytes) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Make sure that adding both COW regions to the QEMUIOVector
|
|
|
|
* does not exceed IOV_MAX */
|
|
|
|
if (hd_qiov->niov > IOV_MAX - 2) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
m->data_qiov = hd_qiov;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-06-01 16:55:05 +02:00
|
|
|
/*
 * Write @bytes bytes from @qiov to guest offset @offset.
 *
 * The request is processed chunk by chunk.  For each chunk:
 *   1. qcow2_alloc_cluster_offset() provides the host offset and, through
 *      the QCowL2Meta list it returns, any pending allocations;
 *   2. for encrypted images the data is copied to a bounce buffer and
 *      encrypted there;
 *   3. the target range is checked against metadata overlaps;
 *   4. the data is written to bs->file, unless merge_cow() attached it to
 *      an allocation so that it is written together with the COW regions;
 *   5. every pending allocation is linked into the L2 table, removed from
 *      the in-flight list and freed, waking requests that depend on it.
 *
 * s->lock is held throughout, except while the data write is in flight.
 * @flags is not used by this function.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                         uint64_t bytes, QEMUIOVector *qiov,
                                         int flags)
{
    BDRVQcow2State *s = bs->opaque;
    QEMUIOVector chunk_qiov;        /* data to write for the current chunk */
    QCowL2Meta *meta = NULL;        /* allocations pending for this chunk */
    uint8_t *bounce = NULL;         /* lazily allocated encryption buffer */
    uint64_t host_offset;           /* host offset of the chunk's cluster */
    uint64_t write_offset;          /* host offset the chunk is written to */
    uint64_t done = 0;              /* bytes of @qiov already consumed */
    unsigned int chunk;             /* size of the current chunk in bytes */
    int in_cluster;
    int ret;

    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);

    qemu_iovec_init(&chunk_qiov, qiov->niov);

    s->cluster_cache_offset = -1; /* disable compressed cache */

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {
        meta = NULL;

        trace_qcow2_writev_start_part(qemu_coroutine_self());

        /* Size the next chunk; with encryption it must fit the bounce buffer */
        in_cluster = offset_into_cluster(s, offset);
        chunk = MIN(bytes, INT_MAX);
        if (bs->encrypted) {
            chunk = MIN(chunk,
                        QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
                        - in_cluster);
        }

        ret = qcow2_alloc_cluster_offset(bs, offset, &chunk,
                                         &host_offset, &meta);
        if (ret < 0) {
            goto out;
        }

        assert((host_offset & 511) == 0);
        write_offset = host_offset + in_cluster;

        qemu_iovec_reset(&chunk_qiov);
        qemu_iovec_concat(&chunk_qiov, qiov, done, chunk);

        if (bs->encrypted) {
            uint64_t crypt_offset;

            assert(s->crypto);
            if (!bounce) {
                bounce = qemu_try_blockalign(bs->file->bs,
                                             QCOW_MAX_CRYPT_CLUSTERS
                                             * s->cluster_size);
                if (bounce == NULL) {
                    ret = -ENOMEM;
                    goto out;
                }
            }

            /* Gather the guest data into the contiguous bounce buffer */
            assert(chunk_qiov.size <=
                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
            qemu_iovec_to_buf(&chunk_qiov, 0, bounce, chunk_qiov.size);

            /* The offset fed to the cipher is host- or guest-based */
            crypt_offset = s->crypt_physical_offset ? write_offset : offset;
            if (qcrypto_block_encrypt(s->crypto, crypt_offset,
                                      bounce, chunk, NULL) < 0) {
                ret = -EIO;
                goto out;
            }

            /* From here on the ciphertext is what gets written */
            qemu_iovec_reset(&chunk_qiov);
            qemu_iovec_add(&chunk_qiov, bounce, chunk);
        }

        ret = qcow2_pre_write_overlap_check(bs, 0, write_offset, chunk);
        if (ret < 0) {
            goto out;
        }

        /*
         * When COW is required the guest data may be written together with
         * the COW regions.  If merging is not possible (or there is no COW
         * to do), write the guest data right away.
         */
        if (!merge_cow(offset, chunk, &chunk_qiov, meta)) {
            qemu_co_mutex_unlock(&s->lock);
            BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
            trace_qcow2_writev_data(qemu_coroutine_self(), write_offset);
            ret = bdrv_co_pwritev(bs->file, write_offset,
                                  chunk, &chunk_qiov, 0);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto out;
            }
        }

        /* Commit every allocation made for this chunk */
        while (meta != NULL) {
            QCowL2Meta *following;

            ret = qcow2_alloc_cluster_link_l2(bs, meta);
            if (ret < 0) {
                goto out;
            }

            /* Take the request off the list of running requests */
            if (meta->nb_clusters != 0) {
                QLIST_REMOVE(meta, next_in_flight);
            }

            qemu_co_queue_restart_all(&meta->dependent_requests);

            following = meta->next;
            g_free(meta);
            meta = following;
        }

        bytes -= chunk;
        offset += chunk;
        done += chunk;
        trace_qcow2_writev_done_part(qemu_coroutine_self(), chunk);
    }
    ret = 0;

out:
    /*
     * Drop allocations that were not committed (only possible on error):
     * unlink them, wake up their waiters and free them.
     */
    while (meta != NULL) {
        QCowL2Meta *following;

        if (meta->nb_clusters != 0) {
            QLIST_REMOVE(meta, next_in_flight);
        }
        qemu_co_queue_restart_all(&meta->dependent_requests);

        following = meta->next;
        g_free(meta);
        meta = following;
    }

    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&chunk_qiov);
    qemu_vfree(bounce);
    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}
|
|
|
|
|
2015-12-22 16:04:57 +01:00
|
|
|
/*
 * Write out the image's pending state: persistent dirty bitmaps, the L2
 * table cache and the refcount block cache.
 *
 * Every step is attempted even if an earlier one failed, and each failure
 * is reported with error_report().  The image is marked clean only if all
 * steps succeeded.
 *
 * Returns 0 on success, otherwise the error of the last step that failed
 * (-EINVAL stands for a bitmap store failure).
 */
static int qcow2_inactivate(BlockDriverState *bs)
{
    BDRVQcow2State *state = bs->opaque;
    Error *bitmap_err = NULL;
    int status = 0;
    int flush_ret;

    qcow2_store_persistent_dirty_bitmaps(bs, &bitmap_err);
    if (bitmap_err) {
        status = -EINVAL;
        error_report_err(bitmap_err);
        error_report("Persistent bitmaps are lost for node '%s'",
                     bdrv_get_device_or_node_name(bs));
    }

    flush_ret = qcow2_cache_flush(bs, state->l2_table_cache);
    if (flush_ret != 0) {
        status = flush_ret;
        error_report("Failed to flush the L2 table cache: %s",
                     strerror(-flush_ret));
    }

    flush_ret = qcow2_cache_flush(bs, state->refcount_block_cache);
    if (flush_ret != 0) {
        status = flush_ret;
        error_report("Failed to flush the refcount block cache: %s",
                     strerror(-flush_ret));
    }

    /* Only a fully flushed image may be flagged as clean */
    if (!status) {
        qcow2_mark_clean(bs);
    }

    return status;
}
|
|
|
|
|
2010-12-17 16:02:39 +01:00
|
|
|
/*
 * Release everything the qcow2 driver state of @bs owns.
 *
 * Unless the image is already inactive, its pending state is written out
 * with qcow2_inactivate() before the caches are destroyed.
 */
static void qcow2_close(BlockDriverState *bs)
{
    BDRVQcow2State *state = bs->opaque;

    /*
     * The L1 table pointer is cleared right after freeing because the
     * pre-write overlap checks run during cache_destroy may crash otherwise.
     */
    qemu_vfree(state->l1_table);
    state->l1_table = NULL;

    if ((state->flags & BDRV_O_INACTIVE) == 0) {
        qcow2_inactivate(bs);
    }

    /* Metadata caches and the timer that cleans them */
    cache_clean_timer_del(bs);
    qcow2_cache_destroy(bs, state->l2_table_cache);
    qcow2_cache_destroy(bs, state->refcount_block_cache);

    /* Encryption context */
    qcrypto_block_free(state->crypto);
    state->crypto = NULL;

    /* Header data retained from the image file */
    g_free(state->unknown_header_fields);
    cleanup_unknown_header_ext(bs);

    g_free(state->image_backing_file);
    g_free(state->image_backing_format);

    /* Scratch buffers, refcount structures and snapshot table */
    g_free(state->cluster_cache);
    qemu_vfree(state->cluster_data);
    qcow2_refcount_close(bs);
    qcow2_free_snapshots(bs);
}
|
|
|
|
|
2014-03-12 15:59:16 +01:00
|
|
|
/*
 * Throw away all in-memory qcow2 state of @bs and rebuild it by closing
 * and re-opening the image with the BDRV_O_INACTIVE flag cleared.
 *
 * The encryption context is detached from the state before qcow2_close()
 * runs, so that it survives the close, and is re-attached once the image
 * has been re-opened successfully.
 *
 * On failure an error is stored in @errp and bs->drv is set to NULL.  The
 * detached encryption context is freed in that case: nothing else refers
 * to it any more, so it would otherwise be leaked.
 */
static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    int flags = s->flags;
    QCryptoBlock *crypto = NULL;
    QDict *options;
    Error *local_err = NULL;
    int ret;

    /*
     * Backing files are read-only which makes all of their metadata immutable,
     * that means we don't have to worry about reopening them here.
     */

    /* Detach the crypto context so that qcow2_close() does not free it */
    crypto = s->crypto;
    s->crypto = NULL;

    qcow2_close(bs);

    memset(s, 0, sizeof(BDRVQcow2State));
    options = qdict_clone_shallow(bs->options);

    flags &= ~BDRV_O_INACTIVE;
    ret = qcow2_do_open(bs, options, flags, &local_err);
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
        error_prepend(errp, "Could not reopen qcow2 layer: ");
        goto fail;
    } else if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
        goto fail;
    }

    /*
     * NOTE(review): if qcow2_do_open() set up s->crypto on its own, that
     * object is overwritten here without being freed -- confirm against
     * qcow2_do_open().
     */
    s->crypto = crypto;
    return;

fail:
    /* The node is unusable now; release the context we were holding on to */
    qcrypto_block_free(crypto);
    bs->drv = NULL;
}
|
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
|
|
|
|
size_t len, size_t buflen)
|
|
|
|
{
|
|
|
|
QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
|
|
|
|
size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
|
|
|
|
|
|
|
|
if (buflen < ext_len) {
|
|
|
|
return -ENOSPC;
|
|
|
|
}
|
|
|
|
|
|
|
|
*ext_backing_fmt = (QCowExtension) {
|
|
|
|
.magic = cpu_to_be32(magic),
|
|
|
|
.len = cpu_to_be32(len),
|
|
|
|
};
|
2016-09-13 09:56:27 +01:00
|
|
|
|
|
|
|
if (len) {
|
|
|
|
memcpy(buf + sizeof(QCowExtension), s, len);
|
|
|
|
}
|
2012-02-02 12:32:31 +01:00
|
|
|
|
|
|
|
return ext_len;
|
|
|
|
}
|
|
|
|
|
2010-01-12 12:55:17 +01:00
|
|
|
/*
|
2012-02-02 12:32:31 +01:00
|
|
|
* Updates the qcow2 header, including the variable length parts of it, i.e.
|
|
|
|
* the backing file name and all extensions. qcow2 was not designed to allow
|
|
|
|
* such changes, so if we run out of space (we can only use the first cluster)
|
|
|
|
* this function may fail.
|
2010-01-12 12:55:17 +01:00
|
|
|
*
|
|
|
|
* Returns 0 on success, -errno in error cases.
|
|
|
|
*/
|
2012-02-02 12:32:31 +01:00
|
|
|
int qcow2_update_header(BlockDriverState *bs)
|
2010-01-12 12:55:17 +01:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-02-02 12:32:31 +01:00
|
|
|
QCowHeader *header;
|
|
|
|
char *buf;
|
|
|
|
size_t buflen = s->cluster_size;
|
2010-01-12 12:55:17 +01:00
|
|
|
int ret;
|
2012-02-02 12:32:31 +01:00
|
|
|
uint64_t total_size;
|
|
|
|
uint32_t refcount_table_clusters;
|
2011-12-15 12:20:58 +01:00
|
|
|
size_t header_length;
|
2012-02-02 14:52:08 +01:00
|
|
|
Qcow2UnknownHeaderExtension *uext;
|
2010-01-12 12:55:17 +01:00
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
buf = qemu_blockalign(bs, buflen);
|
2010-01-12 12:55:17 +01:00
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
/* Header structure */
|
|
|
|
header = (QCowHeader*) buf;
|
2010-01-12 12:55:17 +01:00
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
if (buflen < sizeof(*header)) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto fail;
|
2010-01-12 12:55:17 +01:00
|
|
|
}
|
|
|
|
|
2011-12-15 12:20:58 +01:00
|
|
|
header_length = sizeof(*header) + s->unknown_header_fields_size;
|
2012-02-02 12:32:31 +01:00
|
|
|
total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
|
|
|
|
refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
|
|
|
|
|
|
|
|
*header = (QCowHeader) {
|
2011-12-15 12:20:58 +01:00
|
|
|
/* Version 2 fields */
|
2012-02-02 12:32:31 +01:00
|
|
|
.magic = cpu_to_be32(QCOW_MAGIC),
|
2011-12-15 12:20:58 +01:00
|
|
|
.version = cpu_to_be32(s->qcow_version),
|
2012-02-02 12:32:31 +01:00
|
|
|
.backing_file_offset = 0,
|
|
|
|
.backing_file_size = 0,
|
|
|
|
.cluster_bits = cpu_to_be32(s->cluster_bits),
|
|
|
|
.size = cpu_to_be64(total_size),
|
|
|
|
.crypt_method = cpu_to_be32(s->crypt_method_header),
|
|
|
|
.l1_size = cpu_to_be32(s->l1_size),
|
|
|
|
.l1_table_offset = cpu_to_be64(s->l1_table_offset),
|
|
|
|
.refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
|
|
|
|
.refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
|
|
|
|
.nb_snapshots = cpu_to_be32(s->nb_snapshots),
|
|
|
|
.snapshots_offset = cpu_to_be64(s->snapshots_offset),
|
2011-12-15 12:20:58 +01:00
|
|
|
|
|
|
|
/* Version 3 fields */
|
|
|
|
.incompatible_features = cpu_to_be64(s->incompatible_features),
|
|
|
|
.compatible_features = cpu_to_be64(s->compatible_features),
|
|
|
|
.autoclear_features = cpu_to_be64(s->autoclear_features),
|
2013-09-03 10:09:53 +02:00
|
|
|
.refcount_order = cpu_to_be32(s->refcount_order),
|
2011-12-15 12:20:58 +01:00
|
|
|
.header_length = cpu_to_be32(header_length),
|
2012-02-02 12:32:31 +01:00
|
|
|
};
|
2010-01-12 12:55:17 +01:00
|
|
|
|
2011-12-15 12:20:58 +01:00
|
|
|
/* For older versions, write a shorter header */
|
|
|
|
switch (s->qcow_version) {
|
|
|
|
case 2:
|
|
|
|
ret = offsetof(QCowHeader, incompatible_features);
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
ret = sizeof(*header);
|
|
|
|
break;
|
|
|
|
default:
|
2012-05-21 13:06:54 +02:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2011-12-15 12:20:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
memset(buf, 0, buflen);
|
|
|
|
|
|
|
|
/* Preserve any unknown field in the header */
|
|
|
|
if (s->unknown_header_fields_size) {
|
|
|
|
if (buflen < s->unknown_header_fields_size) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
|
|
|
|
buf += s->unknown_header_fields_size;
|
|
|
|
buflen -= s->unknown_header_fields_size;
|
|
|
|
}
|
2010-01-12 12:55:17 +01:00
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
/* Backing file format header extension */
|
2015-04-07 15:03:16 +02:00
|
|
|
if (s->image_backing_format) {
|
2012-02-02 12:32:31 +01:00
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
|
2015-04-07 15:03:16 +02:00
|
|
|
s->image_backing_format,
|
|
|
|
strlen(s->image_backing_format),
|
2012-02-02 12:32:31 +01:00
|
|
|
buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
2010-01-12 12:55:17 +01:00
|
|
|
}
|
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
2010-01-12 12:55:17 +01:00
|
|
|
}
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
/* Full disk encryption header pointer extension */
|
|
|
|
if (s->crypto_header.offset != 0) {
|
|
|
|
cpu_to_be64s(&s->crypto_header.offset);
|
|
|
|
cpu_to_be64s(&s->crypto_header.length);
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
|
|
|
|
&s->crypto_header, sizeof(s->crypto_header),
|
|
|
|
buflen);
|
|
|
|
be64_to_cpus(&s->crypto_header.offset);
|
|
|
|
be64_to_cpus(&s->crypto_header.length);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
}
|
|
|
|
|
2012-04-12 15:20:27 +02:00
|
|
|
/* Feature table */
|
2015-12-02 19:11:04 +01:00
|
|
|
if (s->qcow_version >= 3) {
|
|
|
|
Qcow2Feature features[] = {
|
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
|
|
|
|
.bit = QCOW2_INCOMPAT_DIRTY_BITNR,
|
|
|
|
.name = "dirty bit",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
|
|
|
|
.bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
|
|
|
|
.name = "corrupt bit",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_COMPATIBLE,
|
|
|
|
.bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
|
|
|
|
.name = "lazy refcounts",
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
|
|
|
|
features, sizeof(features), buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
2012-04-12 15:20:27 +02:00
|
|
|
}
|
|
|
|
|
2017-06-28 15:05:08 +03:00
|
|
|
/* Bitmap extension */
|
|
|
|
if (s->nb_bitmaps > 0) {
|
|
|
|
Qcow2BitmapHeaderExt bitmaps_header = {
|
|
|
|
.nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
|
|
|
|
.bitmap_directory_size =
|
|
|
|
cpu_to_be64(s->bitmap_directory_size),
|
|
|
|
.bitmap_directory_offset =
|
|
|
|
cpu_to_be64(s->bitmap_directory_offset)
|
|
|
|
};
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
|
|
|
|
&bitmaps_header, sizeof(bitmaps_header),
|
|
|
|
buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
}
|
|
|
|
|
2012-02-02 14:52:08 +01:00
|
|
|
/* Keep unknown header extensions */
|
|
|
|
QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
|
|
|
|
ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
}
|
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
/* End of header extensions */
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
|
2010-01-12 12:55:17 +01:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
2010-01-12 12:55:17 +01:00
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
/* Backing file name */
|
2015-04-07 15:03:16 +02:00
|
|
|
if (s->image_backing_file) {
|
|
|
|
size_t backing_file_len = strlen(s->image_backing_file);
|
2012-02-02 12:32:31 +01:00
|
|
|
|
|
|
|
if (buflen < backing_file_len) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-10-04 13:10:01 +02:00
|
|
|
/* Using strncpy is ok here, since buf is not NUL-terminated. */
|
2015-04-07 15:03:16 +02:00
|
|
|
strncpy(buf, s->image_backing_file, buflen);
|
2012-02-02 12:32:31 +01:00
|
|
|
|
|
|
|
header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
|
|
|
|
header->backing_file_size = cpu_to_be32(backing_file_len);
|
2010-01-12 12:55:17 +01:00
|
|
|
}
|
|
|
|
|
2012-02-02 12:32:31 +01:00
|
|
|
/* Write the new header */
|
2016-06-20 20:09:15 +02:00
|
|
|
ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
|
2010-01-12 12:55:17 +01:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
fail:
|
2012-02-02 12:32:31 +01:00
|
|
|
qemu_vfree(header);
|
2010-01-12 12:55:17 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Records a new backing file name and backing format, both in @bs and in the
 * qcow2 driver state, and then rewrites the image header so the change ends
 * up in the image file.
 *
 * Either of @backing_file and @backing_fmt may be NULL; @bs then gets an
 * empty string and the driver state a NULL pointer for it.
 *
 * Returns -EINVAL if @backing_file is longer than 1023 characters, otherwise
 * the result of qcow2_update_header().
 */
static int qcow2_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BDRVQcow2State *s = bs->opaque;
    const char *new_file = backing_file ? backing_file : "";
    const char *new_fmt = backing_fmt ? backing_fmt : "";

    if (backing_file && strlen(backing_file) > 1023) {
        return -EINVAL;
    }

    pstrcpy(bs->backing_file, sizeof(bs->backing_file), new_file);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format), new_fmt);

    /* Replace the driver's private copies with what @bs holds now */
    g_free(s->image_backing_file);
    g_free(s->image_backing_format);

    if (backing_file) {
        s->image_backing_file = g_strdup(bs->backing_file);
    } else {
        s->image_backing_file = NULL;
    }

    if (backing_fmt) {
        s->image_backing_format = g_strdup(bs->backing_format);
    } else {
        s->image_backing_format = NULL;
    }

    return qcow2_update_header(bs);
}
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
static int qcow2_crypt_method_from_format(const char *encryptfmt)
|
|
|
|
{
|
|
|
|
if (g_str_equal(encryptfmt, "luks")) {
|
|
|
|
return QCOW_CRYPT_LUKS;
|
|
|
|
} else if (g_str_equal(encryptfmt, "aes")) {
|
|
|
|
return QCOW_CRYPT_AES;
|
|
|
|
} else {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
2017-06-23 17:24:10 +01:00
|
|
|
|
|
|
|
static int qcow2_set_up_encryption(BlockDriverState *bs, const char *encryptfmt,
|
|
|
|
QemuOpts *opts, Error **errp)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
QCryptoBlockCreateOptions *cryptoopts = NULL;
|
|
|
|
QCryptoBlock *crypto = NULL;
|
|
|
|
int ret = -EINVAL;
|
|
|
|
QDict *options, *encryptopts;
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
int fmt;
|
2017-06-23 17:24:10 +01:00
|
|
|
|
|
|
|
options = qemu_opts_to_qdict(opts, NULL);
|
|
|
|
qdict_extract_subqdict(options, &encryptopts, "encrypt.");
|
|
|
|
QDECREF(options);
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
fmt = qcow2_crypt_method_from_format(encryptfmt);
|
|
|
|
|
|
|
|
switch (fmt) {
|
|
|
|
case QCOW_CRYPT_LUKS:
|
|
|
|
cryptoopts = block_crypto_create_opts_init(
|
|
|
|
Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp);
|
|
|
|
break;
|
|
|
|
case QCOW_CRYPT_AES:
|
|
|
|
cryptoopts = block_crypto_create_opts_init(
|
|
|
|
Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error_setg(errp, "Unknown encryption format '%s'", encryptfmt);
|
|
|
|
break;
|
2017-06-23 17:24:10 +01:00
|
|
|
}
|
|
|
|
if (!cryptoopts) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
s->crypt_method_header = fmt;
|
2017-06-23 17:24:10 +01:00
|
|
|
|
2017-06-23 17:24:17 +01:00
|
|
|
crypto = qcrypto_block_create(cryptoopts, "encrypt.",
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
qcow2_crypto_hdr_init_func,
|
|
|
|
qcow2_crypto_hdr_write_func,
|
2017-06-23 17:24:10 +01:00
|
|
|
bs, errp);
|
|
|
|
if (!crypto) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not write encryption header");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
QDECREF(encryptopts);
|
|
|
|
qcrypto_block_free(crypto);
|
|
|
|
qapi_free_QCryptoBlockCreateOptions(cryptoopts);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-10-09 23:55:32 +02:00
|
|
|
typedef struct PreallocCo {
|
|
|
|
BlockDriverState *bs;
|
|
|
|
uint64_t offset;
|
|
|
|
uint64_t new_length;
|
|
|
|
|
|
|
|
int ret;
|
|
|
|
} PreallocCo;
|
|
|
|
|
2017-06-13 22:21:00 +02:00
|
|
|
/**
|
|
|
|
* Preallocates metadata structures for data clusters between @offset (in the
|
|
|
|
* guest disk) and @new_length (which is thus generally the new guest disk
|
|
|
|
* size).
|
|
|
|
*
|
|
|
|
* Returns: 0 on success, -errno on failure.
|
|
|
|
*/
|
2017-10-09 23:55:32 +02:00
|
|
|
static void coroutine_fn preallocate_co(void *opaque)
|
2009-08-17 15:50:10 +02:00
|
|
|
{
|
2017-10-09 23:55:32 +02:00
|
|
|
PreallocCo *params = opaque;
|
|
|
|
BlockDriverState *bs = params->bs;
|
|
|
|
uint64_t offset = params->offset;
|
|
|
|
uint64_t new_length = params->new_length;
|
2017-06-13 22:21:01 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2016-06-01 16:55:05 +02:00
|
|
|
uint64_t bytes;
|
2012-12-07 18:08:45 +01:00
|
|
|
uint64_t host_offset = 0;
|
2016-06-01 16:55:05 +02:00
|
|
|
unsigned int cur_bytes;
|
2010-01-20 15:03:01 +01:00
|
|
|
int ret;
|
2012-12-07 18:08:46 +01:00
|
|
|
QCowL2Meta *meta;
|
2009-08-17 15:50:10 +02:00
|
|
|
|
2017-10-09 23:55:32 +02:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
2017-06-13 22:21:01 +02:00
|
|
|
|
2017-06-13 22:21:00 +02:00
|
|
|
assert(offset <= new_length);
|
|
|
|
bytes = new_length - offset;
|
2009-08-17 15:50:10 +02:00
|
|
|
|
2016-06-01 16:55:05 +02:00
|
|
|
while (bytes) {
|
|
|
|
cur_bytes = MIN(bytes, INT_MAX);
|
|
|
|
ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
|
2012-12-07 18:08:45 +01:00
|
|
|
&host_offset, &meta);
|
2010-01-20 15:03:01 +01:00
|
|
|
if (ret < 0) {
|
2017-06-13 22:21:01 +02:00
|
|
|
goto done;
|
2009-08-17 15:50:10 +02:00
|
|
|
}
|
|
|
|
|
2014-04-01 11:12:57 +02:00
|
|
|
while (meta) {
|
|
|
|
QCowL2Meta *next = meta->next;
|
|
|
|
|
2014-01-26 11:12:39 +08:00
|
|
|
ret = qcow2_alloc_cluster_link_l2(bs, meta);
|
|
|
|
if (ret < 0) {
|
|
|
|
qcow2_free_any_clusters(bs, meta->alloc_offset,
|
|
|
|
meta->nb_clusters, QCOW2_DISCARD_NEVER);
|
2017-06-13 22:21:01 +02:00
|
|
|
goto done;
|
2014-01-26 11:12:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* There are no dependent requests, but we need to remove our
|
|
|
|
* request from the list of in-flight requests */
|
2012-12-07 18:08:48 +01:00
|
|
|
QLIST_REMOVE(meta, next_in_flight);
|
2014-04-01 11:12:57 +02:00
|
|
|
|
|
|
|
g_free(meta);
|
|
|
|
meta = next;
|
2012-12-07 18:08:46 +01:00
|
|
|
}
|
2009-08-31 16:48:49 +02:00
|
|
|
|
2009-08-17 15:50:10 +02:00
|
|
|
/* TODO Preallocate data if requested */
|
|
|
|
|
2016-06-01 16:55:05 +02:00
|
|
|
bytes -= cur_bytes;
|
|
|
|
offset += cur_bytes;
|
2009-08-17 15:50:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It is expected that the image file is large enough to actually contain
|
|
|
|
* all of the allocated clusters (otherwise we get failing reads after
|
|
|
|
* EOF). Extend the image to the last allocated sector.
|
|
|
|
*/
|
2012-12-07 18:08:45 +01:00
|
|
|
if (host_offset != 0) {
|
2016-06-01 16:55:05 +02:00
|
|
|
uint8_t data = 0;
|
2016-06-20 20:09:15 +02:00
|
|
|
ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
|
2016-06-01 16:55:05 +02:00
|
|
|
&data, 1);
|
2010-06-22 16:59:46 +02:00
|
|
|
if (ret < 0) {
|
2017-06-13 22:21:01 +02:00
|
|
|
goto done;
|
2010-06-22 16:59:46 +02:00
|
|
|
}
|
2009-08-17 15:50:10 +02:00
|
|
|
}
|
|
|
|
|
2017-06-13 22:21:01 +02:00
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
done:
|
2017-10-09 23:55:32 +02:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
params->ret = ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Runs preallocate_co() for the range between @offset and @new_length of @bs
 * and waits for it to finish.
 *
 * When called from coroutine context, preallocate_co() is simply called
 * directly. Otherwise a new coroutine is created for it and entered, and
 * this function polls until the coroutine has replaced the initial
 * -EINPROGRESS in params.ret by the actual result.
 *
 * Returns: the result reported by preallocate_co(), i.e. 0 on success,
 * -errno on failure.
 */
static int preallocate(BlockDriverState *bs,
                       uint64_t offset, uint64_t new_length)
{
    PreallocCo params = {
        .bs         = bs,
        .offset     = offset,
        .new_length = new_length,
        /* Marks the operation as not completed yet; see the poll below */
        .ret        = -EINPROGRESS,
    };

    if (qemu_in_coroutine()) {
        preallocate_co(&params);
    } else {
        Coroutine *co = qemu_coroutine_create(preallocate_co, &params);
        bdrv_coroutine_enter(bs, co);
        BDRV_POLL_WHILE(bs, params.ret == -EINPROGRESS);
    }
    return params.ret;
}
|
|
|
|
|
2017-07-05 13:57:33 +01:00
|
|
|
/* qcow2_refcount_metadata_size:
 * @clusters: number of clusters to refcount (including data and L1/L2 tables)
 * @cluster_size: size of a cluster, in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 * @generous_increase: allow for the refcount table to be 1.5x as large as it
 *                     needs to be
 * @refblock_count: if not NULL, receives the number of refcount blocks
 *                  (clusters) that were calculated
 *
 * Returns: Number of bytes required for refcount blocks and table metadata.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    /*
     * Every host cluster is reference-counted, including metadata (even
     * refcount metadata is recursively included).
     *
     * Deriving an exact formula for the amount of refcount metadata is
     * difficult. It is easier to iterate until a fixed point is reached,
     * i.e. until no further refcount blocks or table clusters are needed
     * to reference count every cluster.
     */
    int64_t table_entries_per_cluster = cluster_size / sizeof(uint64_t);
    int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order);
    int64_t table = 0;  /* number of refcount table clusters */
    int64_t blocks = 0; /* number of refcount block clusters */
    int64_t total = 0;  /* all clusters, including refcount metadata */

    for (;;) {
        int64_t previous = total;

        /* Both divisions round up */
        blocks = (clusters + table + blocks + refcounts_per_block - 1)
                 / refcounts_per_block;
        table = (blocks + table_entries_per_cluster - 1)
                / table_entries_per_cluster;
        total = clusters + blocks + table;

        if (total == previous && generous_increase) {
            /*
             * Fixed point reached: add half the table size (rounded up) to
             * the clusters to be covered, once, and start over.
             */
            clusters += (table + 1) / 2;
            total = 0; /* force another loop */
            generous_increase = false;
        }

        if (total == previous) {
            break;
        }
    }

    if (refblock_count) {
        *refblock_count = blocks;
    }

    return (blocks + table) * cluster_size;
}
|
|
|
|
|
2017-07-05 13:57:32 +01:00
|
|
|
/**
 * qcow2_calc_prealloc_size:
 * @total_size: virtual disk size in bytes
 * @cluster_size: cluster size in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 *
 * Adds up the virtual disk size (rounded up to whole clusters), one cluster
 * for the header, the L2 and L1 tables and the refcount metadata.
 *
 * Returns: Total number of bytes required for the fully allocated image
 * (including metadata).
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    int64_t data_size = align_offset(total_size, cluster_size);
    uint64_t l2_entries, l1_entries;
    int64_t meta_size;

    /* header: 1 cluster */
    meta_size = cluster_size;

    /* L2 tables: one entry per data cluster, in whole L2 tables */
    l2_entries = data_size / cluster_size;
    l2_entries = align_offset(l2_entries, cluster_size / sizeof(uint64_t));
    meta_size += l2_entries * sizeof(uint64_t);

    /* L1 table: one entry per L2 table, in whole clusters */
    l1_entries = l2_entries * sizeof(uint64_t) / cluster_size;
    l1_entries = align_offset(l1_entries, cluster_size / sizeof(uint64_t));
    meta_size += l1_entries * sizeof(uint64_t);

    /* refcount table and blocks, covering everything counted so far */
    meta_size += qcow2_refcount_metadata_size(
                     (meta_size + data_size) / cluster_size,
                     cluster_size, refcount_order, false, NULL);

    return meta_size + data_size;
}
|
|
|
|
|
2017-07-05 13:57:34 +01:00
|
|
|
/*
 * Read the cluster size option out of @opts via qemu_opt_get_size_del()
 * (DEFAULT_CLUSTER_SIZE is passed as the fallback value) and validate it.
 *
 * Returns the cluster size in bytes.  If the value is not a power of two
 * within [1 << MIN_CLUSTER_BITS, 1 << MAX_CLUSTER_BITS], @errp is set and
 * 0 is returned.
 */
static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
{
    size_t size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
                                        DEFAULT_CLUSTER_SIZE);
    int bits = ctz32(size);
    bool bits_in_range = bits >= MIN_CLUSTER_BITS && bits <= MAX_CLUSTER_BITS;

    /*
     * The shift is only evaluated once the bit count is known to be in
     * range; comparing it against the size rejects non-powers of two.
     */
    if (bits_in_range && (1 << bits) == size) {
        return size;
    }

    error_setg(errp, "Cluster size must be a power of two between %d and "
               "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Read the compatibility level option out of @opts and map it to a qcow2
 * format version number.
 *
 * Returns 2 for "0.10", 3 for "1.1" or when the option is absent.  Any
 * other string sets @errp and returns -EINVAL.
 */
static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
{
    char *compat = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
    int version;

    if (compat == NULL) {
        /* Option not given: use the default */
        version = 3;
    } else if (strcmp(compat, "0.10") == 0) {
        version = 2;
    } else if (strcmp(compat, "1.1") == 0) {
        version = 3;
    } else {
        error_setg(errp, "Invalid compatibility level: '%s'", compat);
        version = -EINVAL;
    }

    /* The string returned by qemu_opt_get_del() is released here */
    g_free(compat);
    return version;
}
|
|
|
|
|
|
|
|
static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
uint64_t refcount_bits;
|
|
|
|
|
|
|
|
refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
|
|
|
|
if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
|
|
|
|
error_setg(errp, "Refcount width must be a power of two and may not "
|
|
|
|
"exceed 64 bits");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (version < 3 && refcount_bits != 16) {
|
|
|
|
error_setg(errp, "Different refcount widths than 16 bits require "
|
|
|
|
"compatibility level 1.1 or above (use compat=1.1 or "
|
|
|
|
"greater)");
|
|
|
|
return 0;
|
2010-06-11 21:37:37 +02:00
|
|
|
}
|
|
|
|
|
2017-07-05 13:57:34 +01:00
|
|
|
return refcount_bits;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int qcow2_create2(const char *filename, int64_t total_size,
|
|
|
|
const char *backing_file, const char *backing_format,
|
|
|
|
int flags, size_t cluster_size, PreallocMode prealloc,
|
|
|
|
QemuOpts *opts, int version, int refcount_order,
|
|
|
|
const char *encryptfmt, Error **errp)
|
|
|
|
{
|
|
|
|
QDict *options;
|
|
|
|
|
2010-06-11 21:37:37 +02:00
|
|
|
/*
|
|
|
|
* Open the image file and write a minimal qcow2 header.
|
|
|
|
*
|
|
|
|
* We keep things simple and start with a zero-sized image. We also
|
|
|
|
* do without refcount blocks or a L1 table for now. We'll fix the
|
|
|
|
* inconsistency later.
|
|
|
|
*
|
|
|
|
* We do need a refcount table because growing the refcount table means
|
|
|
|
* allocating two new refcount blocks - the seconds of which would be at
|
|
|
|
* 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
|
|
|
|
* size for any qcow2 image.
|
|
|
|
*/
|
2016-03-08 15:57:05 +01:00
|
|
|
BlockBackend *blk;
|
2013-12-04 11:06:36 +01:00
|
|
|
QCowHeader *header;
|
2014-03-28 18:06:31 +01:00
|
|
|
uint64_t* refcount_table;
|
2013-09-05 09:40:43 +02:00
|
|
|
Error *local_err = NULL;
|
2010-06-11 21:37:37 +02:00
|
|
|
int ret;
|
|
|
|
|
2014-09-10 17:05:49 +08:00
|
|
|
if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
|
2017-07-05 13:57:32 +01:00
|
|
|
int64_t prealloc_size =
|
|
|
|
qcow2_calc_prealloc_size(total_size, cluster_size, refcount_order);
|
|
|
|
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, prealloc_size, &error_abort);
|
2017-08-24 10:46:08 +02:00
|
|
|
qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_str(prealloc),
|
2015-02-12 17:52:20 +01:00
|
|
|
&error_abort);
|
2014-09-10 17:05:49 +08:00
|
|
|
}
|
|
|
|
|
2014-06-05 17:21:11 +08:00
|
|
|
ret = bdrv_create_file(filename, opts, &local_err);
|
2010-06-11 21:37:37 +02:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_propagate(errp, local_err);
|
2010-06-11 21:37:37 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-03-16 19:54:38 +01:00
|
|
|
blk = blk_new_open(filename, NULL, NULL,
|
2017-02-17 15:07:38 +01:00
|
|
|
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
|
|
|
|
&local_err);
|
2016-03-08 15:57:05 +01:00
|
|
|
if (blk == NULL) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_propagate(errp, local_err);
|
2016-03-08 15:57:05 +01:00
|
|
|
return -EIO;
|
2010-06-11 21:37:37 +02:00
|
|
|
}
|
|
|
|
|
2016-03-08 15:57:05 +01:00
|
|
|
blk_set_allow_write_beyond_eof(blk, true);
|
|
|
|
|
2010-06-11 21:37:37 +02:00
|
|
|
/* Write the header */
|
2013-12-04 11:06:36 +01:00
|
|
|
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
|
|
|
|
header = g_malloc0(cluster_size);
|
|
|
|
*header = (QCowHeader) {
|
|
|
|
.magic = cpu_to_be32(QCOW_MAGIC),
|
|
|
|
.version = cpu_to_be32(version),
|
2017-07-05 13:57:34 +01:00
|
|
|
.cluster_bits = cpu_to_be32(ctz32(cluster_size)),
|
2013-12-04 11:06:36 +01:00
|
|
|
.size = cpu_to_be64(0),
|
|
|
|
.l1_table_offset = cpu_to_be64(0),
|
|
|
|
.l1_size = cpu_to_be32(0),
|
|
|
|
.refcount_table_offset = cpu_to_be64(cluster_size),
|
|
|
|
.refcount_table_clusters = cpu_to_be32(1),
|
2015-02-18 17:40:46 -05:00
|
|
|
.refcount_order = cpu_to_be32(refcount_order),
|
2013-12-04 11:06:36 +01:00
|
|
|
.header_length = cpu_to_be32(sizeof(*header)),
|
|
|
|
};
|
2010-06-11 21:37:37 +02:00
|
|
|
|
2017-06-23 17:24:10 +01:00
|
|
|
/* We'll update this to correct value later */
|
|
|
|
header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
|
2010-06-11 21:37:37 +02:00
|
|
|
|
2012-07-27 09:05:22 +01:00
|
|
|
if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
|
2013-12-04 11:06:36 +01:00
|
|
|
header->compatible_features |=
|
2012-07-27 09:05:22 +01:00
|
|
|
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
|
|
|
|
}
|
|
|
|
|
2016-05-06 10:26:27 -06:00
|
|
|
ret = blk_pwrite(blk, 0, header, cluster_size, 0);
|
2013-12-04 11:06:36 +01:00
|
|
|
g_free(header);
|
2010-06-11 21:37:37 +02:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not write qcow2 header");
|
2010-06-11 21:37:37 +02:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-03-28 18:06:31 +01:00
|
|
|
/* Write a refcount table with one refcount block */
|
|
|
|
refcount_table = g_malloc0(2 * cluster_size);
|
|
|
|
refcount_table[0] = cpu_to_be64(2 * cluster_size);
|
2016-05-06 10:26:27 -06:00
|
|
|
ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
|
2011-08-20 22:09:37 -05:00
|
|
|
g_free(refcount_table);
|
2010-06-11 21:37:37 +02:00
|
|
|
|
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not write refcount table");
|
2010-06-11 21:37:37 +02:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-03-08 15:57:05 +01:00
|
|
|
blk_unref(blk);
|
|
|
|
blk = NULL;
|
2010-06-11 21:37:37 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* And now open the image and make it consistent first (i.e. increase the
|
|
|
|
* refcount of the cluster that is occupied by the header and the refcount
|
|
|
|
* table)
|
|
|
|
*/
|
2015-08-26 19:47:48 +02:00
|
|
|
options = qdict_new();
|
2017-04-27 16:58:17 -05:00
|
|
|
qdict_put_str(options, "driver", "qcow2");
|
2016-03-16 19:54:38 +01:00
|
|
|
blk = blk_new_open(filename, NULL, options,
|
2017-02-17 15:07:38 +01:00
|
|
|
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
|
|
|
|
&local_err);
|
2016-03-08 15:57:05 +01:00
|
|
|
if (blk == NULL) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_propagate(errp, local_err);
|
2016-03-08 15:57:05 +01:00
|
|
|
ret = -EIO;
|
2010-06-11 21:37:37 +02:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-03-08 15:57:05 +01:00
|
|
|
ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
|
2010-06-11 21:37:37 +02:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
|
|
|
|
"header and refcount table");
|
2010-06-11 21:37:37 +02:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
} else if (ret != 0) {
|
|
|
|
error_report("Huh, first cluster in empty image is already in use?");
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2015-12-02 18:34:39 +01:00
|
|
|
/* Create a full header (including things like feature table) */
|
2016-03-08 15:57:05 +01:00
|
|
|
ret = qcow2_update_header(blk_bs(blk));
|
2015-12-02 18:34:39 +01:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not update qcow2 header");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2010-06-11 21:37:37 +02:00
|
|
|
/* Okay, now that we have a valid image, let's give it the right size */
|
2017-06-13 22:20:54 +02:00
|
|
|
ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp);
|
2010-06-11 21:37:37 +02:00
|
|
|
if (ret < 0) {
|
2017-03-28 22:51:27 +02:00
|
|
|
error_prepend(errp, "Could not resize image: ");
|
2010-06-11 21:37:37 +02:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Want a backing file? There you go.*/
|
|
|
|
if (backing_file) {
|
2016-03-08 15:57:05 +01:00
|
|
|
ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
|
2010-06-11 21:37:37 +02:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
|
|
|
|
"with format '%s'", backing_file, backing_format);
|
2010-06-11 21:37:37 +02:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-23 17:24:10 +01:00
|
|
|
/* Want encryption? There you go. */
|
|
|
|
if (encryptfmt) {
|
|
|
|
ret = qcow2_set_up_encryption(blk_bs(blk), encryptfmt, opts, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-06-11 21:37:37 +02:00
|
|
|
/* And if we're supposed to preallocate metadata, do that now */
|
2014-09-10 17:05:49 +08:00
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
2017-06-13 22:21:00 +02:00
|
|
|
ret = preallocate(blk_bs(blk), 0, total_size);
|
2010-06-11 21:37:37 +02:00
|
|
|
if (ret < 0) {
|
2013-09-05 09:40:43 +02:00
|
|
|
error_setg_errno(errp, -ret, "Could not preallocate metadata");
|
2010-06-11 21:37:37 +02:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-08 15:57:05 +01:00
|
|
|
blk_unref(blk);
|
|
|
|
blk = NULL;
|
2013-10-24 20:35:06 +02:00
|
|
|
|
2017-06-23 17:24:10 +01:00
|
|
|
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
|
|
|
|
* Using BDRV_O_NO_IO, since encryption is now setup we don't want to
|
|
|
|
* have to setup decryption context. We're not doing any I/O on the top
|
|
|
|
* level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
|
|
|
|
* not have effect.
|
|
|
|
*/
|
2015-08-26 19:47:48 +02:00
|
|
|
options = qdict_new();
|
2017-04-27 16:58:17 -05:00
|
|
|
qdict_put_str(options, "driver", "qcow2");
|
2016-03-16 19:54:38 +01:00
|
|
|
blk = blk_new_open(filename, NULL, options,
|
2017-06-23 17:24:10 +01:00
|
|
|
BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
|
|
|
|
&local_err);
|
2016-03-08 15:57:05 +01:00
|
|
|
if (blk == NULL) {
|
2013-10-24 20:35:06 +02:00
|
|
|
error_propagate(errp, local_err);
|
2016-03-08 15:57:05 +01:00
|
|
|
ret = -EIO;
|
2013-10-24 20:35:06 +02:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2010-06-11 21:37:37 +02:00
|
|
|
ret = 0;
|
|
|
|
out:
|
2016-03-08 15:57:05 +01:00
|
|
|
if (blk) {
|
|
|
|
blk_unref(blk);
|
2014-02-18 18:33:05 +01:00
|
|
|
}
|
2010-06-11 21:37:37 +02:00
|
|
|
return ret;
|
|
|
|
}
|
2010-05-07 12:43:45 +02:00
|
|
|
|
2014-06-05 17:20:59 +08:00
|
|
|
/*
 * Image creation entry point: pull the creation options out of @opts,
 * validate their combinations and hand the result to qcow2_create2().
 *
 * Returns the result of qcow2_create2(), or -EINVAL with @errp set when an
 * option value or option combination is rejected.
 */
static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
{
    /* Options are consumed in this order: size, backing file/format, encryption */
    uint64_t size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                             BDRV_SECTOR_SIZE);
    char *backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    char *backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
    char *encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
    char *prealloc_str = NULL;
    Error *local_err = NULL;
    PreallocMode prealloc;
    uint64_t refcount_bits;
    size_t cluster_size;
    int version;
    int flags = 0;
    /* Every rejection before the call to qcow2_create2() reports -EINVAL */
    int ret = -EINVAL;

    if (!encryptfmt) {
        /* Legacy boolean switch selects the "aes" format */
        if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
            encryptfmt = g_strdup("aes");
        }
    } else if (qemu_opt_get(opts, BLOCK_OPT_ENCRYPT)) {
        error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and "
                   BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive");
        goto finish;
    }

    cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
    if (local_err) {
        goto propagate;
    }

    prealloc_str = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    prealloc = qapi_enum_parse(&PreallocMode_lookup, prealloc_str,
                               PREALLOC_MODE_OFF, &local_err);
    if (local_err) {
        goto propagate;
    }

    version = qcow2_opt_get_version_del(opts, &local_err);
    if (local_err) {
        goto propagate;
    }

    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_LAZY_REFCOUNTS, false)) {
        flags |= BLOCK_FLAG_LAZY_REFCOUNTS;
    }

    if (backing_file && prealloc != PREALLOC_MODE_OFF) {
        error_setg(errp, "Backing file and preallocation cannot be used at "
                   "the same time");
        goto finish;
    }

    if ((flags & BLOCK_FLAG_LAZY_REFCOUNTS) && version < 3) {
        error_setg(errp, "Lazy refcounts only supported with compatibility "
                   "level 1.1 and above (use compat=1.1 or greater)");
        goto finish;
    }

    refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
    if (local_err) {
        goto propagate;
    }

    /* The refcount order is the base-2 exponent of the validated width */
    ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
                        cluster_size, prealloc, opts, version,
                        ctz32(refcount_bits), encryptfmt, &local_err);

propagate:
    /* local_err is only set when one of the calls above reported an error */
    error_propagate(errp, local_err);

finish:
    g_free(backing_file);
    g_free(backing_fmt);
    g_free(encryptfmt);
    g_free(prealloc_str);
    return ret;
}
|
|
|
|
|
2016-05-11 10:00:14 +03:00
|
|
|
|
2017-10-11 22:47:00 -05:00
|
|
|
/*
 * Report whether the byte range [@offset, @offset + @bytes) of @bs reads
 * as zeroes, according to bdrv_block_status_above().
 *
 * The range is first clamped to the image length; an empty range counts
 * as zero.  Returns false if the status query fails, if the status lacks
 * BDRV_BLOCK_ZERO, or if the status covers fewer than @bytes bytes.
 */
static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
    int64_t covered;
    int status;

    /* Clamp to image length, before checking status of underlying sectors */
    if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
        bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
    }

    if (bytes == 0) {
        return true;
    }

    status = bdrv_block_status_above(bs, NULL, offset, bytes, &covered,
                                     NULL, NULL);
    if (status < 0) {
        return false;
    }
    if (!(status & BDRV_BLOCK_ZERO)) {
        return false;
    }
    /* The zero status must span the whole requested range */
    return covered == bytes;
}
|
|
|
|
|
2016-06-01 15:10:06 -06:00
|
|
|
/*
 * Write zeroes to [@offset, @offset + @bytes) using zero clusters.
 *
 * A request that is not cluster-aligned must fit in one cluster.  It is
 * widened to that whole cluster only if the rest of the cluster already
 * reads as zero and the cluster is unallocated or a zero cluster;
 * otherwise -ENOTSUP is returned.  A request that ends exactly at the end
 * of the image is treated as having no unaligned tail.
 *
 * Returns the result of qcow2_cluster_zeroize(), or -ENOTSUP.
 */
static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int bytes, BdrvRequestFlags flags)
{
    BDRVQcow2State *s = bs->opaque;
    uint32_t head = offset % s->cluster_size;
    uint32_t tail = (offset + bytes) % s->cluster_size;
    bool partial;
    int ret;

    trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
    if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
        tail = 0;
    }

    partial = head || tail;
    if (partial) {
        /* Bytes between the end of the request and the end of its cluster */
        int64_t tail_gap = tail ? s->cluster_size - tail : 0;

        assert(head + bytes <= s->cluster_size);

        /* check whether remainder of cluster already reads as zero */
        if (!is_zero(bs, offset - head, head) ||
            !is_zero(bs, offset + bytes, tail_gap)) {
            return -ENOTSUP;
        }
    }

    qemu_co_mutex_lock(&s->lock);

    if (partial) {
        uint64_t cluster_off;
        unsigned int nr = s->cluster_size;

        /* We can have new write after previous check */
        offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
        bytes = s->cluster_size;
        ret = qcow2_get_cluster_offset(bs, offset, &nr, &cluster_off);
        switch (ret) {
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_ZERO_ALLOC:
            break;
        default:
            /* Any other result, including an error, refuses the request */
            qemu_co_mutex_unlock(&s->lock);
            return -ENOTSUP;
        }
    }

    trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);

    /* Whatever is left can use real zero clusters */
    ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
    qemu_co_mutex_unlock(&s->lock);

    return ret;
}
|
|
|
|
|
2016-07-15 17:23:03 -06:00
|
|
|
/*
 * Discard [@offset, @offset + @bytes) under s->lock.
 *
 * A request that is not cluster-aligned is refused with -ENOTSUP unless it
 * starts on a cluster boundary and ends exactly at the end of the image.
 *
 * Returns the result of qcow2_cluster_discard(), or -ENOTSUP.
 */
static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
                                          int64_t offset, int bytes)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
        bool starts_aligned = QEMU_IS_ALIGNED(offset, s->cluster_size);
        bool ends_at_eof =
            offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE;

        assert(bytes < s->cluster_size);
        /* Ignore partial clusters, except for the special case of the
         * complete partial cluster at the end of an unaligned file */
        if (!(starts_aligned && ends_at_eof)) {
            return -ENOTSUP;
        }
    }

    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
                                false);
    qemu_co_mutex_unlock(&s->lock);

    return ret;
}
|
|
|
|
|
2017-06-13 22:20:52 +02:00
|
|
|
static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
|
|
|
|
PreallocMode prealloc, Error **errp)
|
2010-04-28 11:36:11 +01:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2017-06-13 22:21:02 +02:00
|
|
|
uint64_t old_length;
|
2013-05-14 16:14:33 +02:00
|
|
|
int64_t new_l1_size;
|
|
|
|
int ret;
|
2010-04-28 11:36:11 +01:00
|
|
|
|
2017-06-13 22:21:05 +02:00
|
|
|
if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
|
|
|
|
prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
|
|
|
|
{
|
2017-06-13 22:20:52 +02:00
|
|
|
error_setg(errp, "Unsupported preallocation mode '%s'",
|
2017-08-24 10:46:08 +02:00
|
|
|
PreallocMode_str(prealloc));
|
2017-06-13 22:20:52 +02:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2010-04-28 11:36:11 +01:00
|
|
|
if (offset & 511) {
|
2017-03-28 22:51:28 +02:00
|
|
|
error_setg(errp, "The new size must be a multiple of 512");
|
2010-04-28 11:36:11 +01:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* cannot proceed if image has snapshots */
|
|
|
|
if (s->nb_snapshots) {
|
2017-03-28 22:51:28 +02:00
|
|
|
error_setg(errp, "Can't resize an image which has snapshots");
|
2010-04-28 11:36:11 +01:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2017-06-28 15:05:08 +03:00
|
|
|
/* cannot proceed if image has bitmaps */
|
|
|
|
if (s->nb_bitmaps) {
|
|
|
|
/* TODO: resize bitmaps in the image */
|
|
|
|
error_setg(errp, "Can't resize an image which has bitmaps");
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2017-06-13 22:21:02 +02:00
|
|
|
old_length = bs->total_sectors * 512;
|
2017-09-18 15:42:29 +03:00
|
|
|
new_l1_size = size_to_l1(s, offset);
|
2017-06-13 22:21:02 +02:00
|
|
|
|
|
|
|
if (offset < old_length) {
|
2017-09-29 15:16:13 +03:00
|
|
|
int64_t last_cluster, old_file_size;
|
2017-09-18 15:42:29 +03:00
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Preallocation can't be used for shrinking an image");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2010-04-28 11:36:11 +01:00
|
|
|
|
2017-09-18 15:42:29 +03:00
|
|
|
ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
|
|
|
|
old_length - ROUND_UP(offset,
|
|
|
|
s->cluster_size),
|
|
|
|
QCOW2_DISCARD_ALWAYS, true);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_shrink_l1_table(bs, new_l1_size);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to reduce the number of L2 tables");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_shrink_reftable(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to discard unused refblocks");
|
|
|
|
return ret;
|
|
|
|
}
|
2017-09-29 15:16:13 +03:00
|
|
|
|
|
|
|
old_file_size = bdrv_getlength(bs->file->bs);
|
|
|
|
if (old_file_size < 0) {
|
|
|
|
error_setg_errno(errp, -old_file_size,
|
|
|
|
"Failed to inquire current file length");
|
|
|
|
return old_file_size;
|
|
|
|
}
|
|
|
|
last_cluster = qcow2_get_last_cluster(bs, old_file_size);
|
|
|
|
if (last_cluster < 0) {
|
|
|
|
error_setg_errno(errp, -last_cluster,
|
|
|
|
"Failed to find the last cluster");
|
|
|
|
return last_cluster;
|
|
|
|
}
|
|
|
|
if ((last_cluster + 1) * s->cluster_size < old_file_size) {
|
2017-10-09 17:54:31 +02:00
|
|
|
Error *local_err = NULL;
|
|
|
|
|
|
|
|
bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
|
|
|
|
PREALLOC_MODE_OFF, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
warn_reportf_err(local_err,
|
|
|
|
"Failed to truncate the tail of the image: ");
|
2017-09-29 15:16:13 +03:00
|
|
|
}
|
|
|
|
}
|
2017-09-18 15:42:29 +03:00
|
|
|
} else {
|
|
|
|
ret = qcow2_grow_l1_table(bs, new_l1_size, true);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to grow the L1 table");
|
|
|
|
return ret;
|
|
|
|
}
|
2010-04-28 11:36:11 +01:00
|
|
|
}
|
|
|
|
|
2017-06-13 22:21:02 +02:00
|
|
|
switch (prealloc) {
|
|
|
|
case PREALLOC_MODE_OFF:
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PREALLOC_MODE_METADATA:
|
|
|
|
ret = preallocate(bs, old_length, offset);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Preallocation failed");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2017-06-13 22:21:05 +02:00
|
|
|
case PREALLOC_MODE_FALLOC:
|
|
|
|
case PREALLOC_MODE_FULL:
|
|
|
|
{
|
|
|
|
int64_t allocation_start, host_offset, guest_offset;
|
|
|
|
int64_t clusters_allocated;
|
|
|
|
int64_t old_file_size, new_file_size;
|
|
|
|
uint64_t nb_new_data_clusters, nb_new_l2_tables;
|
|
|
|
|
|
|
|
old_file_size = bdrv_getlength(bs->file->bs);
|
|
|
|
if (old_file_size < 0) {
|
|
|
|
error_setg_errno(errp, -old_file_size,
|
|
|
|
"Failed to inquire current file length");
|
2017-09-29 15:16:12 +03:00
|
|
|
return old_file_size;
|
2017-06-13 22:21:05 +02:00
|
|
|
}
|
2017-10-09 23:55:31 +02:00
|
|
|
old_file_size = ROUND_UP(old_file_size, s->cluster_size);
|
2017-06-13 22:21:05 +02:00
|
|
|
|
|
|
|
nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
|
|
|
|
s->cluster_size);
|
|
|
|
|
|
|
|
/* This is an overestimation; we will not actually allocate space for
|
|
|
|
* these in the file but just make sure the new refcount structures are
|
|
|
|
* able to cover them so we will not have to allocate new refblocks
|
|
|
|
* while entering the data blocks in the potentially new L2 tables.
|
|
|
|
* (We do not actually care where the L2 tables are placed. Maybe they
|
|
|
|
* are already allocated or they can be placed somewhere before
|
|
|
|
* @old_file_size. It does not matter because they will be fully
|
|
|
|
* allocated automatically, so they do not need to be covered by the
|
|
|
|
* preallocation. All that matters is that we will not have to allocate
|
|
|
|
* new refcount structures for them.) */
|
|
|
|
nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
|
|
|
|
s->cluster_size / sizeof(uint64_t));
|
|
|
|
/* The cluster range may not be aligned to L2 boundaries, so add one L2
|
|
|
|
* table for a potential head/tail */
|
|
|
|
nb_new_l2_tables++;
|
|
|
|
|
|
|
|
allocation_start = qcow2_refcount_area(bs, old_file_size,
|
|
|
|
nb_new_data_clusters +
|
|
|
|
nb_new_l2_tables,
|
|
|
|
true, 0, 0);
|
|
|
|
if (allocation_start < 0) {
|
|
|
|
error_setg_errno(errp, -allocation_start,
|
|
|
|
"Failed to resize refcount structures");
|
2017-09-29 15:16:12 +03:00
|
|
|
return allocation_start;
|
2017-06-13 22:21:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
|
|
|
|
nb_new_data_clusters);
|
|
|
|
if (clusters_allocated < 0) {
|
|
|
|
error_setg_errno(errp, -clusters_allocated,
|
|
|
|
"Failed to allocate data clusters");
|
|
|
|
return -clusters_allocated;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(clusters_allocated == nb_new_data_clusters);
|
|
|
|
|
|
|
|
/* Allocate the data area */
|
|
|
|
new_file_size = allocation_start +
|
|
|
|
nb_new_data_clusters * s->cluster_size;
|
|
|
|
ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_prepend(errp, "Failed to resize underlying file: ");
|
|
|
|
qcow2_free_clusters(bs, allocation_start,
|
|
|
|
nb_new_data_clusters * s->cluster_size,
|
|
|
|
QCOW2_DISCARD_OTHER);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create the necessary L2 entries */
|
|
|
|
host_offset = allocation_start;
|
|
|
|
guest_offset = old_length;
|
|
|
|
while (nb_new_data_clusters) {
|
|
|
|
int64_t guest_cluster = guest_offset >> s->cluster_bits;
|
|
|
|
int64_t nb_clusters = MIN(nb_new_data_clusters,
|
|
|
|
s->l2_size - guest_cluster % s->l2_size);
|
|
|
|
QCowL2Meta allocation = {
|
|
|
|
.offset = guest_offset,
|
|
|
|
.alloc_offset = host_offset,
|
|
|
|
.nb_clusters = nb_clusters,
|
|
|
|
};
|
|
|
|
qemu_co_queue_init(&allocation.dependent_requests);
|
|
|
|
|
|
|
|
ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to update L2 tables");
|
|
|
|
qcow2_free_clusters(bs, host_offset,
|
|
|
|
nb_new_data_clusters * s->cluster_size,
|
|
|
|
QCOW2_DISCARD_OTHER);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
guest_offset += nb_clusters * s->cluster_size;
|
|
|
|
host_offset += nb_clusters * s->cluster_size;
|
|
|
|
nb_new_data_clusters -= nb_clusters;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2017-06-13 22:21:02 +02:00
|
|
|
default:
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
|
|
|
/* Flush metadata before actually changing the image size */
|
|
|
|
ret = bdrv_flush(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to flush the preallocated area to disk");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-04-28 11:36:11 +01:00
|
|
|
/* write updated header.size */
|
|
|
|
offset = cpu_to_be64(offset);
|
2016-06-20 20:09:15 +02:00
|
|
|
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
|
2010-06-16 17:44:35 +02:00
|
|
|
&offset, sizeof(uint64_t));
|
2010-04-28 11:36:11 +01:00
|
|
|
if (ret < 0) {
|
2017-03-28 22:51:29 +02:00
|
|
|
error_setg_errno(errp, -ret, "Failed to update the image size");
|
2010-04-28 11:36:11 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->l1_vm_state_index = new_l1_size;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-04-23 20:19:47 +00:00
|
|
|
/* XXX: put compressed sectors first, then all the cluster aligned
|
|
|
|
tables to avoid losing bytes in alignment */
|
2016-07-22 11:17:43 +03:00
|
|
|
static coroutine_fn int
|
|
|
|
qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
|
|
|
|
uint64_t bytes, QEMUIOVector *qiov)
|
2010-04-23 20:19:47 +00:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2016-07-22 11:17:43 +03:00
|
|
|
QEMUIOVector hd_qiov;
|
|
|
|
struct iovec iov;
|
2010-04-23 20:19:47 +00:00
|
|
|
z_stream strm;
|
|
|
|
int ret, out_len;
|
2016-07-22 11:17:43 +03:00
|
|
|
uint8_t *buf, *out_buf;
|
2017-08-09 15:38:08 -05:00
|
|
|
int64_t cluster_offset;
|
2010-04-23 20:19:47 +00:00
|
|
|
|
2016-07-22 11:17:43 +03:00
|
|
|
if (bytes == 0) {
|
2010-04-23 20:19:47 +00:00
|
|
|
/* align end of file to a sector boundary to ease reading with
|
|
|
|
sector based I/Os */
|
2015-06-16 14:19:22 +02:00
|
|
|
cluster_offset = bdrv_getlength(bs->file->bs);
|
2017-08-09 15:38:08 -05:00
|
|
|
if (cluster_offset < 0) {
|
|
|
|
return cluster_offset;
|
|
|
|
}
|
2017-06-13 22:20:53 +02:00
|
|
|
return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL);
|
2010-04-23 20:19:47 +00:00
|
|
|
}
|
|
|
|
|
2016-07-22 11:17:44 +03:00
|
|
|
buf = qemu_blockalign(bs, s->cluster_size);
|
2016-07-22 11:17:43 +03:00
|
|
|
if (bytes != s->cluster_size) {
|
2016-07-22 11:17:44 +03:00
|
|
|
if (bytes > s->cluster_size ||
|
|
|
|
offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
|
2016-07-22 11:17:43 +03:00
|
|
|
{
|
2016-07-22 11:17:44 +03:00
|
|
|
qemu_vfree(buf);
|
|
|
|
return -EINVAL;
|
2013-04-15 17:17:31 +02:00
|
|
|
}
|
2016-07-22 11:17:44 +03:00
|
|
|
/* Zero-pad last write if image size is not cluster aligned */
|
|
|
|
memset(buf + bytes, 0, s->cluster_size - bytes);
|
2013-04-15 17:17:31 +02:00
|
|
|
}
|
2016-08-15 12:39:22 +03:00
|
|
|
qemu_iovec_to_buf(qiov, 0, buf, bytes);
|
2010-04-23 20:19:47 +00:00
|
|
|
|
2016-07-14 19:59:25 +03:00
|
|
|
out_buf = g_malloc(s->cluster_size);
|
2010-04-23 20:19:47 +00:00
|
|
|
|
|
|
|
/* best compression, small window, no zlib header */
|
|
|
|
memset(&strm, 0, sizeof(strm));
|
|
|
|
ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
|
|
|
|
Z_DEFLATED, -12,
|
|
|
|
9, Z_DEFAULT_STRATEGY);
|
|
|
|
if (ret != 0) {
|
2011-10-18 17:12:44 +02:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2010-04-23 20:19:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
strm.avail_in = s->cluster_size;
|
|
|
|
strm.next_in = (uint8_t *)buf;
|
|
|
|
strm.avail_out = s->cluster_size;
|
|
|
|
strm.next_out = out_buf;
|
|
|
|
|
|
|
|
ret = deflate(&strm, Z_FINISH);
|
|
|
|
if (ret != Z_STREAM_END && ret != Z_OK) {
|
|
|
|
deflateEnd(&strm);
|
2011-10-18 17:12:44 +02:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2010-04-23 20:19:47 +00:00
|
|
|
}
|
|
|
|
out_len = strm.next_out - out_buf;
|
|
|
|
|
|
|
|
deflateEnd(&strm);
|
|
|
|
|
|
|
|
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
|
|
|
|
/* could not compress: write normal cluster */
|
2016-07-22 11:17:43 +03:00
|
|
|
ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
|
2011-10-18 17:12:44 +02:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2016-07-22 11:17:43 +03:00
|
|
|
goto success;
|
|
|
|
}
|
2013-08-30 14:34:26 +02:00
|
|
|
|
2016-07-22 11:17:43 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
cluster_offset =
|
|
|
|
qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
|
|
|
|
if (!cluster_offset) {
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
ret = -EIO;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
cluster_offset &= s->cluster_offset_mask;
|
2013-08-30 14:34:26 +02:00
|
|
|
|
2016-07-22 11:17:43 +03:00
|
|
|
ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
2010-04-23 20:19:47 +00:00
|
|
|
}
|
|
|
|
|
2016-07-22 11:17:43 +03:00
|
|
|
iov = (struct iovec) {
|
|
|
|
.iov_base = out_buf,
|
|
|
|
.iov_len = out_len,
|
|
|
|
};
|
|
|
|
qemu_iovec_init_external(&hd_qiov, &iov, 1);
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
|
|
|
|
ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
success:
|
2011-10-18 17:12:44 +02:00
|
|
|
ret = 0;
|
|
|
|
fail:
|
2016-07-22 11:17:43 +03:00
|
|
|
qemu_vfree(buf);
|
2011-08-20 22:09:37 -05:00
|
|
|
g_free(out_buf);
|
2011-10-18 17:12:44 +02:00
|
|
|
return ret;
|
2010-04-23 20:19:47 +00:00
|
|
|
}
|
|
|
|
|
2014-10-24 15:57:32 +02:00
|
|
|
/*
 * Throw away the complete contents of the image by rebuilding its metadata
 * from scratch instead of discarding cluster by cluster.
 *
 * Resulting layout (in clusters): the image header first, then a one-cluster
 * refcount table at cluster 1, the first refcount block at cluster 2 and the
 * zeroed L1 table starting at cluster 3.  The image file is truncated right
 * behind the L1 table.
 *
 * The image is marked dirty while the refcount structures are inconsistent
 * and marked clean again once they have been rebuilt.
 *
 * Returns 0 on success, a negative errno value on failure.  If the failure
 * happens while the refcount information may be broken, bs->drv is set to
 * NULL because the BDS cannot be used any more.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    uint64_t *fresh_reftable = NULL;
    uint64_t first_rt_entry, l1_bytes;
    int64_t alloc_start;
    int l1_clusters, ret;
    /* Big-endian image of the three adjacent header fields that start at
     * l1_table_offset; they are rewritten with a single write */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED hdr_fields = {
        .l1_offset          = cpu_to_be64(3 * s->cluster_size),
        .reftable_offset    = cpu_to_be64(s->cluster_size),
        .reftable_clusters  = cpu_to_be32(1),
    };

    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto out;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto out;
    }

    /* From here on the refcounts are going to be wrong, so flag the image */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto out;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_bytes = (uint64_t)s->l1_size * sizeof(uint64_t);

    /* Once the L1 table has been zeroed, neither the in-memory nor the
     * on-disk refcounts describe the actual references any more */

    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto eject;
    }
    memset(s->l1_table, 0, l1_bytes);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /* Clear the clusters right behind the header that will hold the new
     * refcount table, the first refcount block and the L1 table.  This may
     * clobber parts of the old refcount structures and L1 table; that is
     * fine because the dirty flag is set and all data is to be dropped
     * anyway. */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    /* Even a failed call may already have overwritten on-disk refcount
     * structures, in which case the in-memory state no longer matches the
     * disk and the BDS is unusable */
    if (ret < 0) {
        goto eject;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /* Point the header at the new, still empty reftable (cluster 1, one
     * cluster long) and the new L1 table (cluster 3); cluster 2 in between
     * is reserved for the first refblock */
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &hdr_fields, sizeof(hdr_fields));
    if (ret < 0) {
        goto eject;
    }

    s->l1_table_offset = 3 * s->cluster_size;

    fresh_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!fresh_reftable) {
        ret = -ENOMEM;
        goto eject;
    }

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    /* Ownership of the new table moves to the driver state */
    g_free(s->refcount_table);
    s->refcount_table = fresh_reftable;
    fresh_reftable = NULL;

    /* In-memory and on-disk refcount information agree again (empty
     * reftable, no refblocks, empty refblock cache).  They are still not
     * correct, though: clusters such as the image header are in use without
     * being refcounted, while the regular qcow2 code relies on the
     * in-memory information being accurate. */

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Hook the first refblock (cluster 2) into the reftable */
    first_rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &first_rt_entry, sizeof(first_rt_entry));
    if (ret < 0) {
        goto eject;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /* Account for header, reftable, refblock and L1 table in one go; the
     * allocation has to start at the very beginning of the file */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    alloc_start = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_bytes);
    if (alloc_start < 0) {
        ret = alloc_start;
        goto eject;
    }
    if (alloc_start > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /* The in-memory information now matches the on-disk structures and is
     * correct, so the dirty flag can go */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto out;
    }

    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto out;
    }

    return 0;

eject:
    /* The BDS cannot be used any longer.  Repairing it would take
     * qcow2_refcount_close(), qcow2_refcount_init(),
     * qcow2_check_refcounts(), qcow2_refcount_close() and
     * qcow2_refcount_init() again, but those rely on the very functions
     * whose failure led here, so that sequence would most likely fail too.
     * Therefore, just eject the BDS. */
    bs->drv = NULL;

out:
    g_free(fresh_reftable);
    return ret;
}
|
|
|
|
|
2014-10-24 15:57:31 +02:00
|
|
|
/*
 * Drop all data from the image.
 *
 * When possible the metadata is rebuilt from scratch by
 * make_completely_empty(); otherwise every cluster of the active layer is
 * discarded in cluster-aligned chunks of at most INT_MAX bytes.
 *
 * Returns the result of make_completely_empty() on the fast path; otherwise
 * the result of the last qcow2_cluster_discard() call (0 for an image of
 * size 0), which is negative if a discard failed.
 */
static int qcow2_make_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    int chunk = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
    int l1_clusters = DIV_ROUND_UP(s->l1_size,
                                   s->cluster_size / sizeof(uint64_t));
    uint64_t pos, image_end;
    bool can_rebuild;
    int ret = 0;

    /* Rebuilding the metadata needs the dirty flag (qcow2 v3 only), must not
     * be done when there are snapshots (it wipes the whole image), and
     * requires the image header, the refcount table, one refcount block and
     * the L1 table to be covered by a single refcount block. */
    can_rebuild = s->qcow_version >= 3 && !s->snapshots &&
                  3 + l1_clusters <= s->refcount_block_size;
    if (can_rebuild) {
        return make_completely_empty(bs);
    }

    /* Slow but universal fallback: discard every active cluster */
    image_end = bs->total_sectors * BDRV_SECTOR_SIZE;
    pos = 0;
    while (pos < image_end) {
        /* This function is generally called after an external snapshot has
         * been committed, so QCOW2_DISCARD_SNAPSHOT looks like the fitting
         * discard type.  Its default action is to pass the discard on, which
         * ideally makes the image file actually smaller, as is probably
         * desired. */
        ret = qcow2_cluster_discard(bs, pos, MIN(chunk, image_end - pos),
                                    QCOW2_DISCARD_SNAPSHOT, true);
        if (ret < 0) {
            return ret;
        }
        pos += chunk;
    }

    return ret;
}
|
|
|
|
|
2011-11-10 16:23:22 +08:00
|
|
|
/*
 * Write the dirty entries of the L2 table cache and, if
 * qcow2_need_accurate_refcounts() says so, of the refcount block cache.
 * Both caches are written while holding s->lock.
 *
 * Returns 0 on success or the negative error code of the failing
 * qcow2_cache_write() call.
 */
static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    qemu_co_mutex_lock(&s->lock);

    ret = qcow2_cache_write(bs, s->l2_table_cache);
    if (ret < 0) {
        goto unlock;
    }

    if (qcow2_need_accurate_refcounts(s)) {
        ret = qcow2_cache_write(bs, s->refcount_block_cache);
        if (ret < 0) {
            goto unlock;
        }
    }

    /* Success is always reported as plain 0 */
    ret = 0;

unlock:
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}
|
|
|
|
|
2017-07-05 13:57:35 +01:00
|
|
|
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
Error *local_err = NULL;
|
|
|
|
BlockMeasureInfo *info;
|
|
|
|
uint64_t required = 0; /* bytes that contribute to required size */
|
|
|
|
uint64_t virtual_size; /* disk size as seen by guest */
|
|
|
|
uint64_t refcount_bits;
|
|
|
|
uint64_t l2_tables;
|
|
|
|
size_t cluster_size;
|
|
|
|
int version;
|
|
|
|
char *optstr;
|
|
|
|
PreallocMode prealloc;
|
|
|
|
bool has_backing_file;
|
|
|
|
|
|
|
|
/* Parse image creation options */
|
|
|
|
cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
version = qcow2_opt_get_version_del(opts, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
|
2017-08-24 10:46:10 +02:00
|
|
|
prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
|
2017-08-24 10:45:57 +02:00
|
|
|
PREALLOC_MODE_OFF, &local_err);
|
2017-07-05 13:57:35 +01:00
|
|
|
g_free(optstr);
|
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
|
|
|
|
has_backing_file = !!optstr;
|
|
|
|
g_free(optstr);
|
|
|
|
|
|
|
|
virtual_size = align_offset(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
|
|
|
|
cluster_size);
|
|
|
|
|
|
|
|
/* Check that virtual disk size is valid */
|
|
|
|
l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
|
|
|
|
cluster_size / sizeof(uint64_t));
|
|
|
|
if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
|
|
|
|
error_setg(&local_err, "The image size is too large "
|
|
|
|
"(try using a larger cluster size)");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Account for input image */
|
|
|
|
if (in_bs) {
|
|
|
|
int64_t ssize = bdrv_getlength(in_bs);
|
|
|
|
if (ssize < 0) {
|
|
|
|
error_setg_errno(&local_err, -ssize,
|
|
|
|
"Unable to get image virtual_size");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual_size = align_offset(ssize, cluster_size);
|
|
|
|
|
|
|
|
if (has_backing_file) {
|
|
|
|
/* We don't know how much of the backing chain is shared by the input
|
|
|
|
* image and the new image file. In the worst case the new image's
|
|
|
|
* backing file has nothing in common with the input image. Be
|
|
|
|
* conservative and assume all clusters need to be written.
|
|
|
|
*/
|
|
|
|
required = virtual_size;
|
|
|
|
} else {
|
2017-09-25 09:55:22 -05:00
|
|
|
int64_t offset;
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-11 22:47:08 -05:00
|
|
|
int64_t pnum = 0;
|
2017-07-05 13:57:35 +01:00
|
|
|
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-11 22:47:08 -05:00
|
|
|
for (offset = 0; offset < ssize; offset += pnum) {
|
|
|
|
int ret;
|
2017-07-05 13:57:35 +01:00
|
|
|
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-11 22:47:08 -05:00
|
|
|
ret = bdrv_block_status_above(in_bs, NULL, offset,
|
|
|
|
ssize - offset, &pnum, NULL,
|
|
|
|
NULL);
|
2017-07-05 13:57:35 +01:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(&local_err, -ret,
|
|
|
|
"Unable to get block status");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret & BDRV_BLOCK_ZERO) {
|
|
|
|
/* Skip zero regions (safe with no backing file) */
|
|
|
|
} else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
|
|
|
|
(BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
|
|
|
|
/* Extend pnum to end of cluster for next iteration */
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-11 22:47:08 -05:00
|
|
|
pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
|
2017-07-05 13:57:35 +01:00
|
|
|
|
|
|
|
/* Count clusters we've seen */
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-11 22:47:08 -05:00
|
|
|
required += offset % cluster_size + pnum;
|
2017-07-05 13:57:35 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Take into account preallocation. Nothing special is needed for
|
|
|
|
* PREALLOC_MODE_METADATA since metadata is always counted.
|
|
|
|
*/
|
|
|
|
if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
|
|
|
|
required = virtual_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
info = g_new(BlockMeasureInfo, 1);
|
|
|
|
info->fully_allocated =
|
|
|
|
qcow2_calc_prealloc_size(virtual_size, cluster_size,
|
|
|
|
ctz32(refcount_bits));
|
|
|
|
|
|
|
|
/* Remove data clusters that are not required. This overestimates the
|
|
|
|
* required size because metadata needed for the fully allocated file is
|
|
|
|
* still counted.
|
|
|
|
*/
|
|
|
|
info->required = info->fully_allocated - virtual_size + required;
|
|
|
|
return info;
|
|
|
|
|
|
|
|
err:
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2010-12-17 16:02:39 +01:00
|
|
|
/*
 * Fill @bdi with the generic driver information of a qcow2 image.
 *
 * Copies the cluster size from the driver state, stores the value of
 * qcow2_vm_state_offset() as the VM state offset, sets
 * unallocated_blocks_are_zero, and sets can_write_zeroes_with_unmap only
 * when qcow_version is at least 3.
 *
 * Always returns 0.
 */
static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BDRVQcow2State *s = bs->opaque;
    const bool is_v3_or_later = s->qcow_version >= 3;

    bdi->cluster_size = s->cluster_size;
    bdi->vm_state_offset = qcow2_vm_state_offset(s);

    bdi->unallocated_blocks_are_zero = true;
    bdi->can_write_zeroes_with_unmap = is_v3_or_later;

    return 0;
}
|
|
|
|
|
2013-10-09 10:46:18 +02:00
|
|
|
/*
 * Build the qcow2-specific image information.
 *
 * Returns a newly allocated ImageInfoSpecific of kind
 * IMAGE_INFO_SPECIFIC_KIND_QCOW2 whose payload describes the compat level
 * ("0.10" for version 2, "1.1" for version 3) and the refcount width.
 * For version 3 it also carries the lazy-refcounts and corrupt flags.
 * When the image has a crypto layer (s->crypto), its encryption details
 * are attached as well.
 */
static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    QCryptoBlockInfo *crypto_info = NULL;
    ImageInfoSpecificQCow2Encryption *enc;
    ImageInfoSpecificQCow2 *qcow2_info;
    ImageInfoSpecific *result;

    if (s->crypto != NULL) {
        crypto_info = qcrypto_block_get_info(s->crypto, &error_abort);
    }

    result = g_new(ImageInfoSpecific, 1);
    qcow2_info = g_new(ImageInfoSpecificQCow2, 1);
    *result = (ImageInfoSpecific){
        .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
        .u.qcow2.data = qcow2_info,
    };

    switch (s->qcow_version) {
    case 2:
        *qcow2_info = (ImageInfoSpecificQCow2){
            .compat             = g_strdup("0.10"),
            .refcount_bits      = s->refcount_bits,
        };
        break;

    case 3:
        *qcow2_info = (ImageInfoSpecificQCow2){
            .compat             = g_strdup("1.1"),
            .lazy_refcounts     = s->compatible_features &
                                  QCOW2_COMPAT_LAZY_REFCOUNTS,
            .has_lazy_refcounts = true,
            .corrupt            = s->incompatible_features &
                                  QCOW2_INCOMPAT_CORRUPT,
            .has_corrupt        = true,
            .refcount_bits      = s->refcount_bits,
        };
        break;

    default:
        /* Reaching this most likely means a new format version was
         * introduced without teaching this function about it */
        assert(false);
    }

    if (!crypto_info) {
        return result;
    }

    enc = g_new(ImageInfoSpecificQCow2Encryption, 1);
    switch (crypto_info->format) {
    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
        enc->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
        enc->u.aes = crypto_info->u.qcow;
        break;
    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
        enc->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
        enc->u.luks = crypto_info->u.luks;
        break;
    default:
        abort();
    }

    /* The union was copied shallowly, so wipe the pointers in the source
     * object before freeing it; otherwise the free below would also
     * release data that @enc now refers to */
    memset(&crypto_info->u, 0, sizeof(crypto_info->u));
    qapi_free_QCryptoBlockInfo(crypto_info);

    qcow2_info->has_encrypt = true;
    qcow2_info->encrypt = enc;

    return result;
}
|
|
|
|
|
2013-04-05 21:27:53 +02:00
|
|
|
/*
 * Write VM state data to the image.
 *
 * The whole of @qiov (qiov->size bytes) is handed to the driver's
 * bdrv_co_pwritev callback at @pos bytes past qcow2_vm_state_offset().
 * The callback's return value is passed back unchanged.
 */
static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
                              int64_t pos)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t vmstate_pos;

    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);

    vmstate_pos = qcow2_vm_state_offset(s) + pos;
    return bs->drv->bdrv_co_pwritev(bs, vmstate_pos, qiov->size, qiov, 0);
}
|
|
|
|
|
2016-06-09 16:50:16 +02:00
|
|
|
/*
 * Read VM state data from the image.
 *
 * qiov->size bytes are read into @qiov through the driver's
 * bdrv_co_preadv callback, starting @pos bytes past
 * qcow2_vm_state_offset().  The callback's return value is passed back
 * unchanged.
 */
static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
                              int64_t pos)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t vmstate_pos;

    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);

    vmstate_pos = qcow2_vm_state_offset(s) + pos;
    return bs->drv->bdrv_co_preadv(bs, vmstate_pos, qiov->size, qiov, 0);
}
|
|
|
|
|
2013-09-03 10:09:54 +02:00
|
|
|
/*
|
|
|
|
* Downgrades an image's version. To achieve this, any incompatible features
|
|
|
|
* have to be removed.
|
|
|
|
*/
|
2014-10-27 11:12:53 +01:00
|
|
|
static int qcow2_downgrade(BlockDriverState *bs, int target_version,
|
2015-07-27 17:51:32 +02:00
|
|
|
BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
|
2013-09-03 10:09:54 +02:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-09-03 10:09:54 +02:00
|
|
|
int current_version = s->qcow_version;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (target_version == current_version) {
|
|
|
|
return 0;
|
|
|
|
} else if (target_version > current_version) {
|
|
|
|
return -EINVAL;
|
|
|
|
} else if (target_version != 2) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->refcount_order != 4) {
|
2015-07-27 17:51:38 +02:00
|
|
|
error_report("compat=0.10 requires refcount_bits=16");
|
2013-09-03 10:09:54 +02:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* clear incompatible features */
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
|
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
|
|
|
|
* the first place; if that happens nonetheless, returning -ENOTSUP is the
|
|
|
|
* best thing to do anyway */
|
|
|
|
|
|
|
|
if (s->incompatible_features) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* since we can ignore compatible features, we can set them to 0 as well */
|
|
|
|
s->compatible_features = 0;
|
|
|
|
/* if lazy refcounts have been used, they have already been fixed through
|
|
|
|
* clearing the dirty flag */
|
|
|
|
|
|
|
|
/* clearing autoclear features is trivial */
|
|
|
|
s->autoclear_features = 0;
|
|
|
|
|
2015-07-27 17:51:32 +02:00
|
|
|
ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
|
2013-09-03 10:09:54 +02:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->qcow_version = target_version;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->qcow_version = current_version;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-07-27 17:51:36 +02:00
|
|
|
/* Identifies the amend step that is currently reporting progress */
typedef enum Qcow2AmendOperation {
    /* Must stay 0: a statically initialized
     * Qcow2AmendHelperCBInfo::last_operation starts out with this value,
     * which lets the helper CB tell its very first invocation apart from
     * a switch between two operations */
    QCOW2_NO_OPERATION = 0,

    QCOW2_CHANGING_REFCOUNT_ORDER,
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
|
|
|
|
|
|
|
|
typedef struct Qcow2AmendHelperCBInfo {
|
|
|
|
/* The code coordinating the amend operations should only modify
|
|
|
|
* these four fields; the rest will be managed by the CB */
|
|
|
|
BlockDriverAmendStatusCB *original_status_cb;
|
|
|
|
void *original_cb_opaque;
|
|
|
|
|
|
|
|
Qcow2AmendOperation current_operation;
|
|
|
|
|
|
|
|
/* Total number of operations to perform (only set once) */
|
|
|
|
int total_operations;
|
|
|
|
|
|
|
|
/* The following fields are managed by the CB */
|
|
|
|
|
|
|
|
/* Number of operations completed */
|
|
|
|
int operations_completed;
|
|
|
|
|
|
|
|
/* Cumulative offset of all completed operations */
|
|
|
|
int64_t offset_completed;
|
|
|
|
|
|
|
|
Qcow2AmendOperation last_operation;
|
|
|
|
int64_t last_work_size;
|
|
|
|
} Qcow2AmendHelperCBInfo;
|
|
|
|
|
|
|
|
/*
 * Progress callback used while several amend operations run in sequence.
 *
 * @opaque points to a Qcow2AmendHelperCBInfo.  Each call converts the
 * progress of the current operation (@operation_offset out of
 * @operation_work_size) into overall progress and forwards it to
 * info->original_status_cb.  The total work size reported is the work
 * known so far plus a projection for the operations not started yet.
 */
static void qcow2_amend_helper_cb(BlockDriverState *bs,
                                  int64_t operation_offset,
                                  int64_t operation_work_size, void *opaque)
{
    Qcow2AmendHelperCBInfo *info = opaque;
    int ops_covered, ops_remaining;
    int64_t work_so_far, work_projected;

    if (info->last_operation != info->current_operation) {
        /* A different operation is reporting now.  Unless this is the
         * very first invocation, the previous one is finished: account
         * for it using the work size it reported last. */
        if (info->last_operation != QCOW2_NO_OPERATION) {
            info->operations_completed++;
            info->offset_completed += info->last_work_size;
        }

        info->last_operation = info->current_operation;
    }

    assert(info->total_operations > 0);
    assert(info->operations_completed < info->total_operations);

    info->last_work_size = operation_work_size;

    /* The finished operations plus the one in progress ... */
    ops_covered = info->operations_completed + 1;
    /* ... and those that have not started yet */
    ops_remaining = info->total_operations - info->operations_completed - 1;

    work_so_far = info->offset_completed + operation_work_size;

    /* work_so_far is the total for ops_covered operations; scale it by
     * remaining/covered to estimate the work of the remaining ones */
    work_projected = work_so_far * ops_remaining / ops_covered;

    info->original_status_cb(bs, info->offset_completed + operation_offset,
                             work_so_far + work_projected,
                             info->original_cb_opaque);
}
|
|
|
|
|
2014-10-27 11:12:50 +01:00
|
|
|
static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
|
2015-07-27 17:51:32 +02:00
|
|
|
BlockDriverAmendStatusCB *status_cb,
|
|
|
|
void *cb_opaque)
|
2013-09-03 10:09:54 +02:00
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-09-03 10:09:54 +02:00
|
|
|
int old_version = s->qcow_version, new_version = old_version;
|
|
|
|
uint64_t new_size = 0;
|
|
|
|
const char *backing_file = NULL, *backing_format = NULL;
|
|
|
|
bool lazy_refcounts = s->use_lazy_refcounts;
|
2014-06-05 17:20:59 +08:00
|
|
|
const char *compat = NULL;
|
|
|
|
uint64_t cluster_size = s->cluster_size;
|
|
|
|
bool encrypt;
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
int encformat;
|
2015-07-27 17:51:38 +02:00
|
|
|
int refcount_bits = s->refcount_bits;
|
2017-01-13 19:02:32 +01:00
|
|
|
Error *local_err = NULL;
|
2013-09-03 10:09:54 +02:00
|
|
|
int ret;
|
2014-06-05 17:20:59 +08:00
|
|
|
QemuOptDesc *desc = opts->list->desc;
|
2015-07-27 17:51:36 +02:00
|
|
|
Qcow2AmendHelperCBInfo helper_cb_info;
|
2013-09-03 10:09:54 +02:00
|
|
|
|
2014-06-05 17:20:59 +08:00
|
|
|
while (desc && desc->name) {
|
|
|
|
if (!qemu_opt_find(opts, desc->name)) {
|
2013-09-03 10:09:54 +02:00
|
|
|
/* only change explicitly defined options */
|
2014-06-05 17:20:59 +08:00
|
|
|
desc++;
|
2013-09-03 10:09:54 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-02-18 17:40:47 -05:00
|
|
|
if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
|
|
|
|
compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
|
2014-06-05 17:20:59 +08:00
|
|
|
if (!compat) {
|
2013-09-03 10:09:54 +02:00
|
|
|
/* preserve default */
|
2014-06-05 17:20:59 +08:00
|
|
|
} else if (!strcmp(compat, "0.10")) {
|
2013-09-03 10:09:54 +02:00
|
|
|
new_version = 2;
|
2014-06-05 17:20:59 +08:00
|
|
|
} else if (!strcmp(compat, "1.1")) {
|
2013-09-03 10:09:54 +02:00
|
|
|
new_version = 3;
|
|
|
|
} else {
|
2015-07-27 17:51:33 +02:00
|
|
|
error_report("Unknown compatibility level %s", compat);
|
2013-09-03 10:09:54 +02:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2015-02-18 17:40:47 -05:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
|
2015-07-27 17:51:33 +02:00
|
|
|
error_report("Cannot change preallocation mode");
|
2013-09-03 10:09:54 +02:00
|
|
|
return -ENOTSUP;
|
2015-02-18 17:40:47 -05:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
|
|
|
|
new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
|
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
|
|
|
|
backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
|
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
|
|
|
|
backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
|
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
|
|
|
|
encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
|
2017-06-23 17:24:10 +01:00
|
|
|
!!s->crypto);
|
2015-07-01 18:10:37 +01:00
|
|
|
|
2017-06-23 17:24:10 +01:00
|
|
|
if (encrypt != !!s->crypto) {
|
2015-07-27 17:51:33 +02:00
|
|
|
error_report("Changing the encryption flag is not supported");
|
2013-09-03 10:09:54 +02:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
|
|
|
|
encformat = qcow2_crypt_method_from_format(
|
|
|
|
qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
|
|
|
|
|
|
|
|
if (encformat != s->crypt_method_header) {
|
|
|
|
error_report("Changing the encryption format is not supported");
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
2015-02-18 17:40:47 -05:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
|
|
|
|
cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
|
2014-06-05 17:20:59 +08:00
|
|
|
cluster_size);
|
|
|
|
if (cluster_size != s->cluster_size) {
|
2015-07-27 17:51:33 +02:00
|
|
|
error_report("Changing the cluster size is not supported");
|
2013-09-03 10:09:54 +02:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
2015-02-18 17:40:47 -05:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
|
|
|
|
lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
|
2014-06-05 17:20:59 +08:00
|
|
|
lazy_refcounts);
|
2015-02-18 17:40:49 -05:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
|
2015-07-27 17:51:38 +02:00
|
|
|
refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
|
|
|
|
refcount_bits);
|
|
|
|
|
|
|
|
if (refcount_bits <= 0 || refcount_bits > 64 ||
|
|
|
|
!is_power_of_2(refcount_bits))
|
|
|
|
{
|
|
|
|
error_report("Refcount width must be a power of two and may "
|
|
|
|
"not exceed 64 bits");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2013-09-03 10:09:54 +02:00
|
|
|
} else {
|
2015-07-27 17:51:34 +02:00
|
|
|
/* if this point is reached, this probably means a new option was
|
2013-09-03 10:09:54 +02:00
|
|
|
* added without having it covered here */
|
2015-07-27 17:51:34 +02:00
|
|
|
abort();
|
2013-09-03 10:09:54 +02:00
|
|
|
}
|
2014-06-05 17:20:59 +08:00
|
|
|
|
|
|
|
desc++;
|
2013-09-03 10:09:54 +02:00
|
|
|
}
|
|
|
|
|
2015-07-27 17:51:36 +02:00
|
|
|
helper_cb_info = (Qcow2AmendHelperCBInfo){
|
|
|
|
.original_status_cb = status_cb,
|
|
|
|
.original_cb_opaque = cb_opaque,
|
|
|
|
.total_operations = (new_version < old_version)
|
2015-07-27 17:51:38 +02:00
|
|
|
+ (s->refcount_bits != refcount_bits)
|
2015-07-27 17:51:36 +02:00
|
|
|
};
|
|
|
|
|
2015-07-27 17:51:35 +02:00
|
|
|
/* Upgrade first (some features may require compat=1.1) */
|
|
|
|
if (new_version > old_version) {
|
|
|
|
s->qcow_version = new_version;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->qcow_version = old_version;
|
|
|
|
return ret;
|
2013-09-03 10:09:54 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-27 17:51:38 +02:00
|
|
|
if (s->refcount_bits != refcount_bits) {
|
|
|
|
int refcount_order = ctz32(refcount_bits);
|
|
|
|
|
|
|
|
if (new_version < 3 && refcount_bits != 16) {
|
|
|
|
error_report("Different refcount widths than 16 bits require "
|
|
|
|
"compatibility level 1.1 or above (use compat=1.1 or "
|
|
|
|
"greater)");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
|
|
|
|
ret = qcow2_change_refcount_order(bs, refcount_order,
|
|
|
|
&qcow2_amend_helper_cb,
|
2017-05-11 18:03:37 +03:00
|
|
|
&helper_cb_info, &local_err);
|
2015-07-27 17:51:38 +02:00
|
|
|
if (ret < 0) {
|
2017-05-11 18:03:37 +03:00
|
|
|
error_report_err(local_err);
|
2015-07-27 17:51:38 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-03 10:09:54 +02:00
|
|
|
if (backing_file || backing_format) {
|
2015-04-07 15:03:16 +02:00
|
|
|
ret = qcow2_change_backing_file(bs,
|
|
|
|
backing_file ?: s->image_backing_file,
|
|
|
|
backing_format ?: s->image_backing_format);
|
2013-09-03 10:09:54 +02:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->use_lazy_refcounts != lazy_refcounts) {
|
|
|
|
if (lazy_refcounts) {
|
2015-07-27 17:51:35 +02:00
|
|
|
if (new_version < 3) {
|
2015-07-27 17:51:33 +02:00
|
|
|
error_report("Lazy refcounts only supported with compatibility "
|
|
|
|
"level 1.1 and above (use compat=1.1 or greater)");
|
2013-09-03 10:09:54 +02:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
s->use_lazy_refcounts = true;
|
|
|
|
} else {
|
|
|
|
/* make image clean first */
|
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
/* now disallow lazy refcounts */
|
|
|
|
s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
s->use_lazy_refcounts = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (new_size) {
|
2017-01-20 17:07:26 +01:00
|
|
|
BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
|
2017-01-13 19:02:32 +01:00
|
|
|
ret = blk_insert_bs(blk, bs, &local_err);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
blk_unref(blk);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-06-13 22:20:54 +02:00
|
|
|
ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, &local_err);
|
2017-02-17 10:58:25 +01:00
|
|
|
blk_unref(blk);
|
2013-09-03 10:09:54 +02:00
|
|
|
if (ret < 0) {
|
2017-03-28 22:51:27 +02:00
|
|
|
error_report_err(local_err);
|
2013-09-03 10:09:54 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-27 17:51:35 +02:00
|
|
|
/* Downgrade last (so unsupported features can be removed before) */
|
|
|
|
if (new_version < old_version) {
|
2015-07-27 17:51:36 +02:00
|
|
|
helper_cb_info.current_operation = QCOW2_DOWNGRADING;
|
|
|
|
ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
|
|
|
|
&helper_cb_info);
|
2015-07-27 17:51:35 +02:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-03 10:09:54 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-05 16:07:16 +02:00
|
|
|
/*
|
|
|
|
* If offset or size are negative, respectively, they will not be included in
|
|
|
|
* the BLOCK_IMAGE_CORRUPTED event emitted.
|
|
|
|
* fatal will be ignored for read-only BDS; corruptions found there will always
|
|
|
|
* be considered non-fatal.
|
|
|
|
*/
|
|
|
|
void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
|
|
|
|
int64_t size, const char *message_format, ...)
|
|
|
|
{
|
2015-09-07 17:12:56 +02:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-04-08 12:29:20 +03:00
|
|
|
const char *node_name;
|
2014-09-05 16:07:16 +02:00
|
|
|
char *message;
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
fatal = fatal && !bs->read_only;
|
|
|
|
|
|
|
|
if (s->signaled_corruption &&
|
|
|
|
(!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
va_start(ap, message_format);
|
|
|
|
message = g_strdup_vprintf(message_format, ap);
|
|
|
|
va_end(ap);
|
|
|
|
|
|
|
|
if (fatal) {
|
|
|
|
fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
|
|
|
|
"corruption events will be suppressed\n", message);
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
|
|
|
|
"corruption events will be suppressed\n", message);
|
|
|
|
}
|
|
|
|
|
2015-04-08 12:29:20 +03:00
|
|
|
node_name = bdrv_get_node_name(bs);
|
|
|
|
qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
|
|
|
|
*node_name != '\0', node_name,
|
|
|
|
message, offset >= 0, offset,
|
|
|
|
size >= 0, size,
|
2014-09-05 16:07:16 +02:00
|
|
|
fatal, &error_abort);
|
|
|
|
g_free(message);
|
|
|
|
|
|
|
|
if (fatal) {
|
|
|
|
qcow2_mark_corrupt(bs);
|
|
|
|
bs->drv = NULL; /* make BDS unusable */
|
|
|
|
}
|
|
|
|
|
|
|
|
s->signaled_corruption = true;
|
|
|
|
}
|
|
|
|
|
2014-06-05 17:20:59 +08:00
|
|
|
static QemuOptsList qcow2_create_opts = {
|
|
|
|
.name = "qcow2-create-opts",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
|
|
|
|
.desc = {
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Virtual disk size"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_COMPAT_LEVEL,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "Compatibility level (0.10 or 1.1)"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_BACKING_FILE,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "File name of a base image"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_BACKING_FMT,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "Image format of the base image"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_ENCRYPT,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
2017-06-23 17:24:06 +01:00
|
|
|
.help = "Encrypt the image with format 'aes'. (Deprecated "
|
|
|
|
"in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_ENCRYPT_FORMAT,
|
|
|
|
.type = QEMU_OPT_STRING,
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
.help = "Encrypt the image, format choices: 'aes', 'luks'",
|
2014-06-05 17:20:59 +08:00
|
|
|
},
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 17:24:12 +01:00
|
|
|
BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
|
|
|
|
"ID of secret providing qcow AES key or LUKS passphrase"),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
|
2014-06-05 17:20:59 +08:00
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_CLUSTER_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "qcow2 cluster size",
|
|
|
|
.def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_PREALLOC,
|
|
|
|
.type = QEMU_OPT_STRING,
|
2014-09-10 17:05:49 +08:00
|
|
|
.help = "Preallocation mode (allowed values: off, metadata, "
|
|
|
|
"falloc, full)"
|
2014-06-05 17:20:59 +08:00
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_LAZY_REFCOUNTS,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Postpone refcount updates",
|
|
|
|
.def_value_str = "off"
|
|
|
|
},
|
2015-02-18 17:40:49 -05:00
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_REFCOUNT_BITS,
|
|
|
|
.type = QEMU_OPT_NUMBER,
|
|
|
|
.help = "Width of a reference count entry in bits",
|
|
|
|
.def_value_str = "16"
|
|
|
|
},
|
2014-06-05 17:20:59 +08:00
|
|
|
{ /* end of list */ }
|
|
|
|
}
|
2010-04-23 20:19:47 +00:00
|
|
|
};
|
|
|
|
|
2014-12-02 18:32:41 +01:00
|
|
|
BlockDriver bdrv_qcow2 = {
|
2010-12-17 16:02:39 +01:00
|
|
|
.format_name = "qcow2",
|
2015-09-07 17:12:56 +02:00
|
|
|
.instance_size = sizeof(BDRVQcow2State),
|
2010-12-17 16:02:39 +01:00
|
|
|
.bdrv_probe = qcow2_probe,
|
|
|
|
.bdrv_open = qcow2_open,
|
|
|
|
.bdrv_close = qcow2_close,
|
2012-09-20 15:13:28 -04:00
|
|
|
.bdrv_reopen_prepare = qcow2_reopen_prepare,
|
2015-04-16 13:42:27 +02:00
|
|
|
.bdrv_reopen_commit = qcow2_reopen_commit,
|
|
|
|
.bdrv_reopen_abort = qcow2_reopen_abort,
|
2015-11-16 15:34:59 +01:00
|
|
|
.bdrv_join_options = qcow2_join_options,
|
2016-12-19 16:36:02 +01:00
|
|
|
.bdrv_child_perm = bdrv_format_default_perms,
|
2014-06-05 17:21:11 +08:00
|
|
|
.bdrv_create = qcow2_create,
|
2013-06-28 12:47:42 +02:00
|
|
|
.bdrv_has_zero_init = bdrv_has_zero_init_1,
|
2013-09-04 19:00:28 +02:00
|
|
|
.bdrv_co_get_block_status = qcow2_co_get_block_status,
|
2010-12-17 16:02:39 +01:00
|
|
|
|
2016-05-31 16:13:07 +02:00
|
|
|
.bdrv_co_preadv = qcow2_co_preadv,
|
2016-06-01 16:55:05 +02:00
|
|
|
.bdrv_co_pwritev = qcow2_co_pwritev,
|
2011-11-10 18:10:11 +01:00
|
|
|
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
|
2010-04-28 11:36:11 +01:00
|
|
|
|
2016-06-01 15:10:06 -06:00
|
|
|
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
|
2016-07-15 17:23:03 -06:00
|
|
|
.bdrv_co_pdiscard = qcow2_co_pdiscard,
|
2010-04-28 11:36:11 +01:00
|
|
|
.bdrv_truncate = qcow2_truncate,
|
2016-07-22 11:17:43 +03:00
|
|
|
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
|
2014-10-24 15:57:31 +02:00
|
|
|
.bdrv_make_empty = qcow2_make_empty,
|
2010-04-23 20:19:47 +00:00
|
|
|
|
|
|
|
.bdrv_snapshot_create = qcow2_snapshot_create,
|
|
|
|
.bdrv_snapshot_goto = qcow2_snapshot_goto,
|
|
|
|
.bdrv_snapshot_delete = qcow2_snapshot_delete,
|
|
|
|
.bdrv_snapshot_list = qcow2_snapshot_list,
|
2014-06-05 17:20:59 +08:00
|
|
|
.bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
|
2017-07-05 13:57:35 +01:00
|
|
|
.bdrv_measure = qcow2_measure,
|
2014-06-05 17:20:59 +08:00
|
|
|
.bdrv_get_info = qcow2_get_info,
|
2013-10-09 10:46:18 +02:00
|
|
|
.bdrv_get_specific_info = qcow2_get_specific_info,
|
2010-04-23 20:19:47 +00:00
|
|
|
|
2010-12-17 16:02:39 +01:00
|
|
|
.bdrv_save_vmstate = qcow2_save_vmstate,
|
|
|
|
.bdrv_load_vmstate = qcow2_load_vmstate,
|
2010-04-23 20:19:47 +00:00
|
|
|
|
2014-06-04 15:09:35 +02:00
|
|
|
.supports_backing = true,
|
2010-04-23 20:19:47 +00:00
|
|
|
.bdrv_change_backing_file = qcow2_change_backing_file,
|
|
|
|
|
2013-12-11 19:26:16 +01:00
|
|
|
.bdrv_refresh_limits = qcow2_refresh_limits,
|
2011-11-14 15:09:46 -06:00
|
|
|
.bdrv_invalidate_cache = qcow2_invalidate_cache,
|
2015-12-22 16:04:57 +01:00
|
|
|
.bdrv_inactivate = qcow2_inactivate,
|
2011-11-14 15:09:46 -06:00
|
|
|
|
2014-06-05 17:20:59 +08:00
|
|
|
.create_opts = &qcow2_create_opts,
|
|
|
|
.bdrv_check = qcow2_check,
|
2014-06-05 17:21:11 +08:00
|
|
|
.bdrv_amend_options = qcow2_amend_options,
|
2015-08-04 15:14:40 +03:00
|
|
|
|
|
|
|
.bdrv_detach_aio_context = qcow2_detach_aio_context,
|
|
|
|
.bdrv_attach_aio_context = qcow2_attach_aio_context,
|
2017-06-28 15:05:14 +03:00
|
|
|
|
|
|
|
.bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
|
2017-06-28 15:05:22 +03:00
|
|
|
.bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
|
2017-06-28 15:05:28 +03:00
|
|
|
.bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
|
2010-04-23 20:19:47 +00:00
|
|
|
};
|
|
|
|
|
2009-05-09 17:03:42 -05:00
|
|
|
static void bdrv_qcow2_init(void)
|
|
|
|
{
|
|
|
|
bdrv_register(&bdrv_qcow2);
|
|
|
|
}
|
|
|
|
|
|
|
|
block_init(bdrv_qcow2_init);
|