50a3efb0f0
bdrv_close() skips much of its logic when bs->drv is NULL. This is fine when we're closing a BlockDriverState that has just been created (because e.g. the initialization process failed), but it's not enough in other cases. For example, when a valid qcow2 image is found to be corrupted then QEMU marks it as such in the file header and then sets bs->drv to NULL in order to make the BlockDriverState unusable. When that BDS is later closed then many of its data structures are not freed (leaking their memory) and none of its children are detached. This results in bdrv_close_all() failing to close all BDSs and making this assertion fail when QEMU is being shut down: bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed. This patch makes bdrv_close() do the full uninitialization process in all cases. This fixes the problem with corrupted images and still works fine with freshly created BDSs. Signed-off-by: Alberto Garcia <berto@igalia.com> Message-id: 20171106145345.12038-1-berto@igalia.com Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Max Reitz <mreitz@redhat.com>
446 lines
15 KiB
Bash
Executable File
446 lines
15 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# Test case for image corruption (overlapping data structures) in qcow2
|
|
#
|
|
# Copyright (C) 2013 Red Hat, Inc.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
|
|
# creator
owner=mreitz@redhat.com

# Name of this test script, used in the banner below and for the
# "$seq.full" file removed at the end of the test.
# "$0" is quoted so a path containing whitespace or glob characters is
# passed to basename as a single argument.
seq="$(basename "$0")"
echo "QA output created by $seq"

# Directory the test was started from
here="$PWD"
status=1 # failure is the default!
|
|
|
|
# Cleanup hook run from the exit/signal trap: delegates to
# _cleanup_test_img (not defined in this file; expected to be provided by
# the sourced test environment).
_cleanup()
{
    _cleanup_test_img
}
|
|
# On shell exit and on SIGHUP/SIGINT/SIGQUIT/SIGTERM: clean up, then exit
# with whatever $status holds when the trap fires (single quotes defer the
# expansion until then).
trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM
|
|
|
|
# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
|
|
|
|
# Assumed byte offsets of metadata structures inside a freshly created
# image; the sections below poke raw bytes at these offsets.
# (Going by how they are used below: rt = refcount table, rb = refcount
# block, l1/l2 = L1/L2 table.)
rt_offset=65536 # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

IMGOPTS="compat=1.1"

# qemu-io "open" commands, passed via -c in the sections below
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"
|
|
|
|
# Scenario: the first L1 entry is rewritten to point at 0x30000, i.e. the
# assumed offset of the L1 table itself ($l1_offset).
echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
|
|
|
|
# Scenario: an L2 entry is made to point at 0x20000, the assumed offset of
# the refcount block ($rb_offset); the image is then repaired and re-tested.
echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
|
|
|
|
# Scenario: after taking a snapshot, the L2 table at
# $l2_offset_after_snapshot gets an entry pointing at 0x40000 ($l2_offset),
# where the comment below says the inactive L2 table still lives.
echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
                      "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Check data
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
|
|
|
|
# Scenario: a COW write is paused at the cow_read breakpoint while a second
# write trips the corruption prevention; the paused write is then resumed.
echo
echo "=== Testing overlap while COW is in flight ==="
echo
# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster. This cluster will soon enough be reallocated and
# used for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
# (The multi-line string is fed to qemu-io on stdin, one command per line.)
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
|
|
|
|
# Scenario: the first two bytes at $rb_offset are zeroed (per the section
# title, this makes the image header look unallocated) before writing again.
echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
|
|
|
|
# Scenario: the first L1 entry is given an offset of 0x42a00; exercised once
# through a read and once through "qemu-img amend".
echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
|
|
|
|
# Scenario: the first L2 entry is given an offset of 0x52a00, then the
# cluster is read.
echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
|
|
|
|
# Scenario: same unaligned L2 entry as in the previous section but with the
# lowest bit set as well (0x52a01); exercised through "qemu-img amend".
echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
|
|
|
|
# Scenario: the first 8-byte entry at $rt_offset is set to 0x22a00 before
# the first write to the image.
echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
|
|
|
|
# Scenario: an unaligned L2 entry (0x52a00) is encountered by a discard
# instead of a read.
echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
|
|
|
|
# Scenario: the same corrupted cluster is read twice from an image opened
# read-only.
echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
|
|
|
|
# Scenario: the first two L2 entries are made unaligned; the first is hit by
# a discard and the second by a read, within one qemu-io invocation.
echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
|
|
|
|
# Scenario: the first 8-byte entry at $rt_offset is zeroed on a fresh image,
# followed by a write and a repair.
echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
|
|
|
|
# Scenario: as in the previous section, but data is written first so that
# the L1 and L2 tables exist before the entry at $rt_offset is zeroed.
echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
|
|
|
|
# Scenario: the first 8 bytes at $rb_offset are zeroed on a fresh image,
# followed by a write and a repair.
echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
|
|
|
|
# Scenario: the first 8 bytes at $rb_offset are zeroed after an initial
# write, then a compressed write ("write -c") is issued.
echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
|
|
|
|
# Scenario: the 4 bytes at file offset 56 are zeroed (per the section title,
# this is the refcount table size field of the image header).
echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all
|
|
|
|
# Scenario: the 8 bytes at file offset 48 are zeroed (per the section title,
# this is the refcount table offset field of the image header).
echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
|
|
|
|
# Scenario: an image with an unaligned reftable entry is also marked dirty,
# so the corruption is met during the automatic check on open.
echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all
|
|
|
|
# Scenario: the first L2 entry is set to 0x2a01 (an unaligned offset inside
# the first cluster, lowest bit set) and the cluster is then written to.
echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

# Can't repair this yet (TODO: We can just deallocate the cluster)
|
|
|
|
# Scenario: the first reftable entry is set to 0x2a00 and a discard just
# over one cluster long (65537 bytes) is issued, followed by two repairs.
echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
# (Unlike most qemu-io calls in this file, this output is not piped through
# _filter_qemu_io.)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
|
|
|
|
# Scenario: the second reftable entry ($rt_offset + 8) is pointed at a very
# high offset and the image is shrunk, checked, and shrunk again.
echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
|
|
|
|
# Scenario: with refcount_bits=1, the second reftable entry is pointed at
# 0x10_0000_0000 and the image is shrunk, checked, and shrunk again.
echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
|
|
|
|
# Scenario: as in the previous section, but the third reftable entry
# ($rt_offset + 16) is additionally set to the unaligned offset 0x200.
echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Same as above
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
|
|
|
|
# Scenario: start QEMU on an image whose first reftable entry is zeroed,
# issue a write through the QMP human-monitor-command, then quit.
echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The multi-line string below is the QMP command sequence fed to QEMU on
# stdin (-qmp stdio).
echo "{'execute': 'qmp_capabilities'}
{'execute': 'human-monitor-command',
'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
{'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io
|
|
|
|
# success, all done
echo "*** done"
# Quoted, and with "--", so a $seq containing whitespace or a leading dash
# still names exactly one file.
rm -f -- "$seq.full"
# Exit code used by the trap's "exit $status"
status=0
|