qemu-e2k/tests/qemu-iotests/060
Alberto Garcia 50a3efb0f0 block: Close a BlockDriverState completely even when bs->drv is NULL
bdrv_close() skips much of its logic when bs->drv is NULL. This is
fine when we're closing a BlockDriverState that has just been created
(because e.g the initialization process failed), but it's not enough
in other cases.

For example, when a valid qcow2 image is found to be corrupted then
QEMU marks it as such in the file header and then sets bs->drv to
NULL in order to make the BlockDriverState unusable. When that BDS is
later closed then many of its data structures are not freed (leaking
their memory) and none of its children are detached. This results in
bdrv_close_all() failing to close all BDSs and making this assertion
fail when QEMU is being shut down:

   bdrv_close_all: Assertion `QTAILQ_EMPTY(&all_bdrv_states)' failed.

This patch makes bdrv_close() do the full uninitialization process
in all cases. This fixes the problem with corrupted images and still
works fine with freshly created BDSs.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 20171106145345.12038-1-berto@igalia.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-11-21 14:54:02 +01:00

446 lines
15 KiB
Bash
Executable File

#!/bin/bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=mreitz@redhat.com
seq="$(basename $0)"
echo "QA output created by $seq"
here="$PWD"
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
# This tests qocw2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
rt_offset=65536 # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)
IMGOPTS="compat=1.1"
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"
echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img
# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# This information should be available through qemu-img info
_img_info --format-specific
# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
| _filter_testdir \
| _filter_imgfmt
# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io
# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to fix it
_check_test_img -r all
# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
"\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Check data
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
echo
echo "=== Testing overlap while COW is in flight ==="
echo
# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster. This cluster will soon enough be reallocated and
# used for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all
echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing dirty corrupt image ==="
echo
_make_test_img 64M
# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
echo '--- Repairing ---'
# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all
echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo
_make_test_img 64M
# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Can't repair this yet (TODO: We can just deallocate the cluster)
echo
echo '=== Discarding with an unaligned refblock ==='
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"
echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
echo
echo "=== Discarding an out-of-bounds refblock ==="
echo
_make_test_img 64M
# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo
IMGOPTS='refcount_bits=1' _make_test_img 64M
# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo
IMGOPTS='refcount_bits=1' _make_test_img 64M
# Same as above
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
echo "{'execute': 'qmp_capabilities'}
{'execute': 'human-monitor-command',
'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
{'execute': 'quit'}" \
| $QEMU -qmp stdio -nographic -nodefaults \
-drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
| _filter_qmp | _filter_qemu_io
# success, all done
echo "*** done"
rm -f $seq.full
status=0