Block patches:

- Some changes for qcow2's refcount repair algorithm to make it work for qcow2 images stored on block devices - Skip test cases that require zstd when support for it is missing - Some refactoring in the iotests' meson.build -----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmJf/asSHGhyZWl0ekBy ZWRoYXQuY29tAAoJEKH6QNCYAZzfYXUQAKQv5qKQBjU4MTwlS8A4h6B6OJgC1Sik 9BB7LO/QFjuuF4vNKpcUlf6i0epxPP8B5pmCjaAolMh6u6wZwL7hHq+SOYXvejTo vINW+r097U0qYPkSV+cS6tbW92rYJDD7VxF+34udiWXGjozsBTw/k9DfJaa9Ht66 2dw3AxUa4lxN1/ejFzDLx3DNaff+HctLhgVpHeBb0eN2zr2Ug5+ZFgMoiWwU6r6J EzTORLAzATerlQVYUkhh4Y/UdVLLw1SzTWOQv5b/NqvaLfKmYsQobSfjC2ajO8XJ P2REigcOAij5uWVRf4EY7xoqmADP8pXxuOTzw0hyGNLOLNcXoFbfW45WSPoY+YgH EH1TtC4vMsg/MlO/A3PJr9v+SNqxz32cul3MVrY3PuG4Dzz0riy9GhtFUU37igbj mR6pP3nSa/f2X4+9B6/UrPjLzusRvc8bvzYqVEnSLABav11npphkYaR9QT1fQUVD Zw26igXtmLKUcfop/EqShbhblk0ZLYDTj/Lx7X+thC9OCrK1QgF6qAsIUqiS1iHz vwdktRTCofo4ZIT/OCz5QeriJqDz0B7VJ8/4i/uvm2eq8BUsn2mJuyAGD2XtaONV rmASrV9VbajdxX5VptjKOOHG6aHtqQlKbyBFog8I4nqVFdjdSMalb++gBMCrPu1A 1iZPsOOyz/8+ =BF0c -----END PGP SIGNATURE----- Merge tag 'pull-block-2022-04-20' of https://gitlab.com/hreitz/qemu into staging Block patches: - Some changes for qcow2's refcount repair algorithm to make it work for qcow2 images stored on block devices - Skip test cases that require zstd when support for it is missing - Some refactoring in the iotests' meson.build # -----BEGIN PGP SIGNATURE----- # # iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmJf/asSHGhyZWl0ekBy # ZWRoYXQuY29tAAoJEKH6QNCYAZzfYXUQAKQv5qKQBjU4MTwlS8A4h6B6OJgC1Sik # 9BB7LO/QFjuuF4vNKpcUlf6i0epxPP8B5pmCjaAolMh6u6wZwL7hHq+SOYXvejTo # vINW+r097U0qYPkSV+cS6tbW92rYJDD7VxF+34udiWXGjozsBTw/k9DfJaa9Ht66 # 2dw3AxUa4lxN1/ejFzDLx3DNaff+HctLhgVpHeBb0eN2zr2Ug5+ZFgMoiWwU6r6J # EzTORLAzATerlQVYUkhh4Y/UdVLLw1SzTWOQv5b/NqvaLfKmYsQobSfjC2ajO8XJ # P2REigcOAij5uWVRf4EY7xoqmADP8pXxuOTzw0hyGNLOLNcXoFbfW45WSPoY+YgH # EH1TtC4vMsg/MlO/A3PJr9v+SNqxz32cul3MVrY3PuG4Dzz0riy9GhtFUU37igbj # mR6pP3nSa/f2X4+9B6/UrPjLzusRvc8bvzYqVEnSLABav11npphkYaR9QT1fQUVD # Zw26igXtmLKUcfop/EqShbhblk0ZLYDTj/Lx7X+thC9OCrK1QgF6qAsIUqiS1iHz # vwdktRTCofo4ZIT/OCz5QeriJqDz0B7VJ8/4i/uvm2eq8BUsn2mJuyAGD2XtaONV # rmASrV9VbajdxX5VptjKOOHG6aHtqQlKbyBFog8I4nqVFdjdSMalb++gBMCrPu1A # 1iZPsOOyz/8+ # =BF0c # -----END PGP SIGNATURE----- # gpg: Signature made Wed 20 Apr 2022 05:33:47 AM PDT # gpg: using RSA key CB62D7A0EE3829E45F004D34A1FA40D098019CDF # gpg: issuer "hreitz@redhat.com" # gpg: Good signature from "Hanna Reitz <hreitz@redhat.com>" [undefined] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: CB62 D7A0 EE38 29E4 5F00 4D34 A1FA 40D0 9801 9CDF * tag 'pull-block-2022-04-20' of https://gitlab.com/hreitz/qemu: qcow2: Add errp to rebuild_refcount_structure() iotests/108: Test new refcount rebuild algorithm qcow2: Improve refcount structure rebuilding iotests/303: Check for zstd support iotests/065: Check for zstd support iotests.py: Add supports_qcow2_zstd_compression() tests/qemu-iotests: Move the bash and sanitizer checks to meson.build tests/qemu-iotests/meson.build: Improve the indentation Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2022-04-20 09:39:33 -07:00 · 2022-04-20 09:39:33 -07:00 · 40a4b96eb0
parent 591e7bb5fd 0423f75351
commit 40a4b96eb0
8 changed files with 691 additions and 185 deletions
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@ -2437,10 +2437,165 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
    return cluster << s->cluster_bits;
 }

+/*
+ * Helper function for rebuild_refcount_structure().
+ *
+ * Scan the range of clusters [first_cluster, end_cluster) for allocated
+ * clusters and write all corresponding refblocks to disk.  The refblock
+ * and allocation data is taken from the in-memory refcount table
+ * *refcount_table[] (of size *nb_clusters), which is basically one big
+ * (unlimited size) refblock for the whole image.
+ *
+ * For these refblocks, clusters are allocated using said in-memory
+ * refcount table.  Care is taken that these allocations are reflected
+ * in the refblocks written to disk.
+ *
+ * The refblocks' offsets are written into a reftable, which is
+ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr).  If
+ * that reftable is of insufficient size, it will be resized to fit.
+ * This reftable is not written to disk.
+ *
+ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed
+ * to point to existing valid refblocks that do not need to be allocated
+ * again.)
+ *
+ * Return whether the on-disk reftable array was resized (true/false),
+ * or -errno on error.
+ */
+static int rebuild_refcounts_write_refblocks(
+        BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
+        int64_t first_cluster, int64_t end_cluster,
+        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
+        Error **errp
+    )
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t cluster;
+    int64_t refblock_offset, refblock_start, refblock_index;
+    int64_t first_free_cluster = 0;
+    uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
+    uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
+    void *on_disk_refblock;
+    bool reftable_grown = false;
+    int ret;
+
+    for (cluster = first_cluster; cluster < end_cluster; cluster++) {
+        /* Check all clusters to find refblocks that contain non-zero entries */
+        if (!s->get_refcount(*refcount_table, cluster)) {
+            continue;
+        }
+
+        /*
+         * This cluster is allocated, so we need to create a refblock
+         * for it.  The data we will write to disk is just the
+         * respective slice from *refcount_table, so it will contain
+         * accurate refcounts for all clusters belonging to this
+         * refblock.  After we have written it, we will therefore skip
+         * all remaining clusters in this refblock.
+         */
+
+        refblock_index = cluster >> s->refcount_block_bits;
+        refblock_start = refblock_index << s->refcount_block_bits;
+
+        if (on_disk_reftable_entries > refblock_index &&
+            on_disk_reftable[refblock_index])
+        {
+            /*
+             * We can get here after a `goto write_refblocks`: We have a
+             * reftable from a previous run, and the refblock is already
+             * allocated.  No need to allocate it again.
+             */
+            refblock_offset = on_disk_reftable[refblock_index];
+        } else {
+            int64_t refblock_cluster_index;
+
+            /* Don't allocate a cluster in a refblock already written to disk */
+            if (first_free_cluster < refblock_start) {
+                first_free_cluster = refblock_start;
+            }
+            refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
+                                                  nb_clusters,
+                                                  &first_free_cluster);
+            if (refblock_offset < 0) {
+                error_setg_errno(errp, -refblock_offset,
+                                 "ERROR allocating refblock");
+                return refblock_offset;
+            }
+
+            refblock_cluster_index = refblock_offset / s->cluster_size;
+            if (refblock_cluster_index >= end_cluster) {
+                /*
+                 * We must write the refblock that holds this refblock's
+                 * refcount
+                 */
+                end_cluster = refblock_cluster_index + 1;
+            }
+
+            if (on_disk_reftable_entries <= refblock_index) {
+                on_disk_reftable_entries =
+                    ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
+                             s->cluster_size) / REFTABLE_ENTRY_SIZE;
+                on_disk_reftable =
+                    g_try_realloc(on_disk_reftable,
+                                  on_disk_reftable_entries *
+                                  REFTABLE_ENTRY_SIZE);
+                if (!on_disk_reftable) {
+                    error_setg(errp, "ERROR allocating reftable memory");
+                    return -ENOMEM;
+                }
+
+                memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
+                       (on_disk_reftable_entries -
+                        *on_disk_reftable_entries_ptr) *
+                       REFTABLE_ENTRY_SIZE);
+
+                *on_disk_reftable_ptr = on_disk_reftable;
+                *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
+
+                reftable_grown = true;
+            } else {
+                assert(on_disk_reftable);
+            }
+            on_disk_reftable[refblock_index] = refblock_offset;
+        }
+
+        /* Refblock is allocated, write it to disk */
+
+        ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
+                                            s->cluster_size, false);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "ERROR writing refblock");
+            return ret;
+        }
+
+        /*
+         * The refblock is simply a slice of *refcount_table.
+         * Note that the size of *refcount_table is always aligned to
+         * whole clusters, so the write operation will not result in
+         * out-of-bounds accesses.
+         */
+        on_disk_refblock = (void *)((char *) *refcount_table +
+                                    refblock_index * s->cluster_size);
+
+        ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
+                          s->cluster_size);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "ERROR writing refblock");
+            return ret;
+        }
+
+        /* This refblock is done, skip to its end */
+        cluster = refblock_start + s->refcount_block_size - 1;
+    }
+
+    return reftable_grown;
+}
+
 /*
 * Creates a new refcount structure based solely on the in-memory information
- * given through *refcount_table. All necessary allocations will be reflected
- * in that array.
+ * given through *refcount_table (this in-memory information is basically just
+ * the concatenation of all refblocks).  All necessary allocations will be
+ * reflected in that array.
 *
 * On success, the old refcount structure is leaked (it will be covered by the
 * new refcount structure).
@ -2448,15 +2603,18 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
 static int rebuild_refcount_structure(BlockDriverState *bs,
                                      BdrvCheckResult *res,
                                      void **refcount_table,
-                                      int64_t *nb_clusters)
+                                      int64_t *nb_clusters,
+                                      Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
-    int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
-    int64_t refblock_offset, refblock_start, refblock_index;
-    uint32_t reftable_size = 0;
+    int64_t reftable_offset = -1;
+    int64_t reftable_length = 0;
+    int64_t reftable_clusters;
+    int64_t refblock_index;
+    uint32_t on_disk_reftable_entries = 0;
    uint64_t *on_disk_reftable = NULL;
-    void *on_disk_refblock;
    int ret = 0;
+    int reftable_size_changed = 0;
    struct {
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
@ -2464,162 +2622,145 @@ static int rebuild_refcount_structure(BlockDriverState *bs,

    qcow2_cache_empty(bs, s->refcount_block_cache);

-write_refblocks:
-    for (; cluster < *nb_clusters; cluster++) {
-        if (!s->get_refcount(*refcount_table, cluster)) {
-            continue;
-        }
+    /*
+     * For each refblock containing entries, we try to allocate a
+     * cluster (in the in-memory refcount table) and write its offset
+     * into on_disk_reftable[].  We then write the whole refblock to
+     * disk (as a slice of the in-memory refcount table).
+     * This is done by rebuild_refcounts_write_refblocks().
+     *
+     * Once we have scanned all clusters, we try to find space for the
+     * reftable.  This will dirty the in-memory refcount table (i.e.
+     * make it differ from the refblocks we have already written), so we
+     * need to run rebuild_refcounts_write_refblocks() again for the
+     * range of clusters where the reftable has been allocated.
+     *
+     * This second run might make the reftable grow again, in which case
+     * we will need to allocate another space for it, which is why we
+     * repeat all this until the reftable stops growing.
+     *
+     * (This loop will terminate, because with every cluster the
+     * reftable grows, it can accomodate a multitude of more refcounts,
+     * so that at some point this must be able to cover the reftable
+     * and all refblocks describing it.)
+     *
+     * We then convert the reftable to big-endian and write it to disk.
+     *
+     * Note that we never free any reftable allocations.  Doing so would
+     * needlessly complicate the algorithm: The eventual second check
+     * run we do will clean up all leaks we have caused.
+     */

-        refblock_index = cluster >> s->refcount_block_bits;
-        refblock_start = refblock_index << s->refcount_block_bits;
-
-        /* Don't allocate a cluster in a refblock already written to disk */
-        if (first_free_cluster < refblock_start) {
-            first_free_cluster = refblock_start;
-        }
-        refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
-                                              nb_clusters, &first_free_cluster);
-        if (refblock_offset < 0) {
-            fprintf(stderr, "ERROR allocating refblock: %s\n",
-                    strerror(-refblock_offset));
-            res->check_errors++;
-            ret = refblock_offset;
-            goto fail;
-        }
-
-        if (reftable_size <= refblock_index) {
-            uint32_t old_reftable_size = reftable_size;
-            uint64_t *new_on_disk_reftable;
-
-            reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
-                                     s->cluster_size) / REFTABLE_ENTRY_SIZE;
-            new_on_disk_reftable = g_try_realloc(on_disk_reftable,
-                                                 reftable_size *
-                                                 REFTABLE_ENTRY_SIZE);
-            if (!new_on_disk_reftable) {
-                res->check_errors++;
-                ret = -ENOMEM;
-                goto fail;
-            }
-            on_disk_reftable = new_on_disk_reftable;
-
-            memset(on_disk_reftable + old_reftable_size, 0,
-                   (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE);
-
-            /* The offset we have for the reftable is now no longer valid;
-             * this will leak that range, but we can easily fix that by running
-             * a leak-fixing check after this rebuild operation */
-            reftable_offset = -1;
-        } else {
-            assert(on_disk_reftable);
-        }
-        on_disk_reftable[refblock_index] = refblock_offset;
-
-        /* If this is apparently the last refblock (for now), try to squeeze the
-         * reftable in */
-        if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
-            reftable_offset < 0)
-        {
-            uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
-                                                          REFTABLE_ENTRY_SIZE);
-            reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
-                                                  refcount_table, nb_clusters,
-                                                  &first_free_cluster);
-            if (reftable_offset < 0) {
-                fprintf(stderr, "ERROR allocating reftable: %s\n",
-                        strerror(-reftable_offset));
-                res->check_errors++;
-                ret = reftable_offset;
-                goto fail;
-            }
-        }
-
-        ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
-                                            s->cluster_size, false);
-        if (ret < 0) {
-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
-            goto fail;
-        }
-
-        /* The size of *refcount_table is always cluster-aligned, therefore the
-         * write operation will not overflow */
-        on_disk_refblock = (void *)((char *) *refcount_table +
-                                    refblock_index * s->cluster_size);
-
-        ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
-                          s->cluster_size);
-        if (ret < 0) {
-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
-            goto fail;
-        }
-
-        /* Go to the end of this refblock */
-        cluster = refblock_start + s->refcount_block_size - 1;
+    reftable_size_changed =
+        rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                          0, *nb_clusters,
+                                          &on_disk_reftable,
+                                          &on_disk_reftable_entries, errp);
+    if (reftable_size_changed < 0) {
+        res->check_errors++;
+        ret = reftable_size_changed;
+        goto fail;
    }

-    if (reftable_offset < 0) {
-        uint64_t post_refblock_start, reftable_clusters;
+    /*
+     * There was no reftable before, so rebuild_refcounts_write_refblocks()
+     * must have increased its size (from 0 to something).
+     */
+    assert(reftable_size_changed);
+
+    do {
+        int64_t reftable_start_cluster, reftable_end_cluster;
+        int64_t first_free_cluster = 0;
+
+        reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
+        reftable_clusters = size_to_clusters(s, reftable_length);

-        post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
-        reftable_clusters =
-            size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE);
-        /* Not pretty but simple */
-        if (first_free_cluster < post_refblock_start) {
-            first_free_cluster = post_refblock_start;
-        }
        reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
                                              refcount_table, nb_clusters,
                                              &first_free_cluster);
        if (reftable_offset < 0) {
-            fprintf(stderr, "ERROR allocating reftable: %s\n",
-                    strerror(-reftable_offset));
+            error_setg_errno(errp, -reftable_offset,
+                             "ERROR allocating reftable");
            res->check_errors++;
            ret = reftable_offset;
            goto fail;
        }

-        goto write_refblocks;
-    }
+        /*
+         * We need to update the affected refblocks, so re-run the
+         * write_refblocks loop for the reftable's range of clusters.
+         */
+        assert(offset_into_cluster(s, reftable_offset) == 0);
+        reftable_start_cluster = reftable_offset / s->cluster_size;
+        reftable_end_cluster = reftable_start_cluster + reftable_clusters;
+        reftable_size_changed =
+            rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                              reftable_start_cluster,
+                                              reftable_end_cluster,
+                                              &on_disk_reftable,
+                                              &on_disk_reftable_entries, errp);
+        if (reftable_size_changed < 0) {
+            res->check_errors++;
+            ret = reftable_size_changed;
+            goto fail;
+        }

-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+        /*
+         * If the reftable size has changed, we will need to find a new
+         * allocation, repeating the loop.
+         */
+    } while (reftable_size_changed);
+
+    /* The above loop must have run at least once */
+    assert(reftable_offset >= 0);
+
+    /*
+     * All allocations are done, all refblocks are written, convert the
+     * reftable to big-endian and write it to disk.
+     */
+
+    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+         refblock_index++)
+    {
        cpu_to_be64s(&on_disk_reftable[refblock_index]);
    }

-    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
-                                        reftable_size * REFTABLE_ENTRY_SIZE,
+    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
                                        false);
    if (ret < 0) {
-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR writing reftable");
        goto fail;
    }

-    assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE);
+    assert(reftable_length < INT_MAX);
    ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
-                      reftable_size * REFTABLE_ENTRY_SIZE);
+                      reftable_length);
    if (ret < 0) {
-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR writing reftable");
        goto fail;
    }

    /* Enter new reftable into the image header */
    reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
    reftable_offset_and_clusters.reftable_clusters =
-        cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE));
+        cpu_to_be32(reftable_clusters);
    ret = bdrv_pwrite_sync(bs->file,
                           offsetof(QCowHeader, refcount_table_offset),
                           &reftable_offset_and_clusters,
                           sizeof(reftable_offset_and_clusters));
    if (ret < 0) {
-        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR setting reftable");
        goto fail;
    }

-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+         refblock_index++)
+    {
        be64_to_cpus(&on_disk_reftable[refblock_index]);
    }
    s->refcount_table = on_disk_reftable;
    s->refcount_table_offset = reftable_offset;
-    s->refcount_table_size = reftable_size;
+    s->refcount_table_size = on_disk_reftable_entries;
    update_max_refcount_table_index(s);

    return 0;
@ -2676,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
        BdrvCheckResult old_res = *res;
        int fresh_leaks = 0;
+        Error *local_err = NULL;

        fprintf(stderr, "Rebuilding refcount structure\n");
        ret = rebuild_refcount_structure(bs, res, &refcount_table,
-                                         &nb_clusters);
+                                         &nb_clusters, &local_err);
        if (ret < 0) {
+            error_report_err(local_err);
            goto fail;
        }

--- a/tests/check-block.sh
+++ b/tests/check-block.sh
@ -18,36 +18,10 @@ skip() {
    exit 0
 }

-# Disable tests with any sanitizer except for specific ones
-SANITIZE_FLAGS=$( grep "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null )
-ALLOWED_SANITIZE_FLAGS="safe-stack cfi-icall"
-#Remove all occurrencies of allowed Sanitize flags
-for j in ${ALLOWED_SANITIZE_FLAGS}; do
-    TMP_FLAGS=${SANITIZE_FLAGS}
-    SANITIZE_FLAGS=""
-    for i in ${TMP_FLAGS}; do
-        if ! echo ${i} | grep -q "${j}" 2>/dev/null; then
-            SANITIZE_FLAGS="${SANITIZE_FLAGS} ${i}"
-        fi
-    done
-done
-if echo ${SANITIZE_FLAGS} | grep -q "\-fsanitize" 2>/dev/null; then
-    # Have a sanitize flag that is not allowed, stop
-    skip "Sanitizers are enabled ==> Not running the qemu-iotests."
-fi
-
 if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then
    skip "No qemu-system binary available ==> Not running the qemu-iotests."
 fi

-if ! command -v bash >/dev/null 2>&1 ; then
-    skip "bash not available ==> Not running the qemu-iotests."
-fi
-
-if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then
-    skip "bash version too old ==> Not running the qemu-iotests."
-fi
-
 cd tests/qemu-iotests

 # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests
--- a/tests/qemu-iotests/065
+++ b/tests/qemu-iotests/065
@ -24,7 +24,7 @@ import os
 import re
 import json
 import iotests
-from iotests import qemu_img, qemu_img_info
+from iotests import qemu_img, qemu_img_info, supports_qcow2_zstd_compression
 import unittest

 test_img = os.path.join(iotests.test_dir, 'test.img')
@ -95,11 +95,17 @@ class TestQCow2(TestQemuImgInfo):

 class TestQCow3NotLazy(TestQemuImgInfo):
    '''Testing a qcow2 version 3 image with lazy refcounts disabled'''
-    img_options = 'compat=1.1,lazy_refcounts=off,compression_type=zstd'
+    if supports_qcow2_zstd_compression():
+        compression_type = 'zstd'
+    else:
+        compression_type = 'zlib'
+
+    img_options = 'compat=1.1,lazy_refcounts=off'
+    img_options += f',compression_type={compression_type}'
    json_compare = { 'compat': '1.1', 'lazy-refcounts': False,
                     'refcount-bits': 16, 'corrupt': False,
-                     'compression-type': 'zstd', 'extended-l2': False }
-    human_compare = [ 'compat: 1.1', 'compression type: zstd',
+                     'compression-type': compression_type, 'extended-l2': False }
+    human_compare = [ 'compat: 1.1', f'compression type: {compression_type}',
                      'lazy refcounts: false', 'refcount bits: 16',
                      'corrupt: false', 'extended l2: false' ]

@ -126,11 +132,17 @@ class TestQCow3NotLazyQMP(TestQMP):
 class TestQCow3LazyQMP(TestQMP):
    '''Testing a qcow2 version 3 image with lazy refcounts enabled, opening
       with lazy refcounts disabled'''
-    img_options = 'compat=1.1,lazy_refcounts=on,compression_type=zstd'
+    if supports_qcow2_zstd_compression():
+        compression_type = 'zstd'
+    else:
+        compression_type = 'zlib'
+
+    img_options = 'compat=1.1,lazy_refcounts=on'
+    img_options += f',compression_type={compression_type}'
    qemu_options = 'lazy-refcounts=off'
    compare = { 'compat': '1.1', 'lazy-refcounts': True,
                'refcount-bits': 16, 'corrupt': False,
-                'compression-type': 'zstd', 'extended-l2': False }
+                'compression-type': compression_type, 'extended-l2': False }

 TestImageInfoSpecific = None
 TestQemuImgInfo = None
--- a/tests/qemu-iotests/108
+++ b/tests/qemu-iotests/108
@ -30,13 +30,20 @@ status=1	# failure is the default!

 _cleanup()
 {
-	_cleanup_test_img
+    _cleanup_test_img
+    if [ -f "$TEST_DIR/qsd.pid" ]; then
+        qsd_pid=$(cat "$TEST_DIR/qsd.pid")
+        kill -KILL "$qsd_pid"
+        fusermount -u "$TEST_DIR/fuse-export" &>/dev/null
+    fi
+    rm -f "$TEST_DIR/fuse-export"
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15

 # get standard environment, filters and checks
 . ./common.rc
 . ./common.filter
+. ./common.qemu

 # This tests qcow2-specific low-level functionality
 _supported_fmt qcow2
@ -47,6 +54,22 @@ _supported_os Linux
 # files
 _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file

+# This test either needs sudo -n losetup or FUSE exports to work
+if sudo -n losetup &>/dev/null; then
+    loopdev=true
+else
+    loopdev=false
+
+    # QSD --export fuse will either yield "Parameter 'id' is missing"
+    # or "Invalid parameter 'fuse'", depending on whether there is
+    # FUSE support or not.
+    error=$($QSD --export fuse 2>&1)
+    if [[ $error = *"'fuse'"* ]]; then
+        _notrun 'Passwordless sudo for losetup or FUSE support required, but' \
+                'neither is available'
+    fi
+fi
+
 echo
 echo '=== Repairing an image without any refcount table ==='
 echo
@ -138,6 +161,240 @@ _make_test_img 64M
 poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00"
 _check_test_img -r all

+echo
+echo '=== Check rebuilt reftable location ==='
+
+# In an earlier version of the refcount rebuild algorithm, the
+# reftable was generally placed at the image end (unless something was
+# allocated in the area covered by the refblock right before the image
+# file end, then we would try to place the reftable in that refblock).
+# This was later changed so the reftable would be placed in the
+# earliest possible location.  Test this.
+
+echo
+echo '--- Does the image size increase? ---'
+echo
+
+# First test: Just create some image, write some data to it, and
+# resize it so there is free space at the end of the image (enough
+# that it spans at least one full refblock, which for cluster_size=512
+# images, spans 128k).  With the old algorithm, the reftable would
+# have then been placed at the end of the image file, but with the new
+# one, it will be put in that free space.
+# We want to check whether the size of the image file increases due to
+# rebuilding the refcount structures (it should not).
+
+_make_test_img -o 'cluster_size=512' 1M
+# Write something
+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+# Add free space
+file_len=$(stat -c '%s' "$TEST_IMG")
+truncate -s $((file_len + 256 * 1024)) "$TEST_IMG"
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
+
+# Check whether rebuilding the refcount structures increases the image
+# file size
+file_len=$(stat -c '%s' "$TEST_IMG")
+echo
+# The only leaks there can be are the old refcount structures that are
+# leaked during rebuilding, no need to clutter the output with them
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
+echo
+post_repair_file_len=$(stat -c '%s' "$TEST_IMG")
+
+if [[ $file_len -eq $post_repair_file_len ]]; then
+    echo 'OK: Image size did not change'
+else
+    echo 'ERROR: Image size differs' \
+        "($file_len before, $post_repair_file_len after)"
+fi
+
+echo
+echo '--- Will the reftable occupy a hole specifically left for it?  ---'
+echo
+
+# Note: With cluster_size=512, every refblock covers 128k.
+# The reftable covers 8M per reftable cluster.
+
+# Create an image that requires two reftable clusters (just because
+# this is more interesting than a single-clustered reftable).
+_make_test_img -o 'cluster_size=512' 9M
+$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io
+
+# Writing 8M will have resized the reftable.  Unfortunately, doing so
+# will leave holes in the file, so we need to fill them up so we can
+# be sure the whole file is allocated.  Do that by writing
+# consecutively smaller chunks starting from 8 MB, until the file
+# length increases even with a chunk size of 512.  Then we must have
+# filled all holes.
+ofs=$((8 * 1024 * 1024))
+block_len=$((16 * 1024))
+while [[ $block_len -ge 512 ]]; do
+    file_len=$(stat -c '%s' "$TEST_IMG")
+    while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do
+        # Do not include this in the reference output, it does not
+        # really matter which qemu-io calls we do here exactly
+        $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null
+        ofs=$((ofs + block_len))
+    done
+    block_len=$((block_len / 2))
+done
+
+# Fill up to 9M (do not include this in the reference output either,
+# $ofs is random for all we know)
+$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null
+
+# Make space as follows:
+# - For the first refblock: Right at the beginning of the image (this
+#   refblock is placed in the first place possible),
+# - For the reftable somewhere soon afterwards, still near the
+#   beginning of the image (i.e. covered by the first refblock); the
+#   reftable too is placed in the first place possible, but only after
+#   all refblocks have been placed)
+# No space is needed for the other refblocks, because no refblock is
+# put before the space it covers.  In this test case, we do not mind
+# if they are placed at the image file's end.
+
+# Before we make that space, we have to find out the host offset of
+# the area that belonged to the two data clusters at guest offset 4k,
+# because we expect the reftable to be placed there, and we will have
+# to verify that it is.
+
+l1_offset=$(peek_file_be "$TEST_IMG" 40 8)
+l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8)
+l2_offset=$((l2_offset & 0x00fffffffffffe00))
+data_4k_offset=$(peek_file_be "$TEST_IMG" \
+                 $((l2_offset + 4096 / 512 * 8)) 8)
+data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00))
+
+$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
+
+echo
+# The only leaks there can be are the old refcount structures that are
+# leaked during rebuilding, no need to clutter the output with them
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
+echo
+
+# Check whether the reftable was put where we expected
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+if [[ $rt_offset -eq $data_4k_offset ]]; then
+    echo 'OK: Reftable is where we expect it'
+else
+    echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset"
+fi
+
+echo
+echo '--- Rebuilding refcount structures on block devices ---'
+echo
+
+# A block device cannot really grow, at least not during qemu-img
+# check.  As mentioned in the above cases, rebuilding the refcount
+# structure may lead to new refcount structures being written after
+# the end of the image, and in the past that happened even if there
+# was more than sufficient space in the image.  Such post-EOF writes
+# will not work on block devices, so test that the new algorithm
+# avoids it.
+
+# If we have passwordless sudo and losetup, we can use those to create
+# a block device.  Otherwise, we can resort to qemu's FUSE export to
+# create a file that isn't growable, which effectively tests the same
+# thing.
+
+_cleanup_test_img
+truncate -s $((64 * 1024 * 1024)) "$TEST_IMG"
+
+if $loopdev; then
+    export_mp=$(sudo -n losetup --show -f "$TEST_IMG")
+    export_mp_driver=host_device
+    sudo -n chmod go+rw "$export_mp"
+else
+    # Create non-growable FUSE export that is a bit like an empty
+    # block device
+    export_mp="$TEST_DIR/fuse-export"
+    export_mp_driver=file
+    touch "$export_mp"
+
+    $QSD \
+        --blockdev file,node-name=export-node,filename="$TEST_IMG" \
+        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
+        --pidfile "$TEST_DIR/qsd.pid" \
+        --daemonize
+fi
+
+# Now create a qcow2 image on the device -- unfortunately, qemu-img
+# create force-creates the file, so we have to resort to the
+# blockdev-create job.
+_launch_qemu \
+    --blockdev $export_mp_driver,node-name=file,filename="$export_mp"
+
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "qmp_capabilities" }' \
+    'return'
+
+# Small cluster size again, so the image needs multiple refblocks
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "blockdev-create",
+       "arguments": {
+           "job-id": "create",
+           "options": {
+               "driver": "qcow2",
+               "file": "file",
+               "size": '$((64 * 1024 * 1024))',
+               "cluster-size": 512
+           } } }' \
+    '"concluded"'
+
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \
+    'return'
+
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "quit" }' \
+    'return'
+
+wait=y _cleanup_qemu
+echo
+
+# Write some data
+$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$export_mp" 48 8)
+rb_offset=$(peek_file_be "$export_mp" $rt_offset 8)
+poke_file "$export_mp" $rb_offset "\x00\x00"
+
+# Repairing such a simple case should just work
+# (We used to put the reftable at the end of the image file, which can
+# never work for non-growable devices.)
+echo
+TEST_IMG="$export_mp" _check_test_img -r all \
+    | grep -v '^Repairing cluster.*refcount=1 reference=0'
+
+if $loopdev; then
+    sudo -n losetup -d "$export_mp"
+else
+    qsd_pid=$(cat "$TEST_DIR/qsd.pid")
+    kill -TERM "$qsd_pid"
+    # Wait for process to exit (cannot `wait` because the QSD is daemonized)
+    while [ -f "$TEST_DIR/qsd.pid" ]; do
+        true
+    done
+fi
+
 # success, all done
 echo '*** done'
 rm -f $seq.full
--- a/tests/qemu-iotests/108.out
+++ b/tests/qemu-iotests/108.out
@ -105,6 +105,87 @@ The following inconsistencies were found and repaired:
    0 leaked clusters
    1 corruptions

+Double checking the fixed image now...
+No errors were found on the image.
+
+=== Check rebuilt reftable location ===
+
+--- Does the image size increase? ---
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+
+OK: Image size did not change
+
+--- Will the reftable occupy a hole specifically left for it?  ---
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184
+wrote 8388608/8388608 bytes at offset 0
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 1024/1024 bytes at offset 4096
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+
+OK: Reftable is where we expect it
+
+--- Rebuilding refcount structures on block devices ---
+
+{ "execute": "qmp_capabilities" }
+{"return": {}}
+{ "execute": "blockdev-create",
+       "arguments": {
+           "job-id": "create",
+           "options": {
+               "driver": "IMGFMT",
+               "file": "file",
+               "size": 67108864,
+               "cluster-size": 512
+           } } }
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}}
+{ "execute": "job-dismiss", "arguments": { "id": "create" } }
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}}
+{"return": {}}
+{ "execute": "quit" }
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
 Double checking the fixed image now...
 No errors were found on the image.
 *** done
--- a/tests/qemu-iotests/303
+++ b/tests/qemu-iotests/303
@ -21,10 +21,12 @@

 import iotests
 import subprocess
-from iotests import qemu_img_create, qemu_io, file_path, log, filter_qemu_io
+from iotests import qemu_img_create, qemu_io, file_path, log, filter_qemu_io, \
+        verify_qcow2_zstd_compression

 iotests.script_initialize(supported_fmts=['qcow2'],
                          unsupported_imgopts=['refcount_bits', 'compat'])
+verify_qcow2_zstd_compression()

 disk = file_path('disk')
 chunk = 1024 * 1024
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@ -1471,6 +1471,26 @@ def verify_working_luks():
    if not working:
        notrun(reason)

+def supports_qcow2_zstd_compression() -> bool:
+    img_file = f'{test_dir}/qcow2-zstd-test.qcow2'
+    res = qemu_img('create', '-f', 'qcow2', '-o', 'compression_type=zstd',
+                   img_file, '0',
+                   check=False)
+    try:
+        os.remove(img_file)
+    except OSError:
+        pass
+
+    if res.returncode == 1 and \
+            "'compression-type' does not accept value 'zstd'" in res.stdout:
+        return False
+    else:
+        return True
+
+def verify_qcow2_zstd_compression():
+    if not supports_qcow2_zstd_compression():
+        notrun('zstd compression not supported')
+
 def qemu_pipe(*args: str) -> str:
    """
    Run qemu with an option to print something and exit (e.g. a help option).
--- a/tests/qemu-iotests/meson.build
+++ b/tests/qemu-iotests/meson.build
@ -1,30 +1,47 @@
-if have_tools and targetos != 'windows' and not get_option('gprof')
-  qemu_iotests_binaries = [qemu_img, qemu_io, qemu_nbd, qsd]
-  qemu_iotests_env = {'PYTHON': python.full_path()}
-  qemu_iotests_formats = {
-    'qcow2': 'quick',
-    'raw': 'slow',
-    'qed': 'thorough',
-    'vmdk': 'thorough',
-    'vpc': 'thorough'
-  }
-
-  foreach k, v : emulators
-    if k.startswith('qemu-system-')
-      qemu_iotests_binaries += v
-    endif
-  endforeach
-  foreach format, speed: qemu_iotests_formats
-    if speed == 'quick'
-      suites = 'block'
-    else
-      suites = ['block-' + speed, speed]
-    endif
-    test('qemu-iotests ' + format, sh, args: [files('../check-block.sh'), format],
-         depends: qemu_iotests_binaries, env: qemu_iotests_env,
-         protocol: 'tap',
-         suite: suites,
-         timeout: 0,
-         is_parallel: false)
-  endforeach
+if not have_tools or targetos == 'windows' or get_option('gprof')
+  subdir_done()
 endif
+
+foreach cflag: config_host['QEMU_CFLAGS'].split()
+  if cflag.startswith('-fsanitize') and \
+     not cflag.contains('safe-stack') and not cflag.contains('cfi-icall')
+    message('Sanitizers are enabled ==> Disabled the qemu-iotests.')
+    subdir_done()
+  endif
+endforeach
+
+bash = find_program('bash', required: false, version: '>= 4.0')
+if not bash.found()
+  message('bash >= v4.0 not available ==> Disabled the qemu-iotests.')
+  subdir_done()
+endif
+
+qemu_iotests_binaries = [qemu_img, qemu_io, qemu_nbd, qsd]
+qemu_iotests_env = {'PYTHON': python.full_path()}
+qemu_iotests_formats = {
+  'qcow2': 'quick',
+  'raw': 'slow',
+  'qed': 'thorough',
+  'vmdk': 'thorough',
+  'vpc': 'thorough'
+}
+
+foreach k, v : emulators
+  if k.startswith('qemu-system-')
+    qemu_iotests_binaries += v
+  endif
+endforeach
+
+foreach format, speed: qemu_iotests_formats
+  if speed == 'quick'
+    suites = 'block'
+  else
+    suites = ['block-' + speed, speed]
+  endif
+  test('qemu-iotests ' + format, sh, args: [files('../check-block.sh'), format],
+       depends: qemu_iotests_binaries, env: qemu_iotests_env,
+       protocol: 'tap',
+       suite: suites,
+       timeout: 0,
+       is_parallel: false)
+endforeach