666d644cd7
Filesystems are occasionally being shut down with this error:

    xfs_trans_ail_delete_bulk: attempting to delete a log item that is not in the AIL.

It was diagnosed to be related to the EFI/EFD commit order when the EFI and EFD are in different checkpoints and the EFD is committed before the EFI here: http://oss.sgi.com/archives/xfs/2013-01/msg00082.html

The real problem is that a single bit cannot fully describe the states that the EFI/EFD processing can be in. These completion states are:

    EFI                   EFI in AIL   EFD          Result
    committed/unpinned    Yes          committed    OK
    committed/pinned      No           committed    Shutdown
    uncommitted           No           committed    Shutdown

Note that the "result" field is what should happen, not what does happen. The current logic is broken and handles the first two cases correctly by luck. That is, the code will free the EFI if the XFS_EFI_COMMITTED bit is *not* set, rather than if it is set. The inverted logic "works" because if both EFI and EFD are committed, then the first __xfs_efi_release() call clears the XFS_EFI_COMMITTED bit, and the second frees the EFI item. Hence as long as xfs_efi_item_committed() has been called, everything appears to be fine. It is the third case where the logic fails - where xfs_efd_item_committed() is called before xfs_efi_item_committed(), and that results in the EFI being freed before it has been committed. That is the bug that triggered the shutdown, and hence keeping track of whether the EFI has been committed or not is insufficient to correctly order the EFI/EFD operations w.r.t. the AIL. What we really want is this: the EFI is always placed into the AIL before the last reference goes away. The only way to guarantee that is that the EFI is not freed until after it has been unpinned *and* the EFD has been committed. That is, restructure the logic so that the only case that can occur is the first case. This can be done easily by replacing the XFS_EFI_COMMITTED with an EFI reference count.
The EFI is initialised with its own count, and that is not released until it is unpinned. However, there is a complication to this method - the high level EFI/EFD code in xfs_bmap_finish() does not hold direct references to the EFI structure, and runs a transaction commit between the EFI and EFD processing. Hence the EFI can be freed even before the EFD is created using such a method. Further, log recovery uses the AIL for tracking EFI/EFDs that need to be recovered, but it uses the AIL *differently* to the EFI transaction commit. Hence log recovery never pins or unpins EFIs, so we can't drop the EFI reference count indirectly to free the EFI. However, this doesn't prevent us from using a reference count here. There is a 1:1 relationship between EFIs and EFDs, so when we initialise the EFI we can take a reference count for the EFD as well. This solves the xfs_bmap_finish() issue - the EFI will never be freed until the EFD is processed. In terms of log recovery, during the committing of the EFD we can look for the XFS_EFI_RECOVERED bit being set and drop the EFI reference as well, thereby ensuring everything works correctly there as well. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
166 lines
5.3 KiB
C
166 lines
5.3 KiB
C
/*
|
|
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it would be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
#ifndef __XFS_EXTFREE_ITEM_H__
|
|
#define __XFS_EXTFREE_ITEM_H__
|
|
|
|
/*
 * Forward declarations. This header only refers to these types through
 * pointers, so their full definitions are not needed here.
 */
struct xfs_mount;
struct kmem_zone;
|
|
|
|
typedef struct xfs_extent {
|
|
xfs_dfsbno_t ext_start;
|
|
xfs_extlen_t ext_len;
|
|
} xfs_extent_t;
|
|
|
|
/*
 * Since an xfs_extent_t has types (start:64, len: 32)
 * there are different alignments on 32 bit and 64 bit kernels.
 * So we provide the different variants for use by a
 * conversion routine.
 *
 * This is the packed variant: no tail padding follows ext_len, so the
 * structure is exactly 12 bytes.
 */
typedef struct xfs_extent_32 {
	__uint64_t	ext_start;	/* start of the extent */
	__uint32_t	ext_len;	/* length of the extent */
} __attribute__((packed)) xfs_extent_32_t;
|
|
|
|
/*
 * Extent variant with the tail padding spelled out as an explicit field,
 * giving a fixed 16 byte structure (64 bit start, 32 bit length, 32 bit pad).
 */
typedef struct xfs_extent_64 {
	__uint64_t	ext_start;	/* start of the extent */
	__uint32_t	ext_len;	/* length of the extent */
	__uint32_t	ext_pad;	/* explicit padding to 16 bytes */
} xfs_extent_64_t;
|
|
|
|
/*
|
|
* This is the structure used to lay out an efi log item in the
|
|
* log. The efi_extents field is a variable size array whose
|
|
* size is given by efi_nextents.
|
|
*/
|
|
typedef struct xfs_efi_log_format {
|
|
__uint16_t efi_type; /* efi log item type */
|
|
__uint16_t efi_size; /* size of this item */
|
|
__uint32_t efi_nextents; /* # extents to free */
|
|
__uint64_t efi_id; /* efi identifier */
|
|
xfs_extent_t efi_extents[1]; /* array of extents to free */
|
|
} xfs_efi_log_format_t;
|
|
|
|
typedef struct xfs_efi_log_format_32 {
|
|
__uint16_t efi_type; /* efi log item type */
|
|
__uint16_t efi_size; /* size of this item */
|
|
__uint32_t efi_nextents; /* # extents to free */
|
|
__uint64_t efi_id; /* efi identifier */
|
|
xfs_extent_32_t efi_extents[1]; /* array of extents to free */
|
|
} __attribute__((packed)) xfs_efi_log_format_32_t;
|
|
|
|
typedef struct xfs_efi_log_format_64 {
|
|
__uint16_t efi_type; /* efi log item type */
|
|
__uint16_t efi_size; /* size of this item */
|
|
__uint32_t efi_nextents; /* # extents to free */
|
|
__uint64_t efi_id; /* efi identifier */
|
|
xfs_extent_64_t efi_extents[1]; /* array of extents to free */
|
|
} xfs_efi_log_format_64_t;
|
|
|
|
/*
|
|
* This is the structure used to lay out an efd log item in the
|
|
* log. The efd_extents array is a variable size array whose
|
|
* size is given by efd_nextents;
|
|
*/
|
|
typedef struct xfs_efd_log_format {
|
|
__uint16_t efd_type; /* efd log item type */
|
|
__uint16_t efd_size; /* size of this item */
|
|
__uint32_t efd_nextents; /* # of extents freed */
|
|
__uint64_t efd_efi_id; /* id of corresponding efi */
|
|
xfs_extent_t efd_extents[1]; /* array of extents freed */
|
|
} xfs_efd_log_format_t;
|
|
|
|
typedef struct xfs_efd_log_format_32 {
|
|
__uint16_t efd_type; /* efd log item type */
|
|
__uint16_t efd_size; /* size of this item */
|
|
__uint32_t efd_nextents; /* # of extents freed */
|
|
__uint64_t efd_efi_id; /* id of corresponding efi */
|
|
xfs_extent_32_t efd_extents[1]; /* array of extents freed */
|
|
} __attribute__((packed)) xfs_efd_log_format_32_t;
|
|
|
|
typedef struct xfs_efd_log_format_64 {
|
|
__uint16_t efd_type; /* efd log item type */
|
|
__uint16_t efd_size; /* size of this item */
|
|
__uint32_t efd_nextents; /* # of extents freed */
|
|
__uint64_t efd_efi_id; /* id of corresponding efi */
|
|
xfs_extent_64_t efd_extents[1]; /* array of extents freed */
|
|
} xfs_efd_log_format_64_t;
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
/*
 * Max number of extents in fast allocation path.
 */
#define	XFS_EFI_MAX_FAST_EXTENTS	16
|
|
|
|
/*
 * Define EFI flag bits. Manipulated by set/clear/test_bit operators,
 * so the value is a bit number within efi_flags rather than a mask.
 */
#define	XFS_EFI_RECOVERED	1
|
|
|
|
/*
|
|
* This is the "extent free intention" log item. It is used to log the fact
|
|
* that some extents need to be free. It is used in conjunction with the
|
|
* "extent free done" log item described below.
|
|
*
|
|
* The EFI is reference counted so that it is not freed prior to both the EFI
|
|
* and EFD being committed and unpinned. This ensures that when the last
|
|
* reference goes away the EFI will always be in the AIL as it has been
|
|
* unpinned, regardless of whether the EFD is processed before or after the EFI.
|
|
*/
|
|
typedef struct xfs_efi_log_item {
|
|
xfs_log_item_t efi_item;
|
|
atomic_t efi_refcount;
|
|
atomic_t efi_next_extent;
|
|
unsigned long efi_flags; /* misc flags */
|
|
xfs_efi_log_format_t efi_format;
|
|
} xfs_efi_log_item_t;
|
|
|
|
/*
|
|
* This is the "extent free done" log item. It is used to log
|
|
* the fact that some extents earlier mentioned in an efi item
|
|
* have been freed.
|
|
*/
|
|
typedef struct xfs_efd_log_item {
|
|
xfs_log_item_t efd_item;
|
|
xfs_efi_log_item_t *efd_efip;
|
|
uint efd_next_extent;
|
|
xfs_efd_log_format_t efd_format;
|
|
} xfs_efd_log_item_t;
|
|
|
|
/*
 * Max number of extents in fast allocation path.
 */
#define	XFS_EFD_MAX_FAST_EXTENTS	16
|
|
|
|
/*
 * Zones for the efi and efd log items.  Only declared here; the
 * definitions live outside this header.
 */
extern struct kmem_zone	*xfs_efi_zone;
extern struct kmem_zone	*xfs_efd_zone;
|
|
|
|
xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint);
|
|
xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
|
|
uint);
|
|
int xfs_efi_copy_format(xfs_log_iovec_t *buf,
|
|
xfs_efi_log_format_t *dst_efi_fmt);
|
|
void xfs_efi_item_free(xfs_efi_log_item_t *);
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif /* __XFS_EXTFREE_ITEM_H__ */
|