diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aeaadeebc1fd..b81c331b28fa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1016,6 +1016,7 @@ struct btrfs_fs_info { /* used to keep from writing metadata until there is a nice batch */ struct percpu_counter dirty_metadata_bytes; struct percpu_counter delalloc_bytes; + struct percpu_counter dio_bytes; s32 dirty_metadata_batch; s32 delalloc_batch; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eb26b3239827..663efce22d98 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2633,11 +2633,17 @@ int open_ctree(struct super_block *sb, goto fail; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); + ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_srcu; } + + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); + if (ret) { + err = ret; + goto fail_dio_bytes; + } fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); @@ -3336,6 +3342,8 @@ fail_delalloc_bytes: percpu_counter_destroy(&fs_info->delalloc_bytes); fail_dirty_metadata_bytes: percpu_counter_destroy(&fs_info->dirty_metadata_bytes); +fail_dio_bytes: + percpu_counter_destroy(&fs_info->dio_bytes); fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: @@ -4017,6 +4025,10 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_sum(&fs_info->delalloc_bytes)); } + if (percpu_counter_sum(&fs_info->dio_bytes)) + btrfs_info(fs_info, "at unmount dio bytes count %lld", + percpu_counter_sum(&fs_info->dio_bytes)); + btrfs_sysfs_remove_mounted(fs_info); btrfs_sysfs_remove_fsid(fs_info->fs_devices); @@ -4048,6 +4060,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); + percpu_counter_destroy(&fs_info->dio_bytes); percpu_counter_destroy(&fs_info->dev_replace.bio_counter); cleanup_srcu_struct(&fs_info->subvol_srcu); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fa09a83a6954..c61cfd0a77ee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4633,6 +4633,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; u64 delalloc_bytes; + u64 dio_bytes; u64 async_pages; u64 items; long time_left; @@ -4648,7 +4649,8 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, delalloc_bytes = percpu_counter_sum_positive( &fs_info->delalloc_bytes); - if (delalloc_bytes == 0) { + dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); + if (delalloc_bytes == 0 && dio_bytes == 0) { if (trans) return; if (wait_ordered) @@ -4656,8 +4658,16 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, return; } + /* + * If we are doing more ordered than delalloc we need to just wait on + * ordered extents, otherwise we'll waste time trying to flush delalloc + * that likely won't give us the space back we need. + */ + if (dio_bytes > delalloc_bytes) + wait_ordered = true; + loops = 0; - while (delalloc_bytes && loops < 3) { + while ((delalloc_bytes || dio_bytes) && loops < 3) { nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; /* @@ -4707,6 +4717,7 @@ skip_async: } delalloc_bytes = percpu_counter_sum_positive( &fs_info->delalloc_bytes); + dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); } } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f6bb6039fa4c..52889da69113 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -195,8 +195,11 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); - if (dio) + if (dio) { + percpu_counter_add_batch(&fs_info->dio_bytes, len, + fs_info->delalloc_batch); set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); + } /* one ref for the tree */ refcount_set(&entry->refs, 1); @@ -468,6 +471,10 @@ void btrfs_remove_ordered_extent(struct inode *inode, if (root != fs_info->tree_root) btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false); + if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) + percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len, + fs_info->delalloc_batch); + tree = &btrfs_inode->ordered_tree; spin_lock_irq(&tree->lock); node = &entry->rb_node;