summarylogtreecommitdiffstats
path: root/0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch
diff options
context:
space:
mode:
Diffstat (limited to '0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch')
-rw-r--r--0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch255
1 files changed, 0 insertions, 255 deletions
diff --git a/0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch b/0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch
deleted file mode 100644
index 493337c64d42..000000000000
--- a/0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch
+++ /dev/null
@@ -1,255 +0,0 @@
-From 8da079307d115705e243d226591dcb4388cef7e2 Mon Sep 17 00:00:00 2001
-From: Josef Bacik <josef@toxicpanda.com>
-Date: Mon, 31 Jul 2023 16:28:43 -0400
-Subject: [PATCH 5/8] btrfs: wait on uncached block groups on every allocation
- loop
-
-My initial fix for the generic/475 hangs was related to metadata, but
-our CI testing uncovered another case where we hang for similar reasons.
-We again have a task with a plug that is holding an outstanding request
-that is keeping the dm device from finishing it's suspend, and that task
-is stuck in the allocator.
-
-This time it is stuck trying to allocate data, but we do not have a
-block group that matches the size class. The larger loop in the
-allocator looks like this (simplified of course)
-
- find_free_extent
- for_each_block_group {
- ffe_ctl->cached == btrfs_block_group_cache_done(bg)
- if (!ffe_ctl->cached)
- ffe_ctl->have_caching_bg = true;
- do_allocation()
- btrfs_wait_block_group_cache_progress();
- }
-
- if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
- go search again;
-
-In my earlier fix we were trying to allocate from the block group, but
-we weren't waiting for the progress because we were only waiting for the
-free space to be >= the amount of free space we wanted. My fix made it
-so we waited for forward progress to be made as well, so we would be
-sure to wait.
-
-This time however we did not have a block group that matched our size
-class, so what was happening was this
-
- find_free_extent
- for_each_block_group {
- ffe_ctl->cached == btrfs_block_group_cache_done(bg)
- if (!ffe_ctl->cached)
- ffe_ctl->have_caching_bg = true;
- if (size_class_doesn't_match())
- goto loop;
- do_allocation()
- btrfs_wait_block_group_cache_progress();
- loop:
- release_block_group(block_group);
- }
-
- if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
- go search again;
-
-The size_class_doesn't_match() part was true, so we'd just skip this
-block group and never wait for caching, and then because we found a
-caching block group we'd just go back and do the loop again. We never
-sleep and thus never flush the plug and we have the same deadlock.
-
-Fix the logic for waiting on the block group caching to instead do it
-unconditionally when we goto loop. This takes the logic out of the
-allocation step, so now the loop looks more like this
-
- find_free_extent
- for_each_block_group {
- ffe_ctl->cached == btrfs_block_group_cache_done(bg)
- if (!ffe_ctl->cached)
- ffe_ctl->have_caching_bg = true;
- if (size_class_doesn't_match())
- goto loop;
- do_allocation()
- btrfs_wait_block_group_cache_progress();
- loop:
- if (loop > LOOP_CACHING_NOWAIT && !ffe_ctl->retry_uncached &&
- !ffe_ctl->cached) {
- ffe_ctl->retry_uncached = true;
- btrfs_wait_block_group_cache_progress();
- }
-
- release_block_group(block_group);
- }
-
- if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
- go search again;
-
-This simplifies the logic a lot, and makes sure that if we're hitting
-uncached block groups we're always waiting on them at some point.
-
-I ran this through 100 iterations of generic/475, as this particular
-case was harder to hit than the previous one.
-
-Signed-off-by: Josef Bacik <josef@toxicpanda.com>
-Signed-off-by: David Sterba <dsterba@suse.com>
----
- fs/btrfs/extent-tree.c | 61 +++++++++++++-----------------------------
- fs/btrfs/extent-tree.h | 13 +++------
- 2 files changed, 22 insertions(+), 52 deletions(-)
-
-diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
-index f396a9afa4032..6096bd98e6c70 100644
---- a/fs/btrfs/extent-tree.c
-+++ b/fs/btrfs/extent-tree.c
-@@ -3427,7 +3427,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
- * Helper function for find_free_extent().
- *
- * Return -ENOENT to inform caller that we need fallback to unclustered mode.
-- * Return -EAGAIN to inform caller that we need to re-search this block group
- * Return >0 to inform caller that we find nothing
- * Return 0 means we have found a location and set ffe_ctl->found_offset.
- */
-@@ -3508,14 +3507,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
- trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
- return 0;
- }
-- } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
-- !ffe_ctl->retry_clustered) {
-- spin_unlock(&last_ptr->refill_lock);
--
-- ffe_ctl->retry_clustered = true;
-- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-- ffe_ctl->empty_cluster + ffe_ctl->empty_size);
-- return -EAGAIN;
- }
- /*
- * At this point we either didn't find a cluster or we weren't able to
-@@ -3530,7 +3521,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
- /*
- * Return >0 to inform caller that we find nothing
- * Return 0 when we found an free extent and set ffe_ctrl->found_offset
-- * Return -EAGAIN to inform caller that we need to re-search this block group
- */
- static int find_free_extent_unclustered(struct btrfs_block_group *bg,
- struct find_free_extent_ctl *ffe_ctl)
-@@ -3568,25 +3558,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
- offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
- ffe_ctl->num_bytes, ffe_ctl->empty_size,
- &ffe_ctl->max_extent_size);
--
-- /*
-- * If we didn't find a chunk, and we haven't failed on this block group
-- * before, and this block group is in the middle of caching and we are
-- * ok with waiting, then go ahead and wait for progress to be made, and
-- * set @retry_unclustered to true.
-- *
-- * If @retry_unclustered is true then we've already waited on this
-- * block group once and should move on to the next block group.
-- */
-- if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
-- ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
-- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-- ffe_ctl->empty_size);
-- ffe_ctl->retry_unclustered = true;
-- return -EAGAIN;
-- } else if (!offset) {
-+ if (!offset)
- return 1;
-- }
- ffe_ctl->found_offset = offset;
- return 0;
- }
-@@ -3600,7 +3573,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
- /* We want to try and use the cluster allocator, so lets look there */
- if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
- ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
-- if (ret >= 0 || ret == -EAGAIN)
-+ if (ret >= 0)
- return ret;
- /* ret == -ENOENT case falls through */
- }
-@@ -3816,8 +3789,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
- {
- switch (ffe_ctl->policy) {
- case BTRFS_EXTENT_ALLOC_CLUSTERED:
-- ffe_ctl->retry_clustered = false;
-- ffe_ctl->retry_unclustered = false;
-+ ffe_ctl->retry_uncached = false;
- break;
- case BTRFS_EXTENT_ALLOC_ZONED:
- /* Nothing to do */
-@@ -4168,9 +4140,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
- ffe_ctl->orig_have_caching_bg = false;
- ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
- ffe_ctl->loop = 0;
-- /* For clustered allocation */
-- ffe_ctl->retry_clustered = false;
-- ffe_ctl->retry_unclustered = false;
-+ ffe_ctl->retry_uncached = false;
- ffe_ctl->cached = 0;
- ffe_ctl->max_extent_size = 0;
- ffe_ctl->total_free_space = 0;
-@@ -4321,16 +4291,12 @@ static noinline int find_free_extent(struct btrfs_root *root,
-
- bg_ret = NULL;
- ret = do_allocation(block_group, ffe_ctl, &bg_ret);
-- if (ret == 0) {
-- if (bg_ret && bg_ret != block_group) {
-- btrfs_release_block_group(block_group,
-- ffe_ctl->delalloc);
-- block_group = bg_ret;
-- }
-- } else if (ret == -EAGAIN) {
-- goto have_block_group;
-- } else if (ret > 0) {
-+ if (ret > 0)
- goto loop;
-+
-+ if (bg_ret && bg_ret != block_group) {
-+ btrfs_release_block_group(block_group, ffe_ctl->delalloc);
-+ block_group = bg_ret;
- }
-
- /* Checks */
-@@ -4371,6 +4337,15 @@ static noinline int find_free_extent(struct btrfs_root *root,
- btrfs_release_block_group(block_group, ffe_ctl->delalloc);
- break;
- loop:
-+ if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
-+ !ffe_ctl->retry_uncached) {
-+ ffe_ctl->retry_uncached = true;
-+ btrfs_wait_block_group_cache_progress(block_group,
-+ ffe_ctl->num_bytes +
-+ ffe_ctl->empty_cluster +
-+ ffe_ctl->empty_size);
-+ goto have_block_group;
-+ }
- release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
- cond_resched();
- }
-diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
-index 429d5c5700618..6bfba2f22fdd4 100644
---- a/fs/btrfs/extent-tree.h
-+++ b/fs/btrfs/extent-tree.h
-@@ -48,16 +48,11 @@ struct find_free_extent_ctl {
- int loop;
-
- /*
-- * Whether we're refilling a cluster, if true we need to re-search
-- * current block group but don't try to refill the cluster again.
-+ * Set to true if we're retrying the allocation on this block group
-+ * after waiting for caching progress, this is so that we retry only
-+ * once before moving on to another block group.
- */
-- bool retry_clustered;
--
-- /*
-- * Whether we're updating free space cache, if true we need to re-search
-- * current block group but don't try updating free space cache again.
-- */
-- bool retry_unclustered;
-+ bool retry_uncached;
-
- /* If current block group is cached */
- int cached;
---
-2.41.0
-