diff options
Diffstat (limited to '0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch')
-rw-r--r-- | 0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch | 255 |
1 files changed, 0 insertions, 255 deletions
diff --git a/0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch b/0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch deleted file mode 100644 index 493337c64d42..000000000000 --- a/0005-btrfs-wait-on-uncached-block-groups-on-every-allocat.patch +++ /dev/null @@ -1,255 +0,0 @@ -From 8da079307d115705e243d226591dcb4388cef7e2 Mon Sep 17 00:00:00 2001 -From: Josef Bacik <josef@toxicpanda.com> -Date: Mon, 31 Jul 2023 16:28:43 -0400 -Subject: [PATCH 5/8] btrfs: wait on uncached block groups on every allocation - loop - -My initial fix for the generic/475 hangs was related to metadata, but -our CI testing uncovered another case where we hang for similar reasons. -We again have a task with a plug that is holding an outstanding request -that is keeping the dm device from finishing it's suspend, and that task -is stuck in the allocator. - -This time it is stuck trying to allocate data, but we do not have a -block group that matches the size class. The larger loop in the -allocator looks like this (simplified of course) - - find_free_extent - for_each_block_group { - ffe_ctl->cached == btrfs_block_group_cache_done(bg) - if (!ffe_ctl->cached) - ffe_ctl->have_caching_bg = true; - do_allocation() - btrfs_wait_block_group_cache_progress(); - } - - if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg) - go search again; - -In my earlier fix we were trying to allocate from the block group, but -we weren't waiting for the progress because we were only waiting for the -free space to be >= the amount of free space we wanted. My fix made it -so we waited for forward progress to be made as well, so we would be -sure to wait. - -This time however we did not have a block group that matched our size -class, so what was happening was this - - find_free_extent - for_each_block_group { - ffe_ctl->cached == btrfs_block_group_cache_done(bg) - if (!ffe_ctl->cached) - ffe_ctl->have_caching_bg = true; - if (size_class_doesn't_match()) - goto loop; - do_allocation() - btrfs_wait_block_group_cache_progress(); - loop: - release_block_group(block_group); - } - - if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg) - go search again; - -The size_class_doesn't_match() part was true, so we'd just skip this -block group and never wait for caching, and then because we found a -caching block group we'd just go back and do the loop again. We never -sleep and thus never flush the plug and we have the same deadlock. - -Fix the logic for waiting on the block group caching to instead do it -unconditionally when we goto loop. This takes the logic out of the -allocation step, so now the loop looks more like this - - find_free_extent - for_each_block_group { - ffe_ctl->cached == btrfs_block_group_cache_done(bg) - if (!ffe_ctl->cached) - ffe_ctl->have_caching_bg = true; - if (size_class_doesn't_match()) - goto loop; - do_allocation() - btrfs_wait_block_group_cache_progress(); - loop: - if (loop > LOOP_CACHING_NOWAIT && !ffe_ctl->retry_uncached && - !ffe_ctl->cached) { - ffe_ctl->retry_uncached = true; - btrfs_wait_block_group_cache_progress(); - } - - release_block_group(block_group); - } - - if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg) - go search again; - -This simplifies the logic a lot, and makes sure that if we're hitting -uncached block groups we're always waiting on them at some point. - -I ran this through 100 iterations of generic/475, as this particular -case was harder to hit than the previous one. - -Signed-off-by: Josef Bacik <josef@toxicpanda.com> -Signed-off-by: David Sterba <dsterba@suse.com> ---- - fs/btrfs/extent-tree.c | 61 +++++++++++++----------------------------- - fs/btrfs/extent-tree.h | 13 +++------ - 2 files changed, 22 insertions(+), 52 deletions(-) - -diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c -index f396a9afa4032..6096bd98e6c70 100644 ---- a/fs/btrfs/extent-tree.c -+++ b/fs/btrfs/extent-tree.c -@@ -3427,7 +3427,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache, - * Helper function for find_free_extent(). - * - * Return -ENOENT to inform caller that we need fallback to unclustered mode. -- * Return -EAGAIN to inform caller that we need to re-search this block group - * Return >0 to inform caller that we find nothing - * Return 0 means we have found a location and set ffe_ctl->found_offset. - */ -@@ -3508,14 +3507,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg, - trace_btrfs_reserve_extent_cluster(bg, ffe_ctl); - return 0; - } -- } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT && -- !ffe_ctl->retry_clustered) { -- spin_unlock(&last_ptr->refill_lock); -- -- ffe_ctl->retry_clustered = true; -- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes + -- ffe_ctl->empty_cluster + ffe_ctl->empty_size); -- return -EAGAIN; - } - /* - * At this point we either didn't find a cluster or we weren't able to -@@ -3530,7 +3521,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg, - /* - * Return >0 to inform caller that we find nothing - * Return 0 when we found an free extent and set ffe_ctrl->found_offset -- * Return -EAGAIN to inform caller that we need to re-search this block group - */ - static int find_free_extent_unclustered(struct btrfs_block_group *bg, - struct find_free_extent_ctl *ffe_ctl) -@@ -3568,25 +3558,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg, - offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start, - ffe_ctl->num_bytes, ffe_ctl->empty_size, - &ffe_ctl->max_extent_size); -- -- /* -- * If we didn't find a chunk, and we haven't failed on this block group -- * before, and this block group is in the middle of caching and we are -- * ok with waiting, then go ahead and wait for progress to be made, and -- * set @retry_unclustered to true. -- * -- * If @retry_unclustered is true then we've already waited on this -- * block group once and should move on to the next block group. -- */ -- if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached && -- ffe_ctl->loop > LOOP_CACHING_NOWAIT) { -- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes + -- ffe_ctl->empty_size); -- ffe_ctl->retry_unclustered = true; -- return -EAGAIN; -- } else if (!offset) { -+ if (!offset) - return 1; -- } - ffe_ctl->found_offset = offset; - return 0; - } -@@ -3600,7 +3573,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group, - /* We want to try and use the cluster allocator, so lets look there */ - if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) { - ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret); -- if (ret >= 0 || ret == -EAGAIN) -+ if (ret >= 0) - return ret; - /* ret == -ENOENT case falls through */ - } -@@ -3816,8 +3789,7 @@ static void release_block_group(struct btrfs_block_group *block_group, - { - switch (ffe_ctl->policy) { - case BTRFS_EXTENT_ALLOC_CLUSTERED: -- ffe_ctl->retry_clustered = false; -- ffe_ctl->retry_unclustered = false; -+ ffe_ctl->retry_uncached = false; - break; - case BTRFS_EXTENT_ALLOC_ZONED: - /* Nothing to do */ -@@ -4168,9 +4140,7 @@ static noinline int find_free_extent(struct btrfs_root *root, - ffe_ctl->orig_have_caching_bg = false; - ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags); - ffe_ctl->loop = 0; -- /* For clustered allocation */ -- ffe_ctl->retry_clustered = false; -- ffe_ctl->retry_unclustered = false; -+ ffe_ctl->retry_uncached = false; - ffe_ctl->cached = 0; - ffe_ctl->max_extent_size = 0; - ffe_ctl->total_free_space = 0; -@@ -4321,16 +4291,12 @@ static noinline int find_free_extent(struct btrfs_root *root, - - bg_ret = NULL; - ret = do_allocation(block_group, ffe_ctl, &bg_ret); -- if (ret == 0) { -- if (bg_ret && bg_ret != block_group) { -- btrfs_release_block_group(block_group, -- ffe_ctl->delalloc); -- block_group = bg_ret; -- } -- } else if (ret == -EAGAIN) { -- goto have_block_group; -- } else if (ret > 0) { -+ if (ret > 0) - goto loop; -+ -+ if (bg_ret && bg_ret != block_group) { -+ btrfs_release_block_group(block_group, ffe_ctl->delalloc); -+ block_group = bg_ret; - } - - /* Checks */ -@@ -4371,6 +4337,15 @@ static noinline int find_free_extent(struct btrfs_root *root, - btrfs_release_block_group(block_group, ffe_ctl->delalloc); - break; - loop: -+ if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT && -+ !ffe_ctl->retry_uncached) { -+ ffe_ctl->retry_uncached = true; -+ btrfs_wait_block_group_cache_progress(block_group, -+ ffe_ctl->num_bytes + -+ ffe_ctl->empty_cluster + -+ ffe_ctl->empty_size); -+ goto have_block_group; -+ } - release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc); - cond_resched(); - } -diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h -index 429d5c5700618..6bfba2f22fdd4 100644 ---- a/fs/btrfs/extent-tree.h -+++ b/fs/btrfs/extent-tree.h -@@ -48,16 +48,11 @@ struct find_free_extent_ctl { - int loop; - - /* -- * Whether we're refilling a cluster, if true we need to re-search -- * current block group but don't try to refill the cluster again. -+ * Set to true if we're retrying the allocation on this block group -+ * after waiting for caching progress, this is so that we retry only -+ * once before moving on to another block group. - */ -- bool retry_clustered; -- -- /* -- * Whether we're updating free space cache, if true we need to re-search -- * current block group but don't try updating free space cache again. -- */ -- bool retry_unclustered; -+ bool retry_uncached; - - /* If current block group is cached */ - int cached; --- -2.41.0 - |