From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Sun, 19 Apr 2020 19:59:18 -0700
Subject: [PATCH] ZEN: mm: Stop kswapd early when nothing's waiting for it to
 free pages

Contains:
  - mm: Stop kswapd early when nothing's waiting for it to free pages

    Keeping kswapd running when all the failed allocations that invoked it
    are satisfied incurs a high overhead due to unnecessary page eviction
    and writeback, as well as spurious VM pressure events to various
    registered shrinkers. When kswapd doesn't need to work to make an
    allocation succeed anymore, stop it prematurely to save resources.

    Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>

  - mm: Don't stop kswapd on a per-node basis when there are no waiters

    The page allocator wakes all kswapds in an allocation context's allowed
    nodemask in the slow path, so it doesn't make sense to keep the
    kswapd-waiter count per NUMA node. Instead, it should be a global
    counter that stops all kswapds when there are no failed allocation
    requests, as sketched below.

    Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
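
In outline, both changes amount to one global waiter counter: allocations
that hit the slow path register themselves before waking kswapd, deregister
once they succeed or fail for good, and kswapd's reclaim loop stops as soon
as the count reads zero. A minimal userspace sketch of that scheme, using
plain C11 atomics in place of the kernel's atomic_long_t API (the function
names here are illustrative, not kernel symbols):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_long waiters;              /* stands in for kswapd_waiters */

    /* Slow-path allocation: register as a waiter before waking reclaim. */
    static void slowpath_enter(void)
    {
            atomic_fetch_add(&waiters, 1);   /* atomic_long_inc() in the patch */
    }

    /* Allocation satisfied (or failed for good): stop waiting on reclaim. */
    static void slowpath_exit(void)
    {
            atomic_fetch_sub(&waiters, 1);   /* atomic_long_dec() in the patch */
    }

    /* Reclaim loop condition: stop early once nobody needs free pages. */
    static bool reclaim_should_stop(void)
    {
            return atomic_load(&waiters) == 0;
    }

A single counter suffices precisely because wake_all_kswapds() wakes every
node in the allowed nodemask: per-node bookkeeping would have to charge one
waiter to many nodes at once, while one global count answers the only
question kswapd asks, namely whether anyone is still waiting at all.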
---
 mm/internal.h   |  1 +
 mm/page_alloc.c | 17 ++++++++++++++---
 mm/vmscan.c     |  3 ++-
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index cf16280ce132187e5652c5d9088bac716f879a27..b9ad231c84fa97fb927c3f38d899657c9a7ed2f3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -256,6 +256,7 @@ extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
+extern atomic_long_t kswapd_waiters;
 
 extern void free_unref_page(struct page *page, unsigned int order);
 extern void free_unref_page_list(struct list_head *list);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fa30891058e1e1c7cc262560736c45fed66fd377..7e43b957aa2dbbdc20586848d51dd03e785a269b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -121,6 +121,8 @@ typedef int __bitwise fpi_t;
  */
 #define FPI_SKIP_KASAN_POISON	((__force fpi_t)BIT(2))
 
+atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);
+
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
 #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
@@ -4897,6 +4899,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	int no_progress_loops;
 	unsigned int cpuset_mems_cookie;
 	int reserve_flags;
+	bool woke_kswapd = false;
 
 	/*
 	 * We also sanity check to catch abuse of atomic reserves being used by
@@ -4943,8 +4946,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 			goto nopage;
 	}
 
-	if (alloc_flags & ALLOC_KSWAPD)
+	if (alloc_flags & ALLOC_KSWAPD) {
+		if (!woke_kswapd) {
+			atomic_long_inc(&kswapd_waiters);
+			woke_kswapd = true;
+		}
 		wake_all_kswapds(order, gfp_mask, ac);
+	}
 
 	/*
 	 * The adjusted alloc_flags might result in immediate success, so try
@@ -5149,9 +5157,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto retry;
 	}
 fail:
-	warn_alloc(gfp_mask, ac->nodemask,
-			"page allocation failure: order:%u", order);
 got_pg:
+	if (woke_kswapd)
+		atomic_long_dec(&kswapd_waiters);
+	if (!page)
+		warn_alloc(gfp_mask, ac->nodemask,
+				"page allocation failure: order:%u", order);
 	return page;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1678802e03e78577c3366afeed00f23fdaa95aca..48652f0523ac6e334522e0989e09d3ec2e80056b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4204,7 +4204,8 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 		__fs_reclaim_release(_THIS_IP_);
 		ret = try_to_freeze();
 		__fs_reclaim_acquire(_THIS_IP_);
-		if (ret || kthread_should_stop())
+		if (ret || kthread_should_stop() ||
+		    !atomic_long_read(&kswapd_waiters))
 			break;
 
 		/*