0018-ZEN-mm-Don-t-hog-the-CPU-and-zone-lock-in-rmqueue_bu.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:32 -0700
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()

There is noticeable scheduling latency and heavy zone lock contention
stemming from rmqueue_bulk's single hold of the zone lock while doing
its work, as seen with the preemptoff tracer. There's no actual need for
rmqueue_bulk() to hold the zone lock the entire time; it only does so
for supposed efficiency. As such, we can relax the zone lock and even
reschedule when IRQs are enabled in order to keep the scheduling delays
and zone lock contention at bay. Forward progress is still guaranteed,
as the zone lock can only be relaxed after page removal.

With this change, rmqueue_bulk() no longer appears as a serious offender
in the preemptoff tracer, and system latency is noticeably improved.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
 mm/page_alloc.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d86f3da635751e00a5ca5f9c386415523d2ff40a..2cdff4eafea54a2ca8ad58549746148e1390e031 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3036,15 +3036,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 }
 
 /*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency.  Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
  */
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list,
 			int migratetype, unsigned int alloc_flags)
 {
-	int i, allocated = 0;
+	const bool can_resched = !preempt_count() && !irqs_disabled();
+	int i, allocated = 0, last_mod = 0;
 
 	/*
 	 * local_lock_irq held so equivalent to spin_lock_irqsave for
@@ -3057,6 +3058,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		if (unlikely(page == NULL))
 			break;
 
+		/* Reschedule and ease the contention on the lock if needed */
+		if (i + 1 < count && ((can_resched && need_resched()) ||
+				      spin_needbreak(&zone->lock))) {
+			__mod_zone_page_state(zone, NR_FREE_PAGES,
+					      -((i + 1 - last_mod) << order));
+			last_mod = i + 1;
+			spin_unlock(&zone->lock);
+			if (can_resched)
+				cond_resched();
+			spin_lock(&zone->lock);
+		}
+
 		if (unlikely(check_pcp_refill(page, order)))
 			continue;
 
@@ -3083,7 +3096,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	 * on i. Do not confuse with 'allocated' which is the number of
 	 * pages added to the pcp list.
 	 */
-	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+	__mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
 	spin_unlock(&zone->lock);
 	return allocated;
 }