summarylogtreecommitdiffstats
path: root/interactivity_levels.patch
blob: b3920791735d6b3c3aea69bdf264c212a3d15b4d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
commit 52bbe1d1b5977d116aebca9902073030c1eb8f66
Author: Hamad Marri <hamad.s.almarri@gmail.com>
Date:   Thu May 6 16:07:35 2021 +0300

    Added interactivity levels, which are used to avoid fake interactive tasks. Fake interactive tasks are detected by the number of forks of their parent in a period of time. Two new sysctls are introduced. The first is sched_nr_fork_threshold, which is the maximum number of forks in a period of time before all children are considered fake interactive. This happens, for example, when running make -j4, where make creates a new thread for each job. All make children/threads are fake interactive; therefore they (like any fake interactive tasks) are put in lower interactivity regions based on how often the task becomes fake interactive in a period of time. When a child exits or sleeps, the number of forks and the fake interactive level are each reduced by 1. Also, when the maximum sliding window is reached without any new fork, the fake interactive level is reduced by 1. The second sysctl is sched_fake_interactive_win_time_ms, which is 1s by default. Every 1s (if a task is in the runqueue, not sleeping), if the task is already fake interactive and the time window ended without any new forks, the interactivity level is reduced by 1. For each fork (once the fork threshold has been reached), the interactivity level is increased by 1. Higher levels here mean the tasks are treated as less interactive.

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 833c01b9ffd9..1912d22464b4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -884,6 +884,12 @@ struct task_struct {
 	struct list_head		sibling;
 	struct task_struct		*group_leader;

+#ifdef CONFIG_CACULE_SCHED
+	u64				fork_start_win_stamp;
+	unsigned int			nr_forks_per_time;
+	int				is_fake_interactive;
+#endif
+
 	/*
 	 * 'ptraced' is the list of tasks this task is using ptrace() on.
 	 *
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index cb819c3d86f3..a5686379b998 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -32,9 +32,11 @@ extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;

 #ifdef CONFIG_CACULE_SCHED
-extern int interactivity_factor;
+extern unsigned int interactivity_factor;
 extern unsigned int interactivity_threshold;
-extern int cacule_max_lifetime;
+extern unsigned int cacule_max_lifetime;
+extern unsigned int fake_interactive_win_time;
+extern unsigned int nr_fork_threshold;
 #endif

 enum sched_tunable_scaling {
diff --git a/kernel/exit.c b/kernel/exit.c
index 04029e35e69a..9dfd515104db 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,17 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	write_lock_irq(&tasklist_lock);
 	forget_original_parent(tsk, &dead);

+#ifdef CONFIG_CACULE_SCHED
+	p = tsk->parent;
+	if (p) {
+		if (p->nr_forks_per_time)
+			p->nr_forks_per_time--;
+
+		if (p->is_fake_interactive)
+			p->is_fake_interactive--;
+	}
+#endif
+
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ece4dd36b23a..8cc840dc3540 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3554,11 +3554,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
+	p->se.vruntime			= 0;

 #ifdef CONFIG_CACULE_SCHED
 	p->se.cacule_node.vruntime	= 0;
-#else
-	p->se.vruntime			= 0;
+	p->fork_start_win_stamp		= 0;
+	p->nr_forks_per_time		= 0;
+	p->is_fake_interactive		= 0;
 #endif

 	INIT_LIST_HEAD(&p->se.group_node);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a2387bcb83a..d9f935bf54ff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -119,9 +119,11 @@ int __weak arch_asym_cpu_priority(int cpu)

 #endif
 #ifdef CONFIG_CACULE_SCHED
-int __read_mostly cacule_max_lifetime			= 22000; // in ms
-int __read_mostly interactivity_factor			= 32768;
+unsigned int __read_mostly cacule_max_lifetime		= 22000; // in ms
+unsigned int __read_mostly interactivity_factor		= 32768;
 unsigned int __read_mostly interactivity_threshold	= 20480;
+unsigned int __read_mostly fake_interactive_win_time	= 1000; // in ms
+unsigned int __read_mostly nr_fork_threshold		= 3;
 #endif

 #ifdef CONFIG_CFS_BANDWIDTH
@@ -595,28 +597,68 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
 #endif /* CONFIG_CACULE_SCHED */

 #ifdef CONFIG_CACULE_SCHED
+static inline unsigned int is_fake_interactive(struct cacule_node *cn)
+{
+	struct sched_entity *se = se_of(cn);
+	struct task_struct *parent = NULL;
+	struct cfs_rq *cfs_rq;
+	u64 win_time = fake_interactive_win_time * 1000000ULL;
+	u64 now = sched_clock();
+
+	while (!parent) {
+		if (entity_is_task(se)) {
+			parent = task_of(se)->parent;
+			break;
+		}
+
+		cfs_rq = group_cfs_rq(se);
+
+		if (!cfs_rq->head && !cfs_rq->curr)
+			return 0;
+
+		if (cfs_rq->head)
+			se = se_of(cfs_rq->head);
+		else if (cfs_rq->curr)
+			se = cfs_rq->curr;
+	}
+
+	if (parent->is_fake_interactive
+	    && (now - parent->fork_start_win_stamp > win_time))
+	{
+		parent->fork_start_win_stamp = now;
+		parent->is_fake_interactive--;
+	}
+
+	return parent->is_fake_interactive;
+}
+
 static unsigned int
 calc_interactivity(u64 now, struct cacule_node *se)
 {
-	u64 l_se, vr_se, sleep_se = 1ULL, u64_factor;
-	unsigned int score_se;
+	u64 l_se, vr_se, sleep_se = 1ULL, u64_factor_m, _2m;
+	unsigned int score_se, fake_interactivity;

 	/*
 	 * in case of vruntime==0, logical OR with 1 would
 	 * make sure that the least sig. bit is 1
 	 */
 	l_se		= now - se->cacule_start_time;
-	vr_se		= se->vruntime		| 1;
-	u64_factor	= interactivity_factor;
+	vr_se		= se->vruntime | 1;
+	u64_factor_m	= interactivity_factor;
+	_2m		= u64_factor_m << 1;

 	/* safety check */
 	if (likely(l_se > vr_se))
 		sleep_se = (l_se - vr_se) | 1;

 	if (sleep_se >= vr_se)
-		score_se = u64_factor / (sleep_se / vr_se);
+		score_se = u64_factor_m / (sleep_se / vr_se);
 	else
-		score_se = (u64_factor << 1) - (u64_factor / (vr_se / sleep_se));
+		score_se = _2m - (u64_factor_m / (vr_se / sleep_se));
+
+	fake_interactivity = is_fake_interactive(se);
+	if (fake_interactivity)
+		score_se += (_2m * fake_interactivity) + 1;

 	return score_se;
 }
@@ -626,6 +668,9 @@ static inline int is_interactive(struct cacule_node *cn)
 	if (se_of(cn)->vruntime == 0)
 		return 0;

+	if (is_fake_interactive(cn))
+		return 0;
+
 	return calc_interactivity(sched_clock(), cn) < interactivity_threshold;
 }

@@ -673,47 +718,18 @@ entity_before(u64 now, struct cacule_node *curr, struct cacule_node *se)
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se)
 {
 	struct cacule_node *se = &(_se->cacule_node);
-	struct cacule_node *iter, *next = NULL;
-	u64 now = sched_clock();
-	unsigned int score_se = calc_interactivity(now, se);

 	se->next = NULL;
 	se->prev = NULL;

 	if (likely(cfs_rq->head)) {
-
-		// start from tail
-		iter = cfs_rq->tail;
-
-		// does se have higher IS than iter?
-		while (iter && entity_before_cached(now, score_se, iter) == -1) {
-			next = iter;
-			iter = iter->prev;
-		}
-
-		// se in tail position
-		if (iter == cfs_rq->tail) {
-			cfs_rq->tail->next	= se;
-			se->prev		= cfs_rq->tail;
-
-			cfs_rq->tail		= se;
-		}
-		// else if not head no tail, insert se after iter
-		else if (iter) {
-			se->next	= next;
-			se->prev	= iter;
-
-			iter->next	= se;
-			next->prev	= se;
-		}
 		// insert se at head
-		else {
-			se->next		= cfs_rq->head;
-			cfs_rq->head->prev	= se;
+		se->next = cfs_rq->head;
+		cfs_rq->head->prev = se;
+
+		// lastly reset the head
+		cfs_rq->head = se;

-			// lastly reset the head
-			cfs_rq->head		= se;
-		}
 	} else {
 		// if empty rq
 		cfs_rq->head = se;
@@ -4603,16 +4619,17 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 }

 #ifdef CONFIG_CACULE_SCHED
+static struct sched_entity *
+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr);
+
 /*
  * Preempt the current task with a newly woken task if needed:
  */
 static void
 check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	u64 now	= sched_clock();
-
 	// does head have higher IS than curr
-	if (entity_before(now, &curr->cacule_node, cfs_rq->head) == 1)
+	if (pick_next_entity(cfs_rq, curr) != curr)
 		resched_curr(rq_of(cfs_rq));
 }
 #else
@@ -4694,12 +4711,26 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static struct sched_entity *
 pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	struct cacule_node *se = cfs_rq->head;
+	struct cacule_node *next, *se = cfs_rq->head;
+	u64 now = sched_clock();
+	unsigned int score_se;

 	if (unlikely(!se))
-		se = &curr->cacule_node;
-	else if (unlikely(curr
-			&& entity_before(sched_clock(), se, &curr->cacule_node) == 1))
+		return curr;
+
+	score_se = calc_interactivity(now, se);
+
+	next = se->next;
+	while (next) {
+		if (entity_before_cached(now, score_se, next) == 1) {
+			se = next;
+			score_se = calc_interactivity(now, se);
+		}
+
+		next = next->next;
+	}
+
+	if (unlikely(curr && entity_before_cached(now, score_se, &curr->cacule_node) == 1))
 		se = &curr->cacule_node;

 	return se_of(se);
@@ -5884,6 +5915,15 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	int task_sleep = flags & DEQUEUE_SLEEP;
 	int idle_h_nr_running = task_has_idle_policy(p);
 	bool was_sched_idle = sched_idle_rq(rq);
+	struct task_struct *parent = p->parent;
+
+	if (task_sleep && parent) {
+		if (parent->nr_forks_per_time)
+			parent->nr_forks_per_time--;
+
+		if (parent->is_fake_interactive)
+			parent->is_fake_interactive--;
+	}

 	util_est_dequeue(&rq->cfs, p);

@@ -11147,6 +11187,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 	struct sched_entity *curr;
 	struct rq *rq = this_rq();
 	struct rq_flags rf;
+	struct task_struct *parent = p->parent;
+	u64 now = sched_clock();

 	rq_lock(rq, &rf);
 	update_rq_clock(rq);
@@ -11157,6 +11199,13 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		update_curr(cfs_rq);

 	rq_unlock(rq, &rf);
+
+	parent->fork_start_win_stamp = now;
+
+	if (parent->nr_forks_per_time >= nr_fork_threshold)
+		parent->is_fake_interactive++;
+
+	parent->nr_forks_per_time++;
 }
 #else
 static void task_fork_fair(struct task_struct *p)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a0bf55bbb3a7..5f49409e3124 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1663,7 +1663,7 @@ static struct ctl_table kern_table[] = {
 	{
 		.procname	= "sched_interactivity_factor",
 		.data		= &interactivity_factor,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
@@ -1677,7 +1677,21 @@ static struct ctl_table kern_table[] = {
 	{
 		.procname	= "sched_max_lifetime_ms",
 		.data		= &cacule_max_lifetime,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_fake_interactive_win_time_ms",
+		.data		= &fake_interactive_win_time,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_nr_fork_threshold",
+		.data		= &nr_fork_threshold,
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},