summarylogtreecommitdiffstats
path: root/interactivity_levels.patch
diff options
context:
space:
mode:
Diffstat (limited to 'interactivity_levels.patch')
-rw-r--r--interactivity_levels.patch366
1 files changed, 366 insertions, 0 deletions
diff --git a/interactivity_levels.patch b/interactivity_levels.patch
new file mode 100644
index 000000000000..b3920791735d
--- /dev/null
+++ b/interactivity_levels.patch
@@ -0,0 +1,366 @@
+commit 52bbe1d1b5977d116aebca9902073030c1eb8f66
+Author: Hamad Marri <hamad.s.almarri@gmail.com>
+Date: Thu May 6 16:07:35 2021 +0300
+
+ Added interactivity levels, which are used to avoid fake interactive tasks. Fake interactive tasks are detected by the number of forks of their parent in a period of time. Introduced new sysctls: sched_nr_fork_threshold, which is the maximum number of forks in a period of time after which all children are considered fake interactive. This happens, for example, when running make -j4, where make creates new threads for each job. All make children/threads are fake interactive; therefore they (like any fake interactive tasks) are put in lower interactivity regions based on how often the task becomes fake interactive in a period of time. When a child exits or sleeps, the number of forks and the fake interactive level are each reduced by 1. Also, when the maximum sliding window is reached without any new fork, the fake interactive level is reduced by 1. The second sysctl is sched_fake_interactive_win_time_ms, which is 1s by default. Every 1s (if a task is in the runqueue, not sleeping), if the task is already fake interactive and the time window ended without any new forks, then the interactivity level is reduced by 1. For each fork, the interactivity level is increased by 1. Higher levels here mean the tasks are treated as less interactive.
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 833c01b9ffd9..1912d22464b4 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -884,6 +884,12 @@ struct task_struct {
+ struct list_head sibling;
+ struct task_struct *group_leader;
+
++#ifdef CONFIG_CACULE_SCHED
++ u64 fork_start_win_stamp;
++ unsigned int nr_forks_per_time;
++ int is_fake_interactive;
++#endif
++
+ /*
+ * 'ptraced' is the list of tasks this task is using ptrace() on.
+ *
+diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
+index cb819c3d86f3..a5686379b998 100644
+--- a/include/linux/sched/sysctl.h
++++ b/include/linux/sched/sysctl.h
+@@ -32,9 +32,11 @@ extern unsigned int sysctl_sched_wakeup_granularity;
+ extern unsigned int sysctl_sched_child_runs_first;
+
+ #ifdef CONFIG_CACULE_SCHED
+-extern int interactivity_factor;
++extern unsigned int interactivity_factor;
+ extern unsigned int interactivity_threshold;
+-extern int cacule_max_lifetime;
++extern unsigned int cacule_max_lifetime;
++extern unsigned int fake_interactive_win_time;
++extern unsigned int nr_fork_threshold;
+ #endif
+
+ enum sched_tunable_scaling {
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 04029e35e69a..9dfd515104db 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -667,6 +667,17 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
+ write_lock_irq(&tasklist_lock);
+ forget_original_parent(tsk, &dead);
+
++#ifdef CONFIG_CACULE_SCHED
++ p = tsk->parent;
++ if (p) {
++ if (p->nr_forks_per_time)
++ p->nr_forks_per_time--;
++
++ if (p->is_fake_interactive)
++ p->is_fake_interactive--;
++ }
++#endif
++
+ if (group_dead)
+ kill_orphaned_pgrp(tsk->group_leader, NULL);
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index ece4dd36b23a..8cc840dc3540 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3554,11 +3554,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ p->se.sum_exec_runtime = 0;
+ p->se.prev_sum_exec_runtime = 0;
+ p->se.nr_migrations = 0;
++ p->se.vruntime = 0;
+
+ #ifdef CONFIG_CACULE_SCHED
+ p->se.cacule_node.vruntime = 0;
+-#else
+- p->se.vruntime = 0;
++ p->fork_start_win_stamp = 0;
++ p->nr_forks_per_time = 0;
++ p->is_fake_interactive = 0;
+ #endif
+
+ INIT_LIST_HEAD(&p->se.group_node);
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 5a2387bcb83a..d9f935bf54ff 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -119,9 +119,11 @@ int __weak arch_asym_cpu_priority(int cpu)
+
+ #endif
+ #ifdef CONFIG_CACULE_SCHED
+-int __read_mostly cacule_max_lifetime = 22000; // in ms
+-int __read_mostly interactivity_factor = 32768;
++unsigned int __read_mostly cacule_max_lifetime = 22000; // in ms
++unsigned int __read_mostly interactivity_factor = 32768;
+ unsigned int __read_mostly interactivity_threshold = 20480;
++unsigned int __read_mostly fake_interactive_win_time = 1000; // in ms
++unsigned int __read_mostly nr_fork_threshold = 3;
+ #endif
+
+ #ifdef CONFIG_CFS_BANDWIDTH
+@@ -595,28 +597,68 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
+ #endif /* CONFIG_CACULE_SCHED */
+
+ #ifdef CONFIG_CACULE_SCHED
++static inline unsigned int is_fake_interactive(struct cacule_node *cn)
++{
++ struct sched_entity *se = se_of(cn);
++ struct task_struct *parent = NULL;
++ struct cfs_rq *cfs_rq;
++ u64 win_time = fake_interactive_win_time * 1000000ULL;
++ u64 now = sched_clock();
++
++ while (!parent) {
++ if (entity_is_task(se)) {
++ parent = task_of(se)->parent;
++ break;
++ }
++
++ cfs_rq = group_cfs_rq(se);
++
++ if (!cfs_rq->head && !cfs_rq->curr)
++ return 0;
++
++ if (cfs_rq->head)
++ se = se_of(cfs_rq->head);
++ else if (cfs_rq->curr)
++ se = cfs_rq->curr;
++ }
++
++ if (parent->is_fake_interactive
++ && (now - parent->fork_start_win_stamp > win_time))
++ {
++ parent->fork_start_win_stamp = now;
++ parent->is_fake_interactive--;
++ }
++
++ return parent->is_fake_interactive;
++}
++
+ static unsigned int
+ calc_interactivity(u64 now, struct cacule_node *se)
+ {
+- u64 l_se, vr_se, sleep_se = 1ULL, u64_factor;
+- unsigned int score_se;
++ u64 l_se, vr_se, sleep_se = 1ULL, u64_factor_m, _2m;
++ unsigned int score_se, fake_interactivity;
+
+ /*
+ * in case of vruntime==0, logical OR with 1 would
+ * make sure that the least sig. bit is 1
+ */
+ l_se = now - se->cacule_start_time;
+- vr_se = se->vruntime | 1;
+- u64_factor = interactivity_factor;
++ vr_se = se->vruntime | 1;
++ u64_factor_m = interactivity_factor;
++ _2m = u64_factor_m << 1;
+
+ /* safety check */
+ if (likely(l_se > vr_se))
+ sleep_se = (l_se - vr_se) | 1;
+
+ if (sleep_se >= vr_se)
+- score_se = u64_factor / (sleep_se / vr_se);
++ score_se = u64_factor_m / (sleep_se / vr_se);
+ else
+- score_se = (u64_factor << 1) - (u64_factor / (vr_se / sleep_se));
++ score_se = _2m - (u64_factor_m / (vr_se / sleep_se));
++
++ fake_interactivity = is_fake_interactive(se);
++ if (fake_interactivity)
++ score_se += (_2m * fake_interactivity) + 1;
+
+ return score_se;
+ }
+@@ -626,6 +668,9 @@ static inline int is_interactive(struct cacule_node *cn)
+ if (se_of(cn)->vruntime == 0)
+ return 0;
+
++ if (is_fake_interactive(cn))
++ return 0;
++
+ return calc_interactivity(sched_clock(), cn) < interactivity_threshold;
+ }
+
+@@ -673,47 +718,18 @@ entity_before(u64 now, struct cacule_node *curr, struct cacule_node *se)
+ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se)
+ {
+ struct cacule_node *se = &(_se->cacule_node);
+- struct cacule_node *iter, *next = NULL;
+- u64 now = sched_clock();
+- unsigned int score_se = calc_interactivity(now, se);
+
+ se->next = NULL;
+ se->prev = NULL;
+
+ if (likely(cfs_rq->head)) {
+-
+- // start from tail
+- iter = cfs_rq->tail;
+-
+- // does se have higher IS than iter?
+- while (iter && entity_before_cached(now, score_se, iter) == -1) {
+- next = iter;
+- iter = iter->prev;
+- }
+-
+- // se in tail position
+- if (iter == cfs_rq->tail) {
+- cfs_rq->tail->next = se;
+- se->prev = cfs_rq->tail;
+-
+- cfs_rq->tail = se;
+- }
+- // else if not head no tail, insert se after iter
+- else if (iter) {
+- se->next = next;
+- se->prev = iter;
+-
+- iter->next = se;
+- next->prev = se;
+- }
+ // insert se at head
+- else {
+- se->next = cfs_rq->head;
+- cfs_rq->head->prev = se;
++ se->next = cfs_rq->head;
++ cfs_rq->head->prev = se;
++
++ // lastly reset the head
++ cfs_rq->head = se;
+
+- // lastly reset the head
+- cfs_rq->head = se;
+- }
+ } else {
+ // if empty rq
+ cfs_rq->head = se;
+@@ -4603,16 +4619,17 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ }
+
+ #ifdef CONFIG_CACULE_SCHED
++static struct sched_entity *
++pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr);
++
+ /*
+ * Preempt the current task with a newly woken task if needed:
+ */
+ static void
+ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ {
+- u64 now = sched_clock();
+-
+ // does head have higher IS than curr
+- if (entity_before(now, &curr->cacule_node, cfs_rq->head) == 1)
++ if (pick_next_entity(cfs_rq, curr) != curr)
+ resched_curr(rq_of(cfs_rq));
+ }
+ #else
+@@ -4694,12 +4711,26 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ static struct sched_entity *
+ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ {
+- struct cacule_node *se = cfs_rq->head;
++ struct cacule_node *next, *se = cfs_rq->head;
++ u64 now = sched_clock();
++ unsigned int score_se;
+
+ if (unlikely(!se))
+- se = &curr->cacule_node;
+- else if (unlikely(curr
+- && entity_before(sched_clock(), se, &curr->cacule_node) == 1))
++ return curr;
++
++ score_se = calc_interactivity(now, se);
++
++ next = se->next;
++ while (next) {
++ if (entity_before_cached(now, score_se, next) == 1) {
++ se = next;
++ score_se = calc_interactivity(now, se);
++ }
++
++ next = next->next;
++ }
++
++ if (unlikely(curr && entity_before_cached(now, score_se, &curr->cacule_node) == 1))
+ se = &curr->cacule_node;
+
+ return se_of(se);
+@@ -5884,6 +5915,15 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ int task_sleep = flags & DEQUEUE_SLEEP;
+ int idle_h_nr_running = task_has_idle_policy(p);
+ bool was_sched_idle = sched_idle_rq(rq);
++ struct task_struct *parent = p->parent;
++
++ if (task_sleep && parent) {
++ if (parent->nr_forks_per_time)
++ parent->nr_forks_per_time--;
++
++ if (parent->is_fake_interactive)
++ parent->is_fake_interactive--;
++ }
+
+ util_est_dequeue(&rq->cfs, p);
+
+@@ -11147,6 +11187,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ struct sched_entity *curr;
+ struct rq *rq = this_rq();
+ struct rq_flags rf;
++ struct task_struct *parent = p->parent;
++ u64 now = sched_clock();
+
+ rq_lock(rq, &rf);
+ update_rq_clock(rq);
+@@ -11157,6 +11199,13 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ update_curr(cfs_rq);
+
+ rq_unlock(rq, &rf);
++
++ parent->fork_start_win_stamp = now;
++
++ if (parent->nr_forks_per_time >= nr_fork_threshold)
++ parent->is_fake_interactive++;
++
++ parent->nr_forks_per_time++;
+ }
+ #else
+ static void task_fork_fair(struct task_struct *p)
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index a0bf55bbb3a7..5f49409e3124 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1663,7 +1663,7 @@ static struct ctl_table kern_table[] = {
+ {
+ .procname = "sched_interactivity_factor",
+ .data = &interactivity_factor,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+@@ -1677,7 +1677,21 @@ static struct ctl_table kern_table[] = {
+ {
+ .procname = "sched_max_lifetime_ms",
+ .data = &cacule_max_lifetime,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++ {
++ .procname = "sched_fake_interactive_win_time_ms",
++ .data = &fake_interactive_win_time,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++ {
++ .procname = "sched_nr_fork_threshold",
++ .data = &nr_fork_threshold,
++ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },