author     ptr1337    2021-05-08 10:45:14 +0200
committer  ptr1337    2021-05-08 10:45:14 +0200
commit     c0218d56ad8ba8985fda6f3c275ec414666ee14c (patch)
tree       8df9bb6a1fc6be836ec703b97ea1bd63e53a5039
parent     7e2ffc2d1f36c4278c8f420369bacb4aaf5ddc6c (diff)
download   aur-c0218d56ad8ba8985fda6f3c275ec414666ee14c.tar.gz
correct patches added, working now
-rw-r--r--  .SRCINFO                       13
-rw-r--r--  PKGBUILD                       13
-rw-r--r--  cacule-32bit-converter.patch   78
-rw-r--r--  cacule-5.11.patch             465
-rw-r--r--  interactivity_levels.patch    366
5 files changed, 623 insertions, 312 deletions
diff --git a/.SRCINFO b/.SRCINFO
index eb54cd60438d..a7dd1a9500df 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
pkgbase = linux-raspberrypi4-cacule-stable
pkgdesc = Raspberry Pi 4 Kernel with the cacule scheduler, aarch64 and armv7
- pkgver = 5.11.17
+ pkgver = 5.11.19
pkgrel = 1
url = http://www.kernel.org/
arch = armv7h
@@ -13,26 +13,28 @@ pkgbase = linux-raspberrypi4-cacule-stable
makedepends = bc
makedepends = git
options = !strip
- source = https://github.com/raspberrypi/linux/archive/e99921bb4319ce35ce2e9841a51c4fbb6fb9cf2c.tar.gz
+ source = https://github.com/raspberrypi/linux/archive/fd19c1acacb4dc76a1a36a6a07d7db92ae4b4c9e.tar.gz
source = cmdline.txt
source = linux.preset
source = 60-linux.hook
source = 90-linux.hook
source = 0001-Make-proc-cpuinfo-consistent-on-arm64-and-arm.patch
source = cacule-5.11.patch
- md5sums = 25cf66285e262398acb1adb5c2e5311c
+ source = interactivity_levels.patch
+ md5sums = bf694586470392017dc161d3965b35d0
md5sums = 31c02f4518d46deb5f0c2ad1f8b083cd
md5sums = 86d4a35722b5410e3b29fc92dae15d4b
md5sums = ce6c81ad1ad1f8b333fd6077d47abdaf
md5sums = 441ec084c47cddc53e592fb0cbce4edf
md5sums = f66a7ea3feb708d398ef57e4da4815e9
- md5sums = 8585be816a07e27a86cd739c1d89cc7b
+ md5sums = 740fa0f2ccb02a74be37e566b32f7a0c
+ md5sums = a621493cad4df968b9b5c140c86d4d93
source_armv7h = config
source_armv7h = config.txt
source_armv7h = cacule-32bit-converter.patch
md5sums_armv7h = 5697f0e23f8329047cfce076f0e904b4
md5sums_armv7h = 9669d916a5929a2eedbd64477f83d99e
- md5sums_armv7h = 60020b476ade77049c832f55fc0bea1f
+ md5sums_armv7h = 4982367e95e393b5937c75f0b30d87e9
source_aarch64 = config8
source_aarch64 = config8.txt
md5sums_aarch64 = c1fa6fc261864237ecdcd4daae0920be
@@ -41,4 +43,3 @@ pkgbase = linux-raspberrypi4-cacule-stable
pkgname = linux-raspberrypi4-cacule-stable
pkgname = linux-raspberrypi4-cacule-stable-headers
-
diff --git a/PKGBUILD b/PKGBUILD
index c69139fefaf9..5f225fad9396 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -7,11 +7,11 @@
buildarch=12
pkgbase=linux-raspberrypi4-cacule-stable
-_commit=e99921bb4319ce35ce2e9841a51c4fbb6fb9cf2c
+_commit=fd19c1acacb4dc76a1a36a6a07d7db92ae4b4c9e
_srcname=linux-${_commit}
_kernelname=${pkgbase#linux}
_desc="Raspberry Pi 4 with the cacule scheduler"
-pkgver=5.11.17
+pkgver=5.11.19
pkgrel=1
pkgdesc="Raspberry Pi 4 Kernel with the cacule schedeuler, aarch64 and armv7"
arch=('armv7h' 'aarch64')
@@ -26,19 +26,21 @@ source=("https://github.com/raspberrypi/linux/archive/${_commit}.tar.gz"
'90-linux.hook'
'0001-Make-proc-cpuinfo-consistent-on-arm64-and-arm.patch'
'cacule-5.11.patch'
+ 'interactivity_levels.patch'
)
source_armv7h=('config' 'config.txt' 'cacule-32bit-converter.patch')
source_aarch64=('config8' 'config8.txt')
-md5sums=('25cf66285e262398acb1adb5c2e5311c'
+md5sums=('bf694586470392017dc161d3965b35d0'
'31c02f4518d46deb5f0c2ad1f8b083cd'
'86d4a35722b5410e3b29fc92dae15d4b'
'ce6c81ad1ad1f8b333fd6077d47abdaf'
'441ec084c47cddc53e592fb0cbce4edf'
'f66a7ea3feb708d398ef57e4da4815e9'
- '8585be816a07e27a86cd739c1d89cc7b')
+ '740fa0f2ccb02a74be37e566b32f7a0c'
+ 'a621493cad4df968b9b5c140c86d4d93')
md5sums_armv7h=('5697f0e23f8329047cfce076f0e904b4'
'9669d916a5929a2eedbd64477f83d99e'
- '60020b476ade77049c832f55fc0bea1f')
+ '4982367e95e393b5937c75f0b30d87e9')
md5sums_aarch64=('c1fa6fc261864237ecdcd4daae0920be'
'9669d916a5929a2eedbd64477f83d99e')
@@ -62,6 +64,7 @@ prepare() {
patch -Np1 -i ../0001-Make-proc-cpuinfo-consistent-on-arm64-and-arm.patch
# cacule-scheduler
patch -Np1 -i ../cacule-5.11.patch
+ patch -Np1 -i ../interactivity_levels.patch
if [[ $CARCH == "armv7h" ]]; then
patch -Np1 -i ../cacule-32bit-converter.patch #only needed if building on armv6 or armv7
fi
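
Since the source commit, the cacule patch and the new interactivity_levels.patch all change in this commit, the md5sums arrays and .SRCINFO have to be regenerated along with the PKGBUILD. A minimal sketch of that maintenance step, assuming the usual pacman-contrib tooling (not something shipped by this package):

    $ updpkgsums                           # refresh the md5sums=() / md5sums_*=() arrays from the listed sources
    $ makepkg --printsrcinfo > .SRCINFO    # regenerate .SRCINFO from the edited PKGBUILD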
diff --git a/cacule-32bit-converter.patch b/cacule-32bit-converter.patch
index b5d87f08d6a1..1526fa083bc4 100644
--- a/cacule-32bit-converter.patch
+++ b/cacule-32bit-converter.patch
@@ -5,77 +5,77 @@ index c99fc326ec24..71c27133c53c 100644
@@ -26,6 +26,8 @@
*/
#include "sched.h"
-
+
+#include <linux/math64.h>
+
/*
* Targeted preemption latency for CPU-bound tasks:
*
@@ -127,7 +129,7 @@ int __weak arch_asym_cpu_priority(int cpu)
-
+
#endif
#ifdef CONFIG_CACULE_SCHED
--int __read_mostly cacule_max_lifetime = 22000; // in ms
-+int __read_mostly cacule_max_lifetime = 4000; // in ms
- int __read_mostly interactivity_factor = 32768;
+-unsigned int __read_mostly cacule_max_lifetime = 22000; // in ms
++unsigned int __read_mostly cacule_max_lifetime = 4000; // in ms
+ unsigned int __read_mostly interactivity_factor = 32768;
unsigned int __read_mostly interactivity_threshold = 20480;
- #endif
+ unsigned int __read_mostly fake_interactive_win_time = 1000; // in ms
@@ -602,6 +604,7 @@ calc_interactivity(u64 now, struct cacule_node *se)
{
- u64 l_se, vr_se, sleep_se = 1ULL, u64_factor;
- unsigned int score_se;
+ u64 l_se, vr_se, sleep_se = 1ULL, u64_factor_m, _2m;
+ unsigned int score_se, fake_interactivity;
+ u32 r_se_rem;
-
+
/*
* in case of vruntime==0, logical OR with 1 would
@@ -616,9 +619,19 @@ calc_interactivity(u64 now, struct cacule_node *se)
sleep_se = (l_se - vr_se) | 1;
-
+
if (sleep_se >= vr_se)
-- score_se = u64_factor / (sleep_se / vr_se);
+- score_se = u64_factor_m / (sleep_se / vr_se);
+ score_se = div_u64_rem
+ (
-+ u64_factor, div_u64_rem
++ u64_factor_m, div_u64_rem
+ (
+ sleep_se, vr_se
+ , &r_se_rem)
+ , &r_se_rem);
else
-- score_se = (u64_factor << 1) - (u64_factor / (vr_se / sleep_se));
-+ score_se = (u64_factor << 1) - (
-+ div_u64_rem(u64_factor, (
+- score_se = _2m - (u64_factor_m / (vr_se / sleep_se));
++ score_se = _2m - (
++ div_u64_rem(u64_factor_m, (
+ div_u64_rem(vr_se, sleep_se, &r_se_rem)
+ ), &r_se_rem)
+ );
-
- return score_se;
- }
-@@ -1041,6 +1054,7 @@ static void reset_lifetime(u64 now, struct sched_entity *se)
+
+ fake_interactivity = is_fake_interactive(se);
+ if (fake_interactivity)
+@@ -1041,6 +1054,7 @@ static void normalize_lifetime(u64 now, struct sched_entity *se)
struct cacule_node *cn;
u64 max_life_ns, life_time;
s64 diff;
+ u32 rem;
-
+
/*
* left shift 20 bits is approximately = * 1000000
-@@ -1054,9 +1068,9 @@ static void reset_lifetime(u64 now, struct sched_entity *se)
- life_time = now - cn->cacule_start_time;
- diff = life_time - max_life_ns;
-
-- if (unlikely(diff > 0)) {
-+ if (diff > 0) {
- // multiply life_time by 8 for more precision
-- u64 old_hrrn_x8 = life_time / ((cn->vruntime >> 3) | 1);
-+ u64 old_hrrn_x8 = div_u64_rem(life_time, ((cn->vruntime >> 3) | 1), &rem);
-
- // reset life to half max_life (i.e ~15s)
- cn->cacule_start_time = now - (max_life_ns >> 1);
+@@ -1054,9 +1068,9 @@ static void normalize_lifetime(u64 now, struct sched_entity *se)
+ life_time = now - cn->cacule_start_time;
+ diff = life_time - max_life_ns;
+
+- if (diff > 0) {
++ if (diff > 0) {
+ // multiply life_time by 1024 for more precision
+- u64 old_hrrn_x = (life_time << 7) / ((cn->vruntime >> 3) | 1);
++ u64 old_hrrn_x8 = div_u64_rem(life_time, ((cn->vruntime >> 3) | 1), &rem);
+
+ // reset life to half max_life (i.e ~15s)
+ cn->cacule_start_time = now - (max_life_ns >> 1);
@@ -1065,7 +1079,7 @@ static void reset_lifetime(u64 now, struct sched_entity *se)
- if (old_hrrn_x8 == 0) old_hrrn_x8 = 1;
-
- // reset vruntime based on old hrrn ratio
-- cn->vruntime = (max_life_ns << 2) / old_hrrn_x8;
-+ cn->vruntime = div_u64_rem((max_life_ns << 2), old_hrrn_x8, &rem);
- }
+ if (old_hrrn_x8 == 0) old_hrrn_x8 = 1;
+
+ // reset vruntime based on old hrrn ratio
+- cn->vruntime = (max_life_ns << 9) / old_hrrn_x;
++ cn->vruntime = div_u64_rem((max_life_ns << 2), old_hrrn_x8, &rem);
}
- }
+ }
+ #endif /* CONFIG_CACULE_SCHED */
diff --git a/cacule-5.11.patch b/cacule-5.11.patch
index 99941578e3e9..231ee8476dc4 100644
--- a/cacule-5.11.patch
+++ b/cacule-5.11.patch
@@ -5,12 +5,12 @@ index 1d56a6b73a4e..4d55ff02310c 100644
@@ -1087,6 +1087,10 @@ Model available). If your platform happens to meet the
requirements for EAS but you do not want to use it, change
this value to 0.
-
+
+sched_interactivity_factor (CacULE scheduler only)
+==================================================
+Sets the value *m* for interactivity score calculations. See
+Figure 1 in https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
-
+
sched_schedstats
================
diff --git a/Documentation/scheduler/sched-CacULE.rst b/Documentation/scheduler/sched-CacULE.rst
@@ -96,13 +96,13 @@ index 000000000000..82b0847c468a
+ idle timer scheduler in order to avoid to get into priority
+ inversion problems which would deadlock the machine.
diff --git a/include/linux/sched.h b/include/linux/sched.h
-index 6e3a5eeec509..e5da9a62fe4e 100644
+index 6e3a5eeec509..97103c8a91b2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -450,16 +450,29 @@ struct sched_statistics {
+@@ -450,10 +450,22 @@ struct sched_statistics {
#endif
};
-
+
+#ifdef CONFIG_CACULE_SCHED
+struct cacule_node {
+ struct cacule_node* next;
@@ -118,18 +118,10 @@ index 6e3a5eeec509..e5da9a62fe4e 100644
struct rb_node run_node;
+#ifdef CONFIG_CACULE_SCHED
+ struct cacule_node cacule_node;
-+#else
-+ u64 vruntime;
+#endif
struct list_head group_node;
unsigned int on_rq;
-
- u64 exec_start;
- u64 sum_exec_runtime;
-- u64 vruntime;
- u64 prev_sum_exec_runtime;
-
- u64 nr_migrations;
+
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba88aca5..cb819c3d86f3 100644
--- a/include/linux/sched/sysctl.h
@@ -137,7 +129,7 @@ index 3c31ba88aca5..cb819c3d86f3 100644
@@ -31,6 +31,12 @@ extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
-
+
+#ifdef CONFIG_CACULE_SCHED
+extern int interactivity_factor;
+extern unsigned int interactivity_threshold;
@@ -152,9 +144,9 @@ index a3d27421de8f..d0cfdf6e9bed 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -824,6 +824,17 @@ config UCLAMP_BUCKETS_COUNT
-
+
endmenu
-
+
+config CACULE_SCHED
+ bool "CacULE CPU scheduler"
+ default y
@@ -177,6 +169,28 @@ index a3d27421de8f..d0cfdf6e9bed 100644
help
This option optimizes the scheduler for common desktop workloads by
automatically creating and populating task groups. This separation
+diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
+index 38ef6d06888e..c8cf984c294e 100644
+--- a/kernel/Kconfig.hz
++++ b/kernel/Kconfig.hz
+@@ -46,6 +46,9 @@ choice
+ 1000 Hz is the preferred choice for desktop systems and other
+ systems requiring fast interactive responses to events.
+
++ config HZ_2000
++ bool "2000 HZ"
++
+ endchoice
+
+ config HZ
+@@ -54,6 +57,7 @@ config HZ
+ default 250 if HZ_250
+ default 300 if HZ_300
+ default 1000 if HZ_1000
++ default 2000 if HZ_2000
+
+ config SCHED_HRTICK
+ def_bool HIGH_RES_TIMERS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0056507a373..4d8a3b232ae9 100644
--- a/kernel/sched/core.c
@@ -193,12 +207,12 @@ index f0056507a373..4d8a3b232ae9 100644
+#endif
+
INIT_LIST_HEAD(&p->se.group_node);
-
+
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3840,6 +3846,10 @@ void wake_up_new_task(struct task_struct *p)
update_rq_clock(rq);
post_init_entity_util_avg(p);
-
+
+#ifdef CONFIG_CACULE_SCHED
+ p->se.cacule_node.cacule_start_time = sched_clock();
+#endif
@@ -209,75 +223,22 @@ index f0056507a373..4d8a3b232ae9 100644
@@ -7727,6 +7737,10 @@ void __init sched_init(void)
BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
#endif
-
+
+#ifdef CONFIG_CACULE_SCHED
+ printk(KERN_INFO "CacULE CPU scheduler v5.11 by Hamad Al Marri.");
+#endif
+
wait_bit_init();
-
+
#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
-index 2357921580f9..fb4ef69724c3 100644
+index 2357921580f9..ac08a7ced508 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
-@@ -439,7 +439,11 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
- return;
-
- PN(se->exec_start);
-+#ifdef CONFIG_CACULE_SCHED
-+ PN(se->cacule_node.vruntime);
-+#else
- PN(se->vruntime);
-+#endif
- PN(se->sum_exec_runtime);
-
- if (schedstat_enabled()) {
-@@ -493,7 +497,11 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
-
- SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
- p->comm, task_pid_nr(p),
-+#ifdef CONFIG_CACULE_SCHED
-+ SPLIT_NS(p->se.cacule_node.vruntime),
-+#else
- SPLIT_NS(p->se.vruntime),
-+#endif
- (long long)(p->nvcsw + p->nivcsw),
- p->prio);
-
-@@ -535,8 +543,12 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
-
- void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
- {
-- s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
-- spread, rq0_min_vruntime, spread0;
-+ s64 MIN_vruntime = -1,
-+#if !defined(CONFIG_CACULE_SCHED)
-+ min_vruntime, rq0_min_vruntime,
-+ spread0,
-+#endif
-+ max_vruntime = -1, spread;
- struct rq *rq = cpu_rq(cpu);
- struct sched_entity *last;
- unsigned long flags;
-@@ -553,25 +565,41 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
-
- raw_spin_lock_irqsave(&rq->lock, flags);
- if (rb_first_cached(&cfs_rq->tasks_timeline))
-+#ifdef CONFIG_CACULE_SCHED
-+ MIN_vruntime = (__pick_first_entity(cfs_rq))->cacule_node.vruntime;
-+#else
- MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
-+#endif
-+
+@@ -557,21 +557,27 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
-+#ifdef CONFIG_CACULE_SCHED
-+ max_vruntime = last->cacule_node.vruntime;
-+#else
max_vruntime = last->vruntime;
-+#endif
-+
+#if !defined(CONFIG_CACULE_SCHED)
min_vruntime = cfs_rq->min_vruntime;
rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
@@ -302,20 +263,8 @@ index 2357921580f9..fb4ef69724c3 100644
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
-@@ -928,7 +956,11 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
- #define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
-
- PN(se.exec_start);
-+#ifdef CONFIG_CACULE_SCHED
-+ PN(se.cacule_node.vruntime);
-+#else
- PN(se.vruntime);
-+#endif
- PN(se.sum_exec_runtime);
-
- nr_switches = p->nvcsw + p->nivcsw;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index bbc78794224a..7e9ce056efd7 100644
+index bbc78794224a..056b5673605f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -19,6 +19,10 @@
@@ -328,23 +277,23 @@ index bbc78794224a..7e9ce056efd7 100644
+ * (C) 2020 Hamad Al Marri <hamad.s.almarri@gmail.com>
*/
#include "sched.h"
-
+
@@ -113,6 +117,11 @@ int __weak arch_asym_cpu_priority(int cpu)
*/
#define fits_capacity(cap, max) ((cap) * 1280 < (max) * 1024)
-
+
+#endif
+#ifdef CONFIG_CACULE_SCHED
+int __read_mostly cacule_max_lifetime = 22000; // in ms
+int __read_mostly interactivity_factor = 32768;
+unsigned int __read_mostly interactivity_threshold = 20480;
#endif
-
+
#ifdef CONFIG_CFS_BANDWIDTH
@@ -253,6 +262,14 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight
-
+
const struct sched_class fair_sched_class;
-
+
+
+#ifdef CONFIG_CACULE_SCHED
+static inline struct sched_entity *se_of(struct cacule_node *cn)
@@ -400,7 +349,7 @@ index bbc78794224a..7e9ce056efd7 100644
+
+static inline int is_interactive(struct cacule_node *cn)
+{
-+ if (cn->vruntime == 0)
++ if (se_of(cn)->vruntime == 0)
+ return 0;
+
+ return calc_interactivity(sched_clock(), cn) < interactivity_threshold;
@@ -521,12 +470,12 @@ index bbc78794224a..7e9ce056efd7 100644
+ struct cacule_node *next = se->next;
+
+ prev->next = next;
-+
+
+ if (next)
+ next->prev = prev;
+ }
+}
-
++
+struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
+{
+ return se_of(cfs_rq->head);
@@ -536,11 +485,11 @@ index bbc78794224a..7e9ce056efd7 100644
* Enqueue an entity into the rb-tree:
*/
@@ -626,16 +805,29 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se)
-
+
return rb_entry(next, struct sched_entity, run_node);
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
#ifdef CONFIG_SCHED_DEBUG
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
@@ -556,19 +505,19 @@ index bbc78794224a..7e9ce056efd7 100644
+ return se_of(cn);
+#else
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
-
+
if (!last)
return NULL;
-
+
return rb_entry(last, struct sched_entity, run_node);
+#endif /* CONFIG_CACULE_SCHED */
}
-
+
/**************************************************************
@@ -720,6 +912,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
return slice;
}
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/*
* We calculate the vruntime slice of a to-be-inserted task.
@@ -578,17 +527,17 @@ index bbc78794224a..7e9ce056efd7 100644
return calc_delta_fair(sched_slice(cfs_rq, se), se);
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
#include "pelt.h"
#ifdef CONFIG_SMP
-@@ -836,13 +1030,49 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
+@@ -836,14 +1030,46 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_SMP */
-
+
+#ifdef CONFIG_CACULE_SCHED
+static void normalize_lifetime(u64 now, struct sched_entity *se)
+{
-+ struct cacule_node *cn;
++ struct cacule_node *cn = &se->cacule_node;
+ u64 max_life_ns, life_time;
+ s64 diff;
+
@@ -598,25 +547,21 @@ index bbc78794224a..7e9ce056efd7 100644
+ * Ex. for 30s, with left shift (20bits) == 31.457s
+ */
+ max_life_ns = ((u64) cacule_max_lifetime) << 20;
++ life_time = now - cn->cacule_start_time;
++ diff = life_time - max_life_ns;
+
-+ for_each_sched_entity(se) {
-+ cn = &se->cacule_node;
-+ life_time = now - cn->cacule_start_time;
-+ diff = life_time - max_life_ns;
++ if (diff > 0) {
++ // multiply life_time by 1024 for more precision
++ u64 old_hrrn_x = (life_time << 7) / ((cn->vruntime >> 3) | 1);
+
-+ if (unlikely(diff > 0)) {
-+ // multiply life_time by 8 for more precision
-+ u64 old_hrrn_x8 = life_time / ((cn->vruntime >> 3) | 1);
++ // reset life to half max_life (i.e ~15s)
++ cn->cacule_start_time = now - (max_life_ns >> 1);
+
-+ // reset life to half max_life (i.e ~15s)
-+ cn->cacule_start_time = now - (max_life_ns >> 1);
++ // avoid division by zero
++ if (old_hrrn_x == 0) old_hrrn_x = 1;
+
-+ // avoid division by zero
-+ if (old_hrrn_x8 == 0) old_hrrn_x8 = 1;
-+
-+ // reset vruntime based on old hrrn ratio
-+ cn->vruntime = (max_life_ns << 2) / old_hrrn_x8;
-+ }
++ // reset vruntime based on old hrrn ratio
++ cn->vruntime = (max_life_ns << 9) / old_hrrn_x;
+ }
+}
+#endif /* CONFIG_CACULE_SCHED */
@@ -628,76 +573,71 @@ index bbc78794224a..7e9ce056efd7 100644
{
struct sched_entity *curr = cfs_rq->curr;
- u64 now = rq_clock_task(rq_of(cfs_rq));
+- u64 delta_exec;
+ u64 now = sched_clock();
- u64 delta_exec;
-
++ u64 delta_exec, delta_fair;
+
if (unlikely(!curr))
-@@ -860,13 +1090,22 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ return;
+@@ -860,8 +1086,15 @@ static void update_curr(struct cfs_rq *cfs_rq)
curr->sum_exec_runtime += delta_exec;
schedstat_add(cfs_rq->exec_clock, delta_exec);
-
+
+#ifdef CONFIG_CACULE_SCHED
-+ curr->cacule_node.vruntime += calc_delta_fair(delta_exec, curr);
++ delta_fair = calc_delta_fair(delta_exec, curr);
++ curr->vruntime += delta_fair;
++ curr->cacule_node.vruntime += delta_fair;
+ normalize_lifetime(now, curr);
+#else
curr->vruntime += calc_delta_fair(delta_exec, curr);
update_min_vruntime(cfs_rq);
+#endif
-
+
if (entity_is_task(curr)) {
struct task_struct *curtask = task_of(curr);
-
-+#ifdef CONFIG_CACULE_SCHED
-+ trace_sched_stat_runtime(curtask, delta_exec, curr->cacule_node.vruntime);
-+#else
- trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
-+#endif
- cgroup_account_cputime(curtask, delta_exec);
- account_group_exec_runtime(curtask, delta_exec);
- }
-@@ -1029,7 +1268,6 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -1029,7 +1262,6 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
static inline void
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
-
if (!schedstat_enabled())
return;
-
-@@ -1061,7 +1299,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+
+@@ -1061,7 +1293,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
/*
* We are starting a new run period:
*/
- se->exec_start = rq_clock_task(rq_of(cfs_rq));
+ se->exec_start = sched_clock();
}
-
+
/**************************************************
-@@ -4115,7 +4353,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
-
+@@ -4115,7 +4347,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
+
static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
-#ifdef CONFIG_SCHED_DEBUG
+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_CACULE_SCHED)
s64 d = se->vruntime - cfs_rq->min_vruntime;
-
+
if (d < 0)
-@@ -4126,6 +4364,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
+@@ -4126,6 +4358,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
#endif
}
-
+
+#if !defined(CONFIG_CACULE_SCHED)
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
-@@ -4157,6 +4396,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+@@ -4157,6 +4390,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
/* ensure we never gain time by being placed backwards. */
se->vruntime = max_vruntime(se->vruntime, vruntime);
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
-
-@@ -4215,18 +4455,23 @@ static inline bool cfs_bandwidth_used(void);
+
+@@ -4215,18 +4449,23 @@ static inline bool cfs_bandwidth_used(void);
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
@@ -705,7 +645,7 @@ index bbc78794224a..7e9ce056efd7 100644
bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
+#endif
bool curr = cfs_rq->curr == se;
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/*
* If we're the current task, we must renormalise before calling
@@ -714,87 +654,87 @@ index bbc78794224a..7e9ce056efd7 100644
if (renorm && curr)
se->vruntime += cfs_rq->min_vruntime;
+#endif
-
+
update_curr(cfs_rq);
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/*
* Otherwise, renormalise after, such that we're placed at the current
* moment in time, instead of some random moment in the past. Being
-@@ -4235,6 +4480,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -4235,6 +4474,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
if (renorm && !curr)
se->vruntime += cfs_rq->min_vruntime;
+#endif
-
+
/*
* When enqueuing a sched_entity, we must:
-@@ -4249,8 +4495,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -4249,8 +4489,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_cfs_group(se);
account_entity_enqueue(cfs_rq, se);
-
+
+#if !defined(CONFIG_CACULE_SCHED)
if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
+#endif
-
+
check_schedstat_required();
update_stats_enqueue(cfs_rq, se, flags);
-@@ -4271,6 +4519,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -4271,6 +4513,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
check_enqueue_throttle(cfs_rq);
}
-
+
+#if !defined(CONFIG_CACULE_SCHED)
static void __clear_buddies_last(struct sched_entity *se)
{
for_each_sched_entity(se) {
-@@ -4315,6 +4564,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+@@ -4315,6 +4558,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (cfs_rq->skip == se)
__clear_buddies_skip(se);
}
+#endif /* !CONFIG_CACULE_SCHED */
-
+
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
-
-@@ -4339,13 +4589,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-
+
+@@ -4339,13 +4583,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+
update_stats_dequeue(cfs_rq, se, flags);
-
+
+#if !defined(CONFIG_CACULE_SCHED)
clear_buddies(cfs_rq, se);
+#endif
-
+
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
se->on_rq = 0;
account_entity_dequeue(cfs_rq, se);
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/*
* Normalize after update_curr(); which will also have moved
* min_vruntime if @se is the one holding it back. But before doing
-@@ -4354,12 +4607,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -4354,12 +4601,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
if (!(flags & DEQUEUE_SLEEP))
se->vruntime -= cfs_rq->min_vruntime;
+#endif
-
+
/* return excess runtime on last dequeue */
return_cfs_rq_runtime(cfs_rq);
-
+
update_cfs_group(se);
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/*
* Now advance min_vruntime if @se was the entity holding it back,
* except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
-@@ -4368,8 +4623,23 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -4368,8 +4617,23 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
update_min_vruntime(cfs_rq);
+#endif
}
-
+
+#ifdef CONFIG_CACULE_SCHED
+/*
+ * Preempt the current task with a newly woken task if needed:
@@ -812,18 +752,18 @@ index bbc78794224a..7e9ce056efd7 100644
/*
* Preempt the current task with a newly woken task if needed:
*/
-@@ -4409,6 +4679,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+@@ -4409,6 +4673,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
if (delta > ideal_runtime)
resched_curr(rq_of(cfs_rq));
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
static void
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
-@@ -4443,6 +4714,21 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+@@ -4443,6 +4708,21 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->prev_sum_exec_runtime = se->sum_exec_runtime;
}
-
+
+#ifdef CONFIG_CACULE_SCHED
+static struct sched_entity *
+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
@@ -841,26 +781,26 @@ index bbc78794224a..7e9ce056efd7 100644
+#else
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
-
-@@ -4503,6 +4789,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
-
+
+@@ -4503,6 +4783,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+
return se;
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
-
-@@ -5605,7 +5892,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+
+@@ -5605,7 +5886,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
hrtick_update(rq);
}
-
+
+#if !defined(CONFIG_CACULE_SCHED)
static void set_next_buddy(struct sched_entity *se);
+#endif
-
+
/*
* The dequeue_task method is called before nr_running is
-@@ -5637,12 +5926,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+@@ -5637,12 +5920,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq->load.weight) {
/* Avoid re-evaluating load for this entity: */
se = parent_entity(se);
@@ -875,32 +815,32 @@ index bbc78794224a..7e9ce056efd7 100644
break;
}
flags |= DEQUEUE_SLEEP;
-@@ -5758,6 +6049,7 @@ static unsigned long capacity_of(int cpu)
+@@ -5758,6 +6043,7 @@ static unsigned long capacity_of(int cpu)
return cpu_rq(cpu)->cpu_capacity;
}
-
+
+#if !defined(CONFIG_CACULE_SCHED)
static void record_wakee(struct task_struct *p)
{
/*
-@@ -5804,6 +6096,7 @@ static int wake_wide(struct task_struct *p)
+@@ -5804,6 +6090,7 @@ static int wake_wide(struct task_struct *p)
return 0;
return 1;
}
+#endif
-
+
/*
* The purpose of wake_affine() is to quickly determine on which CPU we can run
-@@ -6484,6 +6777,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
+@@ -6484,6 +6771,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/*
* Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
* to @dst_cpu.
-@@ -6717,6 +7011,57 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
-
+@@ -6717,6 +7005,57 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+
return -1;
}
+#endif /* CONFIG_CACULE_SCHED */
@@ -954,24 +894,25 @@ index bbc78794224a..7e9ce056efd7 100644
+ return new_cpu;
+}
+#endif
-
+
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
-@@ -6741,6 +7086,25 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+@@ -6741,6 +7080,26 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
/* SD_flags and WF_flags share the first nibble */
int sd_flag = wake_flags & 0xF;
-
+
+#ifdef CONFIG_CACULE_SCHED
+ struct sched_entity *se = &p->se;
-+ unsigned int autogroup_enabled = 0;
-+
-+#ifdef CONFIG_SCHED_AUTOGROUP
-+ autogroup_enabled = sysctl_sched_autogroup_enabled;
-+#endif
+
-+ if (autogroup_enabled || !is_interactive(&se->cacule_node))
++ if (!is_interactive(&se->cacule_node))
+ goto cfs_way;
+
++ // check first if the prev cpu
++ // has 0 tasks
++ if (cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
++ cpu_rq(prev_cpu)->cfs.nr_running == 0)
++ return prev_cpu;
++
+ new_cpu = find_least_IS_cpu(p);
+
+ if (likely(new_cpu != -1))
@@ -982,16 +923,16 @@ index bbc78794224a..7e9ce056efd7 100644
+#else
if (wake_flags & WF_TTWU) {
record_wakee(p);
-
-@@ -6753,6 +7117,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
-
+
+@@ -6753,6 +7112,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
rcu_read_lock();
for_each_domain(cpu, tmp) {
-@@ -6799,6 +7164,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
+@@ -6799,6 +7159,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
*/
static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
{
@@ -999,31 +940,31 @@ index bbc78794224a..7e9ce056efd7 100644
/*
* As blocked tasks retain absolute vruntime the migration needs to
* deal with this by subtracting the old and adding the new
-@@ -6824,6 +7190,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
-
+@@ -6824,6 +7185,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
+
se->vruntime -= min_vruntime;
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
if (p->on_rq == TASK_ON_RQ_MIGRATING) {
/*
-@@ -6869,6 +7236,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+@@ -6869,6 +7231,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
#endif /* CONFIG_SMP */
-
+
+#if !defined(CONFIG_CACULE_SCHED)
static unsigned long wakeup_gran(struct sched_entity *se)
{
unsigned long gran = sysctl_sched_wakeup_granularity;
-@@ -6947,6 +7315,7 @@ static void set_skip_buddy(struct sched_entity *se)
+@@ -6947,6 +7310,7 @@ static void set_skip_buddy(struct sched_entity *se)
for_each_sched_entity(se)
cfs_rq_of(se)->skip = se;
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
/*
* Preempt the current task with a newly woken task if needed:
-@@ -6955,9 +7324,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+@@ -6955,9 +7319,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
{
struct task_struct *curr = rq->curr;
struct sched_entity *se = &curr->se, *pse = &p->se;
@@ -1033,23 +974,23 @@ index bbc78794224a..7e9ce056efd7 100644
int scale = cfs_rq->nr_running >= sched_nr_latency;
int next_buddy_marked = 0;
+#endif /* CONFIG_CACULE_SCHED */
-
+
if (unlikely(se == pse))
return;
-@@ -6971,10 +7343,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+@@ -6971,10 +7338,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
return;
-
+
+#if !defined(CONFIG_CACULE_SCHED)
if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
set_next_buddy(pse);
next_buddy_marked = 1;
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
/*
* We can come here with TIF_NEED_RESCHED already set from new task
-@@ -7004,6 +7378,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+@@ -7004,6 +7373,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
find_matching_se(&se, &pse);
update_curr(cfs_rq_of(se));
BUG_ON(!pse);
@@ -1061,14 +1002,14 @@ index bbc78794224a..7e9ce056efd7 100644
if (wakeup_preempt_entity(se, pse) == 1) {
/*
* Bias pick_next to pick the sched entity that is
-@@ -7013,11 +7392,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+@@ -7013,11 +7387,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
set_next_buddy(pse);
goto preempt;
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
return;
-
+
preempt:
resched_curr(rq);
+
@@ -1076,15 +1017,15 @@ index bbc78794224a..7e9ce056efd7 100644
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
-@@ -7032,6 +7414,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
-
+@@ -7032,6 +7409,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+
if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
set_last_buddy(se);
+#endif /* CONFIG_CACULE_SCHED */
}
-
+
struct task_struct *
-@@ -7206,7 +7589,10 @@ static void yield_task_fair(struct rq *rq)
+@@ -7206,7 +7584,10 @@ static void yield_task_fair(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
@@ -1092,60 +1033,60 @@ index bbc78794224a..7e9ce056efd7 100644
+#if !defined(CONFIG_CACULE_SCHED)
struct sched_entity *se = &curr->se;
+#endif
-
+
/*
* Are we the only task in the tree?
-@@ -7214,7 +7600,9 @@ static void yield_task_fair(struct rq *rq)
+@@ -7214,7 +7595,9 @@ static void yield_task_fair(struct rq *rq)
if (unlikely(rq->nr_running == 1))
return;
-
+
+#if !defined(CONFIG_CACULE_SCHED)
clear_buddies(cfs_rq, se);
+#endif
-
+
if (curr->policy != SCHED_BATCH) {
update_rq_clock(rq);
-@@ -7230,7 +7618,9 @@ static void yield_task_fair(struct rq *rq)
+@@ -7230,7 +7613,9 @@ static void yield_task_fair(struct rq *rq)
rq_clock_skip_update(rq);
}
-
+
+#if !defined(CONFIG_CACULE_SCHED)
set_skip_buddy(se);
+#endif
}
-
+
static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
-@@ -7241,8 +7631,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
+@@ -7241,8 +7626,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
return false;
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/* Tell the scheduler that we'd really like pse to run next. */
set_next_buddy(se);
+#endif
-
+
yield_task_fair(rq);
-
-@@ -7470,6 +7862,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
+
+@@ -7470,6 +7857,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
if (env->sd->flags & SD_SHARE_CPUCAPACITY)
return 0;
-
+
+#if !defined(CONFIG_CACULE_SCHED)
/*
* Buddy candidates are cache hot:
*/
-@@ -7477,6 +7870,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
+@@ -7477,6 +7865,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
(&p->se == cfs_rq_of(&p->se)->next ||
&p->se == cfs_rq_of(&p->se)->last))
return 1;
+#endif
-
+
if (sysctl_sched_migration_cost == -1)
return 1;
-@@ -10765,11 +11159,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+@@ -10765,11 +11154,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
update_overutilized_status(task_rq(curr));
}
-
+
+#ifdef CONFIG_CACULE_SCHED
/*
* called on fork with the child task as argument from the parent's context
@@ -1173,32 +1114,32 @@ index bbc78794224a..7e9ce056efd7 100644
static void task_fork_fair(struct task_struct *p)
{
struct cfs_rq *cfs_rq;
-@@ -10800,6 +11213,7 @@ static void task_fork_fair(struct task_struct *p)
+@@ -10800,6 +11208,7 @@ static void task_fork_fair(struct task_struct *p)
se->vruntime -= cfs_rq->min_vruntime;
rq_unlock(rq, &rf);
}
+#endif /* CONFIG_CACULE_SCHED */
-
+
/*
* Priority of the task has changed. Check to see if we preempt
-@@ -10912,6 +11326,8 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
+@@ -10912,6 +11321,8 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
static void detach_task_cfs_rq(struct task_struct *p)
{
struct sched_entity *se = &p->se;
+
+#if !defined(CONFIG_CACULE_SCHED)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
+
if (!vruntime_normalized(p)) {
-@@ -10922,6 +11338,7 @@ static void detach_task_cfs_rq(struct task_struct *p)
+@@ -10922,6 +11333,7 @@ static void detach_task_cfs_rq(struct task_struct *p)
place_entity(cfs_rq, se, 0);
se->vruntime -= cfs_rq->min_vruntime;
}
+#endif
-
+
detach_entity_cfs_rq(se);
}
-@@ -10929,12 +11346,17 @@ static void detach_task_cfs_rq(struct task_struct *p)
+@@ -10929,12 +11341,17 @@ static void detach_task_cfs_rq(struct task_struct *p)
static void attach_task_cfs_rq(struct task_struct *p)
{
struct sched_entity *se = &p->se;
@@ -1206,17 +1147,17 @@ index bbc78794224a..7e9ce056efd7 100644
+#if !defined(CONFIG_CACULE_SCHED)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
+#endif
-
+
attach_entity_cfs_rq(se);
-
+
+#if !defined(CONFIG_CACULE_SCHED)
if (!vruntime_normalized(p))
se->vruntime += cfs_rq->min_vruntime;
+#endif
}
-
+
static void switched_from_fair(struct rq *rq, struct task_struct *p)
-@@ -10990,13 +11412,22 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
+@@ -10990,13 +11407,22 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq)
{
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
@@ -1237,7 +1178,7 @@ index bbc78794224a..7e9ce056efd7 100644
+ cfs_rq->tail = NULL;
+#endif
}
-
+
#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 282a6bbaacd7..a3b7316dd537 100644
@@ -1245,7 +1186,7 @@ index 282a6bbaacd7..a3b7316dd537 100644
+++ b/kernel/sched/sched.h
@@ -516,10 +516,13 @@ struct cfs_rq {
unsigned int idle_h_nr_running; /* SCHED_IDLE */
-
+
u64 exec_clock;
+
+#if !defined(CONFIG_CACULE_SCHED)
@@ -1254,9 +1195,9 @@ index 282a6bbaacd7..a3b7316dd537 100644
u64 min_vruntime_copy;
#endif
+#endif /* CONFIG_CACULE_SCHED */
-
+
struct rb_root_cached tasks_timeline;
-
+
@@ -528,9 +531,15 @@ struct cfs_rq {
* It is set to NULL otherwise (i.e when none are currently running).
*/
@@ -1270,7 +1211,7 @@ index 282a6bbaacd7..a3b7316dd537 100644
struct sched_entity *last;
struct sched_entity *skip;
+#endif // CONFIG_CACULE_SCHED
-
+
#ifdef CONFIG_SCHED_DEBUG
unsigned int nr_spread_over;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
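
cacule-5.11.patch adds a boot-time banner in sched_init() and wires the interactivity knobs into sysctl, so whether the packaged kernel really runs with CONFIG_CACULE_SCHED can be checked on the booted system. A rough sketch, not part of the PKGBUILD:

    $ dmesg | grep -i cacule                       # should show the "CacULE CPU scheduler" banner printed by sched_init()
    $ sysctl kernel.sched_interactivity_factor     # only exists when the kernel was built with CONFIG_CACULE_SCHED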
diff --git a/interactivity_levels.patch b/interactivity_levels.patch
new file mode 100644
index 000000000000..b3920791735d
--- /dev/null
+++ b/interactivity_levels.patch
@@ -0,0 +1,366 @@
+commit 52bbe1d1b5977d116aebca9902073030c1eb8f66
+Author: Hamad Marri <hamad.s.almarri@gmail.com>
+Date: Thu May 6 16:07:35 2021 +0300
+
+    added interactivity levels which are used to avoid fake interactive tasks. Fake interactive tasks are detected by the number of forks of their parent in a period of time. Introduced new sysctls: sched_nr_fork_threshold, which is the max number of forks in a period of time before all children are considered fake interactive. This happens for example when running make -j4, where make creates new threads for each job. All make children/threads are fake interactive; therefore they (any fake interactive tasks) are put in lower interactivity regions based on how often the task becomes fake interactive in a period of time. When a child exits or sleeps, the number of forks and the fake interactive level are reduced by 1. Also, when a maximum sliding window is reached without any new fork, the fake interactive level is reduced by 1. The second sysctl is sched_fake_interactive_win_time_ms, which is 1s by default. Every 1s (if a task is in the runqueue, not sleeping), if the task is already fake interactive and the time slice ended without any new forks, the interactivity level is reduced by 1. For each fork, the interactivity level is increased by 1. Higher levels here mean lower interactivity scores.
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 833c01b9ffd9..1912d22464b4 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -884,6 +884,12 @@ struct task_struct {
+ struct list_head sibling;
+ struct task_struct *group_leader;
+
++#ifdef CONFIG_CACULE_SCHED
++ u64 fork_start_win_stamp;
++ unsigned int nr_forks_per_time;
++ int is_fake_interactive;
++#endif
++
+ /*
+ * 'ptraced' is the list of tasks this task is using ptrace() on.
+ *
+diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
+index cb819c3d86f3..a5686379b998 100644
+--- a/include/linux/sched/sysctl.h
++++ b/include/linux/sched/sysctl.h
+@@ -32,9 +32,11 @@ extern unsigned int sysctl_sched_wakeup_granularity;
+ extern unsigned int sysctl_sched_child_runs_first;
+
+ #ifdef CONFIG_CACULE_SCHED
+-extern int interactivity_factor;
++extern unsigned int interactivity_factor;
+ extern unsigned int interactivity_threshold;
+-extern int cacule_max_lifetime;
++extern unsigned int cacule_max_lifetime;
++extern unsigned int fake_interactive_win_time;
++extern unsigned int nr_fork_threshold;
+ #endif
+
+ enum sched_tunable_scaling {
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 04029e35e69a..9dfd515104db 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -667,6 +667,17 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
+ write_lock_irq(&tasklist_lock);
+ forget_original_parent(tsk, &dead);
+
++#ifdef CONFIG_CACULE_SCHED
++ p = tsk->parent;
++ if (p) {
++ if (p->nr_forks_per_time)
++ p->nr_forks_per_time--;
++
++ if (p->is_fake_interactive)
++ p->is_fake_interactive--;
++ }
++#endif
++
+ if (group_dead)
+ kill_orphaned_pgrp(tsk->group_leader, NULL);
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index ece4dd36b23a..8cc840dc3540 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3554,11 +3554,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ p->se.sum_exec_runtime = 0;
+ p->se.prev_sum_exec_runtime = 0;
+ p->se.nr_migrations = 0;
++ p->se.vruntime = 0;
+
+ #ifdef CONFIG_CACULE_SCHED
+ p->se.cacule_node.vruntime = 0;
+-#else
+- p->se.vruntime = 0;
++ p->fork_start_win_stamp = 0;
++ p->nr_forks_per_time = 0;
++ p->is_fake_interactive = 0;
+ #endif
+
+ INIT_LIST_HEAD(&p->se.group_node);
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 5a2387bcb83a..d9f935bf54ff 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -119,9 +119,11 @@ int __weak arch_asym_cpu_priority(int cpu)
+
+ #endif
+ #ifdef CONFIG_CACULE_SCHED
+-int __read_mostly cacule_max_lifetime = 22000; // in ms
+-int __read_mostly interactivity_factor = 32768;
++unsigned int __read_mostly cacule_max_lifetime = 22000; // in ms
++unsigned int __read_mostly interactivity_factor = 32768;
+ unsigned int __read_mostly interactivity_threshold = 20480;
++unsigned int __read_mostly fake_interactive_win_time = 1000; // in ms
++unsigned int __read_mostly nr_fork_threshold = 3;
+ #endif
+
+ #ifdef CONFIG_CFS_BANDWIDTH
+@@ -595,28 +597,68 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
+ #endif /* CONFIG_CACULE_SCHED */
+
+ #ifdef CONFIG_CACULE_SCHED
++static inline unsigned int is_fake_interactive(struct cacule_node *cn)
++{
++ struct sched_entity *se = se_of(cn);
++ struct task_struct *parent = NULL;
++ struct cfs_rq *cfs_rq;
++ u64 win_time = fake_interactive_win_time * 1000000ULL;
++ u64 now = sched_clock();
++
++ while (!parent) {
++ if (entity_is_task(se)) {
++ parent = task_of(se)->parent;
++ break;
++ }
++
++ cfs_rq = group_cfs_rq(se);
++
++ if (!cfs_rq->head && !cfs_rq->curr)
++ return 0;
++
++ if (cfs_rq->head)
++ se = se_of(cfs_rq->head);
++ else if (cfs_rq->curr)
++ se = cfs_rq->curr;
++ }
++
++ if (parent->is_fake_interactive
++ && (now - parent->fork_start_win_stamp > win_time))
++ {
++ parent->fork_start_win_stamp = now;
++ parent->is_fake_interactive--;
++ }
++
++ return parent->is_fake_interactive;
++}
++
+ static unsigned int
+ calc_interactivity(u64 now, struct cacule_node *se)
+ {
+- u64 l_se, vr_se, sleep_se = 1ULL, u64_factor;
+- unsigned int score_se;
++ u64 l_se, vr_se, sleep_se = 1ULL, u64_factor_m, _2m;
++ unsigned int score_se, fake_interactivity;
+
+ /*
+ * in case of vruntime==0, logical OR with 1 would
+ * make sure that the least sig. bit is 1
+ */
+ l_se = now - se->cacule_start_time;
+- vr_se = se->vruntime | 1;
+- u64_factor = interactivity_factor;
++ vr_se = se->vruntime | 1;
++ u64_factor_m = interactivity_factor;
++ _2m = u64_factor_m << 1;
+
+ /* safety check */
+ if (likely(l_se > vr_se))
+ sleep_se = (l_se - vr_se) | 1;
+
+ if (sleep_se >= vr_se)
+- score_se = u64_factor / (sleep_se / vr_se);
++ score_se = u64_factor_m / (sleep_se / vr_se);
+ else
+- score_se = (u64_factor << 1) - (u64_factor / (vr_se / sleep_se));
++ score_se = _2m - (u64_factor_m / (vr_se / sleep_se));
++
++ fake_interactivity = is_fake_interactive(se);
++ if (fake_interactivity)
++ score_se += (_2m * fake_interactivity) + 1;
+
+ return score_se;
+ }
+@@ -626,6 +668,9 @@ static inline int is_interactive(struct cacule_node *cn)
+ if (se_of(cn)->vruntime == 0)
+ return 0;
+
++ if (is_fake_interactive(cn))
++ return 0;
++
+ return calc_interactivity(sched_clock(), cn) < interactivity_threshold;
+ }
+
+@@ -673,47 +718,18 @@ entity_before(u64 now, struct cacule_node *curr, struct cacule_node *se)
+ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se)
+ {
+ struct cacule_node *se = &(_se->cacule_node);
+- struct cacule_node *iter, *next = NULL;
+- u64 now = sched_clock();
+- unsigned int score_se = calc_interactivity(now, se);
+
+ se->next = NULL;
+ se->prev = NULL;
+
+ if (likely(cfs_rq->head)) {
+-
+- // start from tail
+- iter = cfs_rq->tail;
+-
+- // does se have higher IS than iter?
+- while (iter && entity_before_cached(now, score_se, iter) == -1) {
+- next = iter;
+- iter = iter->prev;
+- }
+-
+- // se in tail position
+- if (iter == cfs_rq->tail) {
+- cfs_rq->tail->next = se;
+- se->prev = cfs_rq->tail;
+-
+- cfs_rq->tail = se;
+- }
+- // else if not head no tail, insert se after iter
+- else if (iter) {
+- se->next = next;
+- se->prev = iter;
+-
+- iter->next = se;
+- next->prev = se;
+- }
+ // insert se at head
+- else {
+- se->next = cfs_rq->head;
+- cfs_rq->head->prev = se;
++ se->next = cfs_rq->head;
++ cfs_rq->head->prev = se;
++
++ // lastly reset the head
++ cfs_rq->head = se;
+
+- // lastly reset the head
+- cfs_rq->head = se;
+- }
+ } else {
+ // if empty rq
+ cfs_rq->head = se;
+@@ -4603,16 +4619,17 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ }
+
+ #ifdef CONFIG_CACULE_SCHED
++static struct sched_entity *
++pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr);
++
+ /*
+ * Preempt the current task with a newly woken task if needed:
+ */
+ static void
+ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ {
+- u64 now = sched_clock();
+-
+ // does head have higher IS than curr
+- if (entity_before(now, &curr->cacule_node, cfs_rq->head) == 1)
++ if (pick_next_entity(cfs_rq, curr) != curr)
+ resched_curr(rq_of(cfs_rq));
+ }
+ #else
+@@ -4694,12 +4711,26 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ static struct sched_entity *
+ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ {
+- struct cacule_node *se = cfs_rq->head;
++ struct cacule_node *next, *se = cfs_rq->head;
++ u64 now = sched_clock();
++ unsigned int score_se;
+
+ if (unlikely(!se))
+- se = &curr->cacule_node;
+- else if (unlikely(curr
+- && entity_before(sched_clock(), se, &curr->cacule_node) == 1))
++ return curr;
++
++ score_se = calc_interactivity(now, se);
++
++ next = se->next;
++ while (next) {
++ if (entity_before_cached(now, score_se, next) == 1) {
++ se = next;
++ score_se = calc_interactivity(now, se);
++ }
++
++ next = next->next;
++ }
++
++ if (unlikely(curr && entity_before_cached(now, score_se, &curr->cacule_node) == 1))
+ se = &curr->cacule_node;
+
+ return se_of(se);
+@@ -5884,6 +5915,15 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ int task_sleep = flags & DEQUEUE_SLEEP;
+ int idle_h_nr_running = task_has_idle_policy(p);
+ bool was_sched_idle = sched_idle_rq(rq);
++ struct task_struct *parent = p->parent;
++
++ if (task_sleep && parent) {
++ if (parent->nr_forks_per_time)
++ parent->nr_forks_per_time--;
++
++ if (parent->is_fake_interactive)
++ parent->is_fake_interactive--;
++ }
+
+ util_est_dequeue(&rq->cfs, p);
+
+@@ -11147,6 +11187,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ struct sched_entity *curr;
+ struct rq *rq = this_rq();
+ struct rq_flags rf;
++ struct task_struct *parent = p->parent;
++ u64 now = sched_clock();
+
+ rq_lock(rq, &rf);
+ update_rq_clock(rq);
+@@ -11157,6 +11199,13 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ update_curr(cfs_rq);
+
+ rq_unlock(rq, &rf);
++
++ parent->fork_start_win_stamp = now;
++
++ if (parent->nr_forks_per_time >= nr_fork_threshold)
++ parent->is_fake_interactive++;
++
++ parent->nr_forks_per_time++;
+ }
+ #else
+ static void task_fork_fair(struct task_struct *p)
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index a0bf55bbb3a7..5f49409e3124 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1663,7 +1663,7 @@ static struct ctl_table kern_table[] = {
+ {
+ .procname = "sched_interactivity_factor",
+ .data = &interactivity_factor,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+@@ -1677,7 +1677,21 @@ static struct ctl_table kern_table[] = {
+ {
+ .procname = "sched_max_lifetime_ms",
+ .data = &cacule_max_lifetime,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++ {
++ .procname = "sched_fake_interactive_win_time_ms",
++ .data = &fake_interactive_win_time,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++ {
++ .procname = "sched_nr_fork_threshold",
++ .data = &nr_fork_threshold,
++ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
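
The two new tunables land in kern_table, so on a running patched kernel they should appear under /proc/sys/kernel/ next to the existing CacULE knobs and can be adjusted without rebuilding. A hedged example using the defaults from the patch above:

    $ sysctl kernel.sched_nr_fork_threshold                          # 3: forks per window before children count as fake interactive
    $ sysctl kernel.sched_fake_interactive_win_time_ms               # 1000: sliding-window length in ms
    $ sudo sysctl -w kernel.sched_fake_interactive_win_time_ms=2000  # e.g. widen the window at runtime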