author | P. Jung | 2021-09-13 13:22:11 +0000
---|---|---
committer | P. Jung | 2021-09-13 13:22:11 +0000
commit | a101ee7fec526c7f573aea4e0c9a9bfde2b06768 (patch) |
tree | c667649e554343179510e77435a31d8292c5b30c |
parent | 3c7966b21ab1a0425799399781f6bd6c1905ec77 (diff) |
download | aur-a101ee7fec526c7f573aea4e0c9a9bfde2b06768.tar.gz |
linux-hardened-cacule 5.14.3
-rw-r--r-- | .SRCINFO | 16
-rw-r--r-- | PKGBUILD | 12
-rw-r--r-- | cacule-5.13.patch | 2211
-rw-r--r-- | cacule-5.14.patch | 1232
-rw-r--r-- | config | 2
5 files changed, 1247 insertions, 2226 deletions
@@ -1,6 +1,6 @@ pkgbase = linux-hardened-cacule pkgdesc = Security-Hardened Linux with the cacule scheduler - pkgver = 5.13.15.hardened1 + pkgver = 5.14.3.hardened1 pkgrel = 1 url = https://github.com/anthraxx/linux-hardened arch = x86_64 @@ -20,16 +20,16 @@ pkgbase = linux-hardened-cacule makedepends = graphviz makedepends = imagemagick options = !strip - source = https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.13.15.tar.xz - source = https://github.com/anthraxx/linux-hardened/releases/download/5.13.15-hardened1/linux-hardened-5.13.15-hardened1.patch - source = cacule-5.13.patch + source = https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.14.3.tar.xz + source = https://github.com/anthraxx/linux-hardened/releases/download/5.14.3-hardened1/linux-hardened-5.14.3-hardened1.patch + source = cacule-5.14.patch source = cpu-patches.patch source = config - sha256sums = 330b4c395f858e75ce26a34cc008c121ef8d975736b3ea975ca65081ff93621b - sha256sums = c722b5988fec48f6593e24aa429068f489285062960b1f172770e1240b94e5bc - sha256sums = 2c2851ea35a8e8628caf2caf674736af0547c737652737d6a8aebf677ae92e5e + sha256sums = c6c340be69e22021d9406c51467d03e2e28fb7221939b2ef114cac3d3602a8d8 + sha256sums = f46574c15833acb4b27717227f4965d57111936a14ba0af437eedbd3a9e2ec73 + sha256sums = bfc4a35b30ebe39fe2520ae8b2c08b97b6549c2c99a9ecf8c0d57ea27b558e75 sha256sums = 4f22a6e4e5fe6f3bb39ca39073fa812eb9c0dbb3ac9cec64ed0a90d06b54d32a - sha256sums = ab4721efe64acbb78a995af97cf65aec3462994dd985ac36eebb146ab9d93f67 + sha256sums = 80c04de57ded5d84c6cde6d9897cfc62a858c880c733ff3b24d7134e5ced0a57 pkgname = linux-hardened-cacule pkgdesc = The Security-Hardened Linux with the cacule scheduler kernel and modules @@ -6,7 +6,7 @@ pkgbase=linux-hardened-cacule -pkgver=5.13.15.hardened1 +pkgver=5.14.3.hardened1 pkgrel=1 pkgdesc='Security-Hardened Linux with the cacule scheduler' url='https://github.com/anthraxx/linux-hardened' @@ -22,15 +22,15 @@ _srctag=${pkgver%.*}-${pkgver##*.} source=( https://www.kernel.org/pub/linux/kernel/v${pkgver%%.*}.x/${_srcname}.tar.xz https://github.com/anthraxx/linux-hardened/releases/download/${_srctag}/linux-hardened-${_srctag}.patch - cacule-5.13.patch + cacule-5.14.patch cpu-patches.patch config # the main kernel config file ) -sha256sums=('330b4c395f858e75ce26a34cc008c121ef8d975736b3ea975ca65081ff93621b' - 'c722b5988fec48f6593e24aa429068f489285062960b1f172770e1240b94e5bc' - '2c2851ea35a8e8628caf2caf674736af0547c737652737d6a8aebf677ae92e5e' +sha256sums=('c6c340be69e22021d9406c51467d03e2e28fb7221939b2ef114cac3d3602a8d8' + 'f46574c15833acb4b27717227f4965d57111936a14ba0af437eedbd3a9e2ec73' + 'bfc4a35b30ebe39fe2520ae8b2c08b97b6549c2c99a9ecf8c0d57ea27b558e75' '4f22a6e4e5fe6f3bb39ca39073fa812eb9c0dbb3ac9cec64ed0a90d06b54d32a' - 'ab4721efe64acbb78a995af97cf65aec3462994dd985ac36eebb146ab9d93f67') + '80c04de57ded5d84c6cde6d9897cfc62a858c880c733ff3b24d7134e5ced0a57') export KBUILD_BUILD_HOST=archlinux export KBUILD_BUILD_USER=$pkgbase diff --git a/cacule-5.13.patch b/cacule-5.13.patch deleted file mode 100644 index 100bdcb0c5f6..000000000000 --- a/cacule-5.13.patch +++ /dev/null @@ -1,2211 +0,0 @@ -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 68b21395a743..3f4b9c6911be 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1088,6 +1088,10 @@ Model available). If your platform happens to meet the - requirements for EAS but you do not want to use it, change - this value to 0. 
- -+sched_interactivity_factor (CacULE scheduler only) -+================================================== -+Sets the value *m* for interactivity score calculations. See -+Figure 1 in https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf - - sched_schedstats - ================ -diff --git a/Documentation/scheduler/sched-CacULE.rst b/Documentation/scheduler/sched-CacULE.rst -new file mode 100644 -index 000000000000..82b0847c468a ---- /dev/null -+++ b/Documentation/scheduler/sched-CacULE.rst -@@ -0,0 +1,76 @@ -+====================================== -+The CacULE Scheduler by Hamad Al Marri. -+====================================== -+ -+1. Overview -+============= -+ -+The CacULE CPU scheduler is based on interactivity score mechanism. -+The interactivity score is inspired by the ULE scheduler (FreeBSD -+scheduler). -+ -+1.1 About CacULE Scheduler -+-------------------------- -+ -+ - Each CPU has its own runqueue. -+ -+ - NORMAL runqueue is a linked list of sched_entities (instead of RB-Tree). -+ -+ - RT and other runqueues are just the same as the CFS's. -+ -+ - Wake up tasks preempt currently running tasks if its interactivity score value -+ is higher. -+ -+ -+1.2. Complexity -+---------------- -+ -+The complexity of Enqueue and Dequeue a task is O(1). -+ -+The complexity of pick the next task is in O(n), where n is the number of tasks -+in a runqueue (each CPU has its own runqueue). -+ -+Note: O(n) sounds scary, but usually for a machine with 4 CPUS where it is used -+for desktop or mobile jobs, the maximum number of runnable tasks might not -+exceeds 10 (at the pick next run time) - the idle tasks are excluded since they -+are dequeued when sleeping and enqueued when they wake up. -+ -+ -+2. The CacULE Interactivity Score -+======================================================= -+ -+The interactivity score is inspired by the ULE scheduler (FreeBSD scheduler). -+For more information see: https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf -+CacULE doesn't replace CFS with ULE, it only changes the CFS' pick next task -+mechanism to ULE's interactivity score mechanism for picking next task to run. -+ -+ -+2.3 sched_interactivity_factor -+================= -+Sets the value *m* for interactivity score calculations. See Figure 1 in -+https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf -+The default value of in CacULE is 10 which means that the Maximum Interactive -+Score is 20 (since m = Maximum Interactive Score / 2). -+You can tune sched_interactivity_factor with sysctl command: -+ -+ sysctl kernel.sched_interactivity_factor=50 -+ -+This command changes the sched_interactivity_factor from 10 to 50. -+ -+ -+3. Scheduling policies -+======================= -+ -+CacULE some CFS, implements three scheduling policies: -+ -+ - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling -+ policy that is used for regular tasks. -+ -+ - SCHED_BATCH: Does not preempt nearly as often as regular tasks -+ would, thereby allowing tasks to run longer and make better use of -+ caches but at the cost of interactivity. This is well suited for -+ batch jobs. -+ -+ - SCHED_IDLE: This is even weaker than nice 19, but its not a true -+ idle timer scheduler in order to avoid to get into priority -+ inversion problems which would deadlock the machine. 
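The interactivity score described in the CacULE documentation above (part of the removed 5.13 patch) is easy to model in user space. Below is a minimal sketch mirroring the calc_interactivity() that appears later in this diff; the 32768 factor is the in-kernel default of interactivity_factor from fair.c, while the two sample tasks are invented for illustration.

```c
#include <stdint.h>
#include <stdio.h>

/*
 * User-space sketch of CacULE's ULE-style interactivity score,
 * mirroring calc_interactivity() from kernel/sched/fair.c below.
 * A task that slept more than it ran scores in [0, m); one that
 * ran more than it slept scores in (m, 2m]. Lower is more
 * interactive. Sample values are made up.
 */
static unsigned int calc_interactivity(uint64_t now, uint64_t start_time,
				       uint64_t vruntime, uint64_t m)
{
	uint64_t life = now - start_time;	/* lifetime in ns */
	uint64_t vr = vruntime | 1;		/* avoid div by zero */
	uint64_t sleep = 1, _2m = m << 1;

	if (life > vr)
		sleep = (life - vr) | 1;	/* time not spent running */

	if (sleep >= vr)
		return m / (sleep / vr);	/* mostly sleeping: 0..m */
	return _2m - (m / (vr / sleep));	/* mostly running: m..2m */
}

int main(void)
{
	uint64_t m = 32768;	/* in-kernel default interactivity_factor */

	/* alive 100ms, ran 10ms: very interactive, scores 4096 */
	printf("sleeper: %u\n", calc_interactivity(100000000, 0, 10000000, m));
	/* alive 100ms, ran 90ms: CPU hog, scores near 2m (61896) */
	printf("hog:     %u\n", calc_interactivity(100000000, 0, 90000000, m));
	return 0;
}
```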
-diff --git a/include/linux/sched.h b/include/linux/sched.h -index 32813c345115..0dc06f09715f 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -458,10 +458,23 @@ struct sched_statistics { - #endif - }; - -+#ifdef CONFIG_CACULE_SCHED -+struct cacule_node { -+ struct cacule_node* next; -+ struct cacule_node* prev; -+ u64 cacule_start_time; -+ u64 last_run; -+ u64 vruntime; -+}; -+#endif -+ - struct sched_entity { - /* For load-balancing: */ - struct load_weight load; - struct rb_node run_node; -+#ifdef CONFIG_CACULE_SCHED -+ struct cacule_node cacule_node; -+#endif - struct list_head group_node; - unsigned int on_rq; - -diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h -index db2c0f34aaaf..a0ef2748ee6e 100644 ---- a/include/linux/sched/sysctl.h -+++ b/include/linux/sched/sysctl.h -@@ -32,6 +32,16 @@ extern unsigned int sysctl_sched_latency; - extern unsigned int sysctl_sched_min_granularity; - extern unsigned int sysctl_sched_wakeup_granularity; - -+#ifdef CONFIG_CACULE_SCHED -+extern unsigned int interactivity_factor; -+extern unsigned int cacule_max_lifetime; -+extern unsigned int cache_factor; -+extern unsigned int cache_divisor; -+extern unsigned int starve_factor; -+extern unsigned int starve_divisor; -+extern int cacule_yield; -+#endif -+ - enum sched_tunable_scaling { - SCHED_TUNABLESCALING_NONE, - SCHED_TUNABLESCALING_LOG, -diff --git a/init/Kconfig b/init/Kconfig -index a61c92066c2e..427593be8c5a 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -834,6 +834,51 @@ config UCLAMP_BUCKETS_COUNT - - endmenu - -+config CACULE_SCHED -+ bool "CacULE CPU scheduler" -+ default y -+ help -+ The CacULE CPU scheduler is based on interactivity score mechanism. -+ The interactivity score is inspired by the ULE scheduler (FreeBSD -+ scheduler). -+ -+ If unsure, say Y here. -+ -+config CACULE_RDB -+ bool "RDB (Response Driven Balancer)" -+ default y -+ depends on CACULE_SCHED -+ help -+ This is an experimental load balancer for CacULE. It is a lightweight -+ load balancer which is a replacement of CFS load balancer. It migrates -+ tasks based on their interactivity scores. -+ -+ If unsure, say Y here. -+ -+config RDB_INTERVAL -+ int "RDB load balancer interval" -+ default 19 -+ depends on CACULE_RDB -+ help -+ This is an interval to control load balance time period. -+ The trigger_load_balance runs in every tick. For High HZ values, the -+ load balance could be overwhelming. RDB load balance includes rq locking -+ which can reduce the performance. The balance interval can help to avoid -+ running load balance on every tick. For example, RDB_INTERVAL=3 will -+ only run load balance every 3ms. Setting RDB_INTERVAL depends on HZ. -+ If you want load balancer run every 2ms while HZ=500 then it is not -+ needed and better to set RDB_INTERVAL=0 since 500HZ already (1000ms -+ / 500HZ = 2ms). However, if you have 1000HZ and want to avoid load -+ balancer from running every 1ms, you could set RDB_INTERVAL=4ms for -+ example to make load balancer run every 4ms. Less RDB_INTERVAL values -+ (or 0 to disable) could make sure tasks are balanced ASAP, but with -+ the cost of locking/blocking time. High RDB_INTERVAL values can relax -+ balancing locking but with the cost of imbalanced workload for that -+ period of time (i.e. if RDB_INTERVAL=100ms) there will be no balancing -+ for 100ms (except for newidle_balance which is not effected by RDB_INTERVAL). -+ -+ If in doubt, use the default value. 
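The RDB_INTERVAL help text above reasons in terms of the tick period, 1000 ms / HZ, versus the configured balance interval. A small worked example of that arithmetic (the HZ choices and the 4 ms interval are illustrative, not defaults):

```c
#include <stdio.h>

/*
 * Worked example for the RDB_INTERVAL reasoning above: load balance
 * runs at most once per tick (1000 / HZ milliseconds), so a nonzero
 * RDB_INTERVAL only changes anything when it exceeds the tick period.
 */
int main(void)
{
	int hz_values[] = { 250, 500, 1000 };
	int rdb_interval_ms = 4;	/* hypothetical CONFIG_RDB_INTERVAL */

	for (int i = 0; i < 3; i++) {
		int tick_ms = 1000 / hz_values[i];
		int effective = rdb_interval_ms > tick_ms ? rdb_interval_ms
							  : tick_ms;

		printf("HZ=%-4d tick=%dms -> balance every %dms\n",
		       hz_values[i], tick_ms, effective);
	}
	return 0;
}
```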
-+ - # - # For architectures that want to enable the support for NUMA-affine scheduler - # balancing logic: -@@ -1231,6 +1276,7 @@ config SCHED_AUTOGROUP - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -+ default y - help - This option optimizes the scheduler for common desktop workloads by - automatically creating and populating task groups. This separation -diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz -index 38ef6d06888e..865f8dbddca8 100644 ---- a/kernel/Kconfig.hz -+++ b/kernel/Kconfig.hz -@@ -46,6 +46,9 @@ choice - 1000 Hz is the preferred choice for desktop systems and other - systems requiring fast interactive responses to events. - -+ config HZ_2000 -+ bool "2000 HZ" -+ - endchoice - - config HZ -@@ -54,6 +57,7 @@ config HZ - default 250 if HZ_250 - default 300 if HZ_300 - default 1000 if HZ_1000 -+ default 2000 if HZ_2000 - - config SCHED_HRTICK - def_bool HIGH_RES_TIMERS -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index e5858999b54d..c326d30424f9 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -82,6 +82,10 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; - */ - unsigned int sysctl_sched_rt_period = 1000000; - -+#ifdef CONFIG_CACULE_SCHED -+int __read_mostly cacule_yield = 1; -+#endif -+ - __read_mostly int scheduler_running; - - /* -@@ -3578,6 +3582,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) - p->se.prev_sum_exec_runtime = 0; - p->se.nr_migrations = 0; - p->se.vruntime = 0; -+ -+#ifdef CONFIG_CACULE_SCHED -+ p->se.cacule_node.vruntime = 0; -+#endif -+ - INIT_LIST_HEAD(&p->se.group_node); - - #ifdef CONFIG_FAIR_GROUP_SCHED -@@ -3863,6 +3872,10 @@ void wake_up_new_task(struct task_struct *p) - update_rq_clock(rq); - post_init_entity_util_avg(p); - -+#ifdef CONFIG_CACULE_SCHED -+ p->se.cacule_node.cacule_start_time = sched_clock(); -+#endif -+ - activate_task(rq, p, ENQUEUE_NOCLOCK); - trace_sched_wakeup_new(p); - check_preempt_curr(rq, p, WF_FORK); -@@ -4674,7 +4687,9 @@ static void sched_tick_remote(struct work_struct *work) - struct rq *rq = cpu_rq(cpu); - struct task_struct *curr; - struct rq_flags rf; -+#if !defined(CONFIG_CACULE_SCHED) - u64 delta; -+#endif - int os; - - /* -@@ -4694,6 +4709,7 @@ static void sched_tick_remote(struct work_struct *work) - - update_rq_clock(rq); - -+#if !defined(CONFIG_CACULE_SCHED) - if (!is_idle_task(curr)) { - /* - * Make sure the next tick runs within a reasonable -@@ -4702,6 +4718,8 @@ static void sched_tick_remote(struct work_struct *work) - delta = rq_clock_task(rq) - curr->se.exec_start; - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); - } -+#endif -+ - curr->sched_class->task_tick(rq, curr, 0); - - calc_load_nohz_remote(rq); -@@ -6958,6 +6976,13 @@ static void do_sched_yield(void) - struct rq_flags rf; - struct rq *rq; - -+#ifdef CONFIG_CACULE_SCHED -+ struct task_struct *curr = current; -+ struct cacule_node *cn = &curr->se.cacule_node; -+ -+ if (cacule_yield) -+ cn->vruntime |= YIELD_MARK; -+#endif - rq = this_rq_lock_irq(&rf); - - schedstat_inc(rq->yld_count); -@@ -8115,6 +8140,14 @@ void __init sched_init(void) - BUG_ON(&dl_sched_class + 1 != &stop_sched_class); - #endif - -+#ifdef CONFIG_CACULE_SCHED -+#ifdef CONFIG_CACULE_RDB -+ printk(KERN_INFO "CacULE CPU scheduler (RDB) v5.13-r3 by Hamad Al Marri."); -+#else -+ printk(KERN_INFO "CacULE CPU scheduler v5.13-r3 by Hamad Al Marri."); -+#endif -+#endif -+ - wait_bit_init(); - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 
c5aacbd492a1..adb021b7da8a 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -560,8 +560,11 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) - - void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) - { -- s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, -- spread, rq0_min_vruntime, spread0; -+ s64 MIN_vruntime = -1, max_vruntime = -1, -+#if !defined(CONFIG_CACULE_SCHED) -+ min_vruntime, rq0_min_vruntime, spread0, -+#endif -+ spread; - struct rq *rq = cpu_rq(cpu); - struct sched_entity *last; - unsigned long flags; -@@ -582,21 +585,27 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) - last = __pick_last_entity(cfs_rq); - if (last) - max_vruntime = last->vruntime; -+#if !defined(CONFIG_CACULE_SCHED) - min_vruntime = cfs_rq->min_vruntime; - rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; -+#endif - raw_spin_unlock_irqrestore(&rq->lock, flags); - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", - SPLIT_NS(MIN_vruntime)); -+#if !defined(CONFIG_CACULE_SCHED) - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", - SPLIT_NS(min_vruntime)); -+#endif - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime", - SPLIT_NS(max_vruntime)); - spread = max_vruntime - MIN_vruntime; - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", - SPLIT_NS(spread)); -+#if !defined(CONFIG_CACULE_SCHED) - spread0 = min_vruntime - rq0_min_vruntime; - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", - SPLIT_NS(spread0)); -+#endif - SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", - cfs_rq->nr_spread_over); - SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 7dd0d859d95b..4aa5fced8f69 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -19,9 +19,24 @@ - * - * Adaptive scheduling granularity, math enhancements by Peter Zijlstra - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra -+ * -+ * CacULE enhancements CPU cache and scheduler based on -+ * Interactivity Score. 
-+ * (C) 2020 Hamad Al Marri <hamad.s.almarri@gmail.com> - */ - #include "sched.h" - -+#ifdef CONFIG_CACULE_SCHED -+unsigned int __read_mostly cacule_max_lifetime = 22000; // in ms -+unsigned int __read_mostly interactivity_factor = 32768; -+ -+unsigned int __read_mostly cache_factor = 13107; -+unsigned int __read_mostly cache_divisor = 1000000; // 1ms -+ -+unsigned int __read_mostly starve_factor = 19660; -+unsigned int __read_mostly starve_divisor = 3000000; // 3ms -+#endif -+ - /* - * Targeted preemption latency for CPU-bound tasks: - * -@@ -82,7 +97,11 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; - unsigned int sysctl_sched_wakeup_granularity = 1000000UL; - static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; - -+#ifdef CONFIG_CACULE_SCHED -+const_debug unsigned int sysctl_sched_migration_cost = 200000UL; -+#else - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - int sched_thermal_decay_shift; - static int __init setup_sched_thermal_decay_shift(char *str) -@@ -263,6 +282,14 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight - - const struct sched_class fair_sched_class; - -+ -+#ifdef CONFIG_CACULE_SCHED -+static inline struct sched_entity *se_of(struct cacule_node *cn) -+{ -+ return container_of(cn, struct sched_entity, cacule_node); -+} -+#endif -+ - /************************************************************** - * CFS operations on generic schedulable entities: - */ -@@ -522,7 +549,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec); - /************************************************************** - * Scheduling class tree data structure manipulation methods: - */ -- -+#if !defined(CONFIG_CACULE_SCHED) - static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime) - { - s64 delta = (s64)(vruntime - max_vruntime); -@@ -585,7 +612,223 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) - { - return entity_before(__node_2_se(a), __node_2_se(b)); - } -+#endif /* CONFIG_CACULE_SCHED */ -+ -+#ifdef CONFIG_CACULE_SCHED -+static unsigned int -+calc_interactivity(u64 now, struct cacule_node *se) -+{ -+ u64 l_se, vr_se, sleep_se = 1ULL, u64_factor_m, _2m; -+ unsigned int score_se; -+ -+ /* -+ * in case of vruntime==0, logical OR with 1 would -+ * make sure that the least sig. 
bit is 1 -+ */ -+ l_se = now - se->cacule_start_time; -+ vr_se = se->vruntime | 1; -+ u64_factor_m = interactivity_factor; -+ _2m = u64_factor_m << 1; -+ -+ /* safety check */ -+ if (likely(l_se > vr_se)) -+ sleep_se = (l_se - vr_se) | 1; -+ -+ if (sleep_se >= vr_se) -+ score_se = u64_factor_m / (sleep_se / vr_se); -+ else -+ score_se = _2m - (u64_factor_m / (vr_se / sleep_se)); -+ -+ return score_se; -+} -+ -+static unsigned int -+calc_cache_score(u64 now, struct cacule_node *cn) -+{ -+ struct sched_entity *se = se_of(cn); -+ struct cfs_rq *cfs_rq = cfs_rq_of(se); -+ u64 c_div = cache_divisor; -+ u64 cache_period = 1ULL; -+ u64 u64_factor_m = cache_factor; -+ u64 _2m = u64_factor_m << 1; -+ unsigned int score; -+ -+ if (!cache_factor) -+ return 0; -+ -+ if (se == cfs_rq->curr) -+ return 0; -+ -+ cache_period = (now - se->exec_start) | 1; -+ -+ if (c_div >= cache_period) -+ score = u64_factor_m / (c_div / cache_period); -+ else -+ score = _2m - (u64_factor_m / (cache_period / c_div)); -+ -+ return score; -+} -+ -+static unsigned int -+calc_starve_score(u64 now, struct cacule_node *cn) -+{ -+ struct sched_entity *se = se_of(cn); -+ struct cfs_rq *cfs_rq = cfs_rq_of(se); -+ u64 s_div = starve_divisor; -+ u64 starving = 1ULL; -+ u64 u64_factor_m = starve_factor; -+ u64 _2m = u64_factor_m << 1; -+ unsigned int score; -+ -+ if (!starve_factor) -+ return 0; -+ -+ if (se == cfs_rq->curr) -+ return _2m; -+ -+ starving = (now - cn->last_run) | 1; -+ -+ if (s_div >= starving) -+ score = _2m - (u64_factor_m / (s_div / starving)); -+ else -+ score = u64_factor_m / (starving / s_div); -+ -+ return score; -+} -+ -+static inline int cn_has_idle_policy(struct cacule_node *cn) -+{ -+ struct sched_entity *se = se_of(cn); -+ -+ if (!entity_is_task(se)) -+ return false; -+ -+ return task_has_idle_policy(task_of(se)); -+} -+ -+/* -+ * Does se have lower interactivity score value (i.e. interactive) than curr? 
If yes, return 1, -+ * otherwise return -1 -+ * se is before curr if se has lower interactivity score value -+ * the lower score, the more interactive -+ */ -+static inline int -+entity_before(u64 now, struct cacule_node *curr, struct cacule_node *se) -+{ -+ unsigned int score_curr, score_se; -+ int diff; -+ int is_curr_idle = cn_has_idle_policy(curr); -+ int is_se_idle = cn_has_idle_policy(se); -+ -+ /* if curr is normal but se is idle class, then no */ -+ if (!is_curr_idle && is_se_idle) -+ return -1; -+ -+ /* if curr is idle class and se is normal, then yes */ -+ if (is_curr_idle && !is_se_idle) -+ return 1; -+ -+ score_curr = calc_interactivity(now, curr); -+ score_curr += calc_cache_score(now, curr); -+ score_curr += calc_starve_score(now, curr); -+ -+ score_se = calc_interactivity(now, se); -+ score_se += calc_cache_score(now, se); -+ score_se += calc_starve_score(now, se); -+ -+ diff = score_se - score_curr; -+ -+ if (diff < 0) -+ return 1; -+ -+ return -1; -+} -+ -+#ifdef CONFIG_CACULE_RDB -+static void update_IS(struct rq *rq) -+{ -+ struct list_head *tasks = &rq->cfs_tasks; -+ struct task_struct *p, *to_migrate = NULL; -+ unsigned int max_IS = ~0, temp_IS; -+ -+ list_for_each_entry(p, tasks, se.group_node) { -+ if (task_running(rq, p)) -+ continue; -+ -+ temp_IS = calc_interactivity(sched_clock(), &p->se.cacule_node); -+ if (temp_IS < max_IS) { -+ to_migrate = p; -+ max_IS = temp_IS; -+ } -+ } -+ -+ if (to_migrate) { -+ WRITE_ONCE(rq->max_IS_score, max_IS); -+ WRITE_ONCE(rq->to_migrate_task, to_migrate); -+ } else if (rq->max_IS_score != ~0) { -+ WRITE_ONCE(rq->max_IS_score, ~0); -+ WRITE_ONCE(rq->to_migrate_task, NULL); -+ } -+} -+#endif - -+/* -+ * Enqueue an entity -+ */ -+static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se) -+{ -+ struct cacule_node *se = &(_se->cacule_node); -+ -+ se->next = NULL; -+ se->prev = NULL; -+ -+ if (cfs_rq->head) { -+ // insert se at head -+ se->next = cfs_rq->head; -+ cfs_rq->head->prev = se; -+ -+ // lastly reset the head -+ cfs_rq->head = se; -+ } else { -+ // if empty rq -+ cfs_rq->head = se; -+ cfs_rq->tail = se; -+ } -+} -+ -+static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se) -+{ -+ struct cacule_node *se = &(_se->cacule_node); -+ -+ // if only one se in rq -+ if (cfs_rq->head == cfs_rq->tail) { -+ cfs_rq->head = NULL; -+ cfs_rq->tail = NULL; -+ } else if (se == cfs_rq->head) { -+ // if it is the head -+ cfs_rq->head = cfs_rq->head->next; -+ cfs_rq->head->prev = NULL; -+ } else if (se == cfs_rq->tail) { -+ // if it is the tail -+ cfs_rq->tail = cfs_rq->tail->prev; -+ cfs_rq->tail->next = NULL; -+ } else { -+ // if in the middle -+ struct cacule_node *prev = se->prev; -+ struct cacule_node *next = se->next; -+ -+ prev->next = next; -+ -+ if (next) -+ next->prev = prev; -+ } -+} -+ -+struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) -+{ -+ return se_of(cfs_rq->head); -+} -+#else - /* - * Enqueue an entity into the rb-tree: - */ -@@ -618,16 +861,24 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) - - return __node_2_se(next); - } -+#endif /* CONFIG_CACULE_SCHED */ - - #ifdef CONFIG_SCHED_DEBUG - struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) - { -+#ifdef CONFIG_CACULE_SCHED -+ if (!cfs_rq->tail) -+ return NULL; -+ -+ return se_of(cfs_rq->tail); -+#else - struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); - - if (!last) - return NULL; - - return __node_2_se(last); -+#endif /* CONFIG_CACULE_SCHED */ - } - - 
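Taken together, the __enqueue_entity()/__dequeue_entity() pair above is what gives CacULE the O(1) enqueue and dequeue claimed in its documentation: entities are pushed onto the head of a plain doubly linked list, and pick-next (shown further down) is an O(n) scan. Here is a self-contained toy version, under the simplifying assumption that scores are precomputed rather than derived from vruntime at pick time as the kernel does:

```c
#include <stddef.h>
#include <stdio.h>

/*
 * Toy model of the CacULE runqueue list: O(1) insertion at the head,
 * O(n) scan in pick-next for the lowest interactivity score (lower =
 * more interactive). The tail pointer mirrors cfs_rq->tail, which the
 * kernel keeps for __pick_last_entity().
 */
struct cacule_node {
	struct cacule_node *next, *prev;
	unsigned int score;
};

static struct cacule_node *head, *tail;

static void enqueue(struct cacule_node *cn)	/* O(1) */
{
	cn->prev = NULL;
	cn->next = head;
	if (head)
		head->prev = cn;
	else
		tail = cn;	/* first node is head and tail */
	head = cn;
}

static struct cacule_node *pick_next(void)	/* O(n) */
{
	struct cacule_node *cn, *best = head;

	for (cn = head ? head->next : NULL; cn; cn = cn->next)
		if (cn->score < best->score)
			best = cn;
	return best;
}

int main(void)
{
	struct cacule_node hog = { .score = 61896 };
	struct cacule_node sleeper = { .score = 4096 };

	enqueue(&hog);
	enqueue(&sleeper);
	printf("picked score %u\n", pick_next()->score);	/* 4096 */
	return 0;
}
```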
/************************************************************** -@@ -717,6 +968,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) - return slice; - } - -+#if !defined(CONFIG_CACULE_SCHED) - /* - * We calculate the vruntime slice of a to-be-inserted task. - * -@@ -726,6 +978,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) - { - return calc_delta_fair(sched_slice(cfs_rq, se), se); - } -+#endif /* CONFIG_CACULE_SCHED */ - - #include "pelt.h" - #ifdef CONFIG_SMP -@@ -833,14 +1086,55 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) - } - #endif /* CONFIG_SMP */ - -+#ifdef CONFIG_CACULE_SCHED -+static void normalize_lifetime(u64 now, struct sched_entity *se) -+{ -+ struct cacule_node *cn = &se->cacule_node; -+ u64 max_life_ns, life_time, old_hrrn_x; -+ s64 diff; -+ -+ /* -+ * left shift 20 bits is approximately = * 1000000 -+ * we don't need the precision of life time -+ * Ex. for 30s, with left shift (20bits) == 31.457s -+ */ -+ max_life_ns = ((u64) cacule_max_lifetime) << 20; -+ life_time = now - cn->cacule_start_time; -+ diff = life_time - max_life_ns; -+ -+ if (diff > 0) { -+ // unmark YIELD. No need to check or remark since -+ // this normalize action doesn't happen very often -+ cn->vruntime &= YIELD_UNMARK; -+ -+ // multiply life_time by 1024 for more precision -+ old_hrrn_x = (life_time << 7) / ((cn->vruntime >> 3) | 1); -+ -+ // reset life to half max_life (i.e ~15s) -+ cn->cacule_start_time = now - (max_life_ns >> 1); -+ -+ // avoid division by zero -+ if (old_hrrn_x == 0) old_hrrn_x = 1; -+ -+ // reset vruntime based on old hrrn ratio -+ cn->vruntime = (max_life_ns << 9) / old_hrrn_x; -+ } -+} -+#endif /* CONFIG_CACULE_SCHED */ -+ - /* - * Update the current task's runtime statistics. 
- */ - static void update_curr(struct cfs_rq *cfs_rq) - { - struct sched_entity *curr = cfs_rq->curr; -+#ifdef CONFIG_CACULE_SCHED -+ u64 now = sched_clock(); -+ u64 delta_exec, delta_fair; -+#else - u64 now = rq_clock_task(rq_of(cfs_rq)); - u64 delta_exec; -+#endif - - if (unlikely(!curr)) - return; -@@ -857,8 +1151,16 @@ static void update_curr(struct cfs_rq *cfs_rq) - curr->sum_exec_runtime += delta_exec; - schedstat_add(cfs_rq->exec_clock, delta_exec); - -+#ifdef CONFIG_CACULE_SCHED -+ curr->cacule_node.last_run = now; -+ delta_fair = calc_delta_fair(delta_exec, curr); -+ curr->vruntime += delta_fair; -+ curr->cacule_node.vruntime += delta_fair; -+ normalize_lifetime(now, curr); -+#else - curr->vruntime += calc_delta_fair(delta_exec, curr); - update_min_vruntime(cfs_rq); -+#endif - - if (entity_is_task(curr)) { - struct task_struct *curtask = task_of(curr); -@@ -1026,7 +1328,6 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - static inline void - update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - { -- - if (!schedstat_enabled()) - return; - -@@ -1058,7 +1359,12 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) - /* - * We are starting a new run period: - */ -+#ifdef CONFIG_CACULE_SCHED -+ se->exec_start = sched_clock(); -+ se->cacule_node.last_run = sched_clock(); -+#else - se->exec_start = rq_clock_task(rq_of(cfs_rq)); -+#endif - } - - /************************************************** -@@ -4178,7 +4484,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} - - static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) - { --#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_CACULE_SCHED) - s64 d = se->vruntime - cfs_rq->min_vruntime; - - if (d < 0) -@@ -4189,6 +4495,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) - #endif - } - -+#if !defined(CONFIG_CACULE_SCHED) - static void - place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) - { -@@ -4220,6 +4527,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) - /* ensure we never gain time by being placed backwards. */ - se->vruntime = max_vruntime(se->vruntime, vruntime); - } -+#endif /* CONFIG_CACULE_SCHED */ - - static void check_enqueue_throttle(struct cfs_rq *cfs_rq); - -@@ -4278,18 +4586,23 @@ static inline bool cfs_bandwidth_used(void); - static void - enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - { -+#if !defined(CONFIG_CACULE_SCHED) - bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED); -+#endif - bool curr = cfs_rq->curr == se; - -+#if !defined(CONFIG_CACULE_SCHED) - /* - * If we're the current task, we must renormalise before calling - * update_curr(). - */ - if (renorm && curr) - se->vruntime += cfs_rq->min_vruntime; -+#endif - - update_curr(cfs_rq); - -+#if !defined(CONFIG_CACULE_SCHED) - /* - * Otherwise, renormalise after, such that we're placed at the current - * moment in time, instead of some random moment in the past. 
Being -@@ -4298,6 +4611,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - */ - if (renorm && !curr) - se->vruntime += cfs_rq->min_vruntime; -+#endif - - /* - * When enqueuing a sched_entity, we must: -@@ -4312,8 +4626,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - update_cfs_group(se); - account_entity_enqueue(cfs_rq, se); - -+#if !defined(CONFIG_CACULE_SCHED) - if (flags & ENQUEUE_WAKEUP) - place_entity(cfs_rq, se, 0); -+#endif - - check_schedstat_required(); - update_stats_enqueue(cfs_rq, se, flags); -@@ -4334,6 +4650,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - check_enqueue_throttle(cfs_rq); - } - -+#if !defined(CONFIG_CACULE_SCHED) - static void __clear_buddies_last(struct sched_entity *se) - { - for_each_sched_entity(se) { -@@ -4378,6 +4695,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) - if (cfs_rq->skip == se) - __clear_buddies_skip(se); - } -+#endif /* !CONFIG_CACULE_SCHED */ - - static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); - -@@ -4402,13 +4720,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - - update_stats_dequeue(cfs_rq, se, flags); - -+#if !defined(CONFIG_CACULE_SCHED) - clear_buddies(cfs_rq, se); -+#endif - - if (se != cfs_rq->curr) - __dequeue_entity(cfs_rq, se); - se->on_rq = 0; - account_entity_dequeue(cfs_rq, se); - -+#if !defined(CONFIG_CACULE_SCHED) - /* - * Normalize after update_curr(); which will also have moved - * min_vruntime if @se is the one holding it back. But before doing -@@ -4417,12 +4738,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - */ - if (!(flags & DEQUEUE_SLEEP)) - se->vruntime -= cfs_rq->min_vruntime; -+#endif - - /* return excess runtime on last dequeue */ - return_cfs_rq_runtime(cfs_rq); - - update_cfs_group(se); - -+#if !defined(CONFIG_CACULE_SCHED) - /* - * Now advance min_vruntime if @se was the entity holding it back, - * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be -@@ -4431,8 +4754,23 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - */ - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) - update_min_vruntime(cfs_rq); -+#endif - } - -+#ifdef CONFIG_CACULE_SCHED -+static struct sched_entity * -+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr); -+ -+/* -+ * Preempt the current task with a newly woken task if needed: -+ */ -+static void -+check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) -+{ -+ if (pick_next_entity(cfs_rq, curr) != curr) -+ resched_curr(rq_of(cfs_rq)); -+} -+#else - /* - * Preempt the current task with a newly woken task if needed: - */ -@@ -4472,6 +4810,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) - if (delta > ideal_runtime) - resched_curr(rq_of(cfs_rq)); - } -+#endif /* CONFIG_CACULE_SCHED */ - - static void - set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) -@@ -4506,6 +4845,31 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) - se->prev_sum_exec_runtime = se->sum_exec_runtime; - } - -+#ifdef CONFIG_CACULE_SCHED -+static struct sched_entity * -+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) -+{ -+ struct cacule_node *se = cfs_rq->head; -+ struct cacule_node *next; -+ u64 now = sched_clock(); -+ -+ if (!se) -+ return curr; -+ -+ next = se->next; -+ while (next) { -+ if (entity_before(now, se, next) == 1) -+ se = next; -+ -+ 
next = next->next; -+ } -+ -+ if (curr && entity_before(now, se, &curr->cacule_node) == 1) -+ return curr; -+ -+ return se_of(se); -+} -+#else - static int - wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); - -@@ -4566,6 +4930,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) - - return se; - } -+#endif /* CONFIG_CACULE_SCHED */ - - static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); - -@@ -5666,9 +6031,15 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) - assert_list_leaf_cfs_rq(rq); - - hrtick_update(rq); -+ -+#ifdef CONFIG_CACULE_RDB -+ update_IS(rq); -+#endif - } - -+#if !defined(CONFIG_CACULE_SCHED) - static void set_next_buddy(struct sched_entity *se); -+#endif - - /* - * The dequeue_task method is called before nr_running is -@@ -5700,12 +6071,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) - if (cfs_rq->load.weight) { - /* Avoid re-evaluating load for this entity: */ - se = parent_entity(se); -+#if !defined(CONFIG_CACULE_SCHED) - /* - * Bias pick_next to pick a task from this cfs_rq, as - * p is sleeping when it is within its sched_slice. - */ - if (task_sleep && se && !throttled_hierarchy(cfs_rq)) - set_next_buddy(se); -+#endif - break; - } - flags |= DEQUEUE_SLEEP; -@@ -5737,6 +6110,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) - dequeue_throttle: - util_est_update(&rq->cfs, p, task_sleep); - hrtick_update(rq); -+ -+#ifdef CONFIG_CACULE_RDB -+ update_IS(rq); -+#endif - } - - #ifdef CONFIG_SMP -@@ -5821,6 +6198,7 @@ static unsigned long capacity_of(int cpu) - return cpu_rq(cpu)->cpu_capacity; - } - -+#if !defined(CONFIG_CACULE_SCHED) - static void record_wakee(struct task_struct *p) - { - /* -@@ -5867,6 +6245,7 @@ static int wake_wide(struct task_struct *p) - return 0; - return 1; - } -+#endif /* CONFIG_CACULE_SCHED */ - - /* - * The purpose of wake_affine() is to quickly determine on which CPU we can run -@@ -6569,6 +6948,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) - return min_t(unsigned long, util, capacity_orig_of(cpu)); - } - -+#if !defined(CONFIG_CACULE_SCHED) - /* - * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued) - * to @dst_cpu. 
-@@ -6823,6 +7203,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) - - return -1; - } -+#endif /* CONFIG_CACULE_SCHED */ - - /* - * select_task_rq_fair: Select target runqueue for the waking task in domains -@@ -6847,6 +7228,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) - /* SD_flags and WF_flags share the first nibble */ - int sd_flag = wake_flags & 0xF; - -+#if !defined(CONFIG_CACULE_SCHED) - if (wake_flags & WF_TTWU) { - record_wakee(p); - -@@ -6859,6 +7241,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) - - want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); - } -+#endif /* CONFIG_CACULE_SCHED */ - - rcu_read_lock(); - for_each_domain(cpu, tmp) { -@@ -6905,6 +7288,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); - */ - static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) - { -+#if !defined(CONFIG_CACULE_SCHED) - /* - * As blocked tasks retain absolute vruntime the migration needs to - * deal with this by subtracting the old and adding the new -@@ -6930,6 +7314,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) - - se->vruntime -= min_vruntime; - } -+#endif /* CONFIG_CACULE_SCHED */ - - if (p->on_rq == TASK_ON_RQ_MIGRATING) { - /* -@@ -6975,6 +7360,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) - } - #endif /* CONFIG_SMP */ - -+#if !defined(CONFIG_CACULE_SCHED) - static unsigned long wakeup_gran(struct sched_entity *se) - { - unsigned long gran = sysctl_sched_wakeup_granularity; -@@ -7053,6 +7439,7 @@ static void set_skip_buddy(struct sched_entity *se) - for_each_sched_entity(se) - cfs_rq_of(se)->skip = se; - } -+#endif /* CONFIG_CACULE_SCHED */ - - /* - * Preempt the current task with a newly woken task if needed: -@@ -7061,9 +7448,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - { - struct task_struct *curr = rq->curr; - struct sched_entity *se = &curr->se, *pse = &p->se; -+ -+#if !defined(CONFIG_CACULE_SCHED) - struct cfs_rq *cfs_rq = task_cfs_rq(curr); - int scale = cfs_rq->nr_running >= sched_nr_latency; - int next_buddy_marked = 0; -+#endif /* CONFIG_CACULE_SCHED */ - - if (unlikely(se == pse)) - return; -@@ -7077,10 +7467,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) - return; - -+#if !defined(CONFIG_CACULE_SCHED) - if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) { - set_next_buddy(pse); - next_buddy_marked = 1; - } -+#endif /* CONFIG_CACULE_SCHED */ - - /* - * We can come here with TIF_NEED_RESCHED already set from new task -@@ -7110,6 +7502,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - find_matching_se(&se, &pse); - update_curr(cfs_rq_of(se)); - BUG_ON(!pse); -+ -+#ifdef CONFIG_CACULE_SCHED -+ if (entity_before(sched_clock(), &se->cacule_node, &pse->cacule_node) == 1) -+ goto preempt; -+#else - if (wakeup_preempt_entity(se, pse) == 1) { - /* - * Bias pick_next to pick the sched entity that is -@@ -7119,11 +7516,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - set_next_buddy(pse); - goto preempt; - } -+#endif /* CONFIG_CACULE_SCHED */ - - return; - - preempt: - resched_curr(rq); -+ -+#if !defined(CONFIG_CACULE_SCHED) - /* - * Only set the backward buddy when the current task is still - * on the rq. 
This can happen when a wakeup gets interleaved -@@ -7138,6 +7538,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - - if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) - set_last_buddy(se); -+#endif /* CONFIG_CACULE_SCHED */ - } - - struct task_struct * -@@ -7199,6 +7600,11 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf - cfs_rq = group_cfs_rq(se); - } while (cfs_rq); - -+ /* -+ * Here we picked a sched_entity starting from -+ * the same group of curr, but the task could -+ * be a child of the selected sched_entity. -+ */ - p = task_of(se); - - /* -@@ -7209,6 +7615,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf - if (prev != p) { - struct sched_entity *pse = &prev->se; - -+ /* while se and pse are not in the same group */ - while (!(cfs_rq = is_same_group(se, pse))) { - int se_depth = se->depth; - int pse_depth = pse->depth; -@@ -7223,6 +7630,9 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf - } - } - -+ /* Here we reached the point were both -+ * sched_entities are in the same group. -+ */ - put_prev_entity(cfs_rq, pse); - set_next_entity(cfs_rq, se); - } -@@ -7233,6 +7643,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf - if (prev) - put_prev_task(rq, prev); - -+ /* Going down the hierarchy */ - do { - se = pick_next_entity(cfs_rq, NULL); - set_next_entity(cfs_rq, se); -@@ -7242,6 +7653,15 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf - p = task_of(se); - - done: __maybe_unused; -+#ifdef CONFIG_CACULE_SCHED -+ if (prev) -+ prev->se.cacule_node.vruntime &= YIELD_UNMARK; -+ -+#ifdef CONFIG_CACULE_RDB -+ update_IS(rq); -+#endif -+ -+#endif - #ifdef CONFIG_SMP - /* - * Move the next running task to the front of -@@ -7259,6 +7679,11 @@ done: __maybe_unused; - return p; - - idle: -+#ifdef CONFIG_CACULE_RDB -+ WRITE_ONCE(rq->max_IS_score, ~0); -+ WRITE_ONCE(rq->to_migrate_task, NULL); -+#endif -+ - if (!rf) - return NULL; - -@@ -7312,7 +7737,10 @@ static void yield_task_fair(struct rq *rq) - { - struct task_struct *curr = rq->curr; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); -+ -+#if !defined(CONFIG_CACULE_SCHED) - struct sched_entity *se = &curr->se; -+#endif - - /* - * Are we the only task in the tree? -@@ -7320,7 +7748,9 @@ static void yield_task_fair(struct rq *rq) - if (unlikely(rq->nr_running == 1)) - return; - -+#if !defined(CONFIG_CACULE_SCHED) - clear_buddies(cfs_rq, se); -+#endif - - if (curr->policy != SCHED_BATCH) { - update_rq_clock(rq); -@@ -7336,7 +7766,9 @@ static void yield_task_fair(struct rq *rq) - rq_clock_skip_update(rq); - } - -+#if !defined(CONFIG_CACULE_SCHED) - set_skip_buddy(se); -+#endif - } - - static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) -@@ -7347,8 +7779,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) - if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se))) - return false; - -+#if !defined(CONFIG_CACULE_SCHED) - /* Tell the scheduler that we'd really like pse to run next. 
*/ - set_next_buddy(se); -+#endif - - yield_task_fair(rq); - -@@ -7556,6 +7990,7 @@ struct lb_env { - struct list_head tasks; - }; - -+#if !defined(CONFIG_CACULE_RDB) - /* - * Is this task likely cache-hot: - */ -@@ -7575,6 +8010,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) - if (env->sd->flags & SD_SHARE_CPUCAPACITY) - return 0; - -+#if !defined(CONFIG_CACULE_SCHED) - /* - * Buddy candidates are cache hot: - */ -@@ -7582,6 +8018,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) - (&p->se == cfs_rq_of(&p->se)->next || - &p->se == cfs_rq_of(&p->se)->last)) - return 1; -+#endif - - if (sysctl_sched_migration_cost == -1) - return 1; -@@ -7975,6 +8412,7 @@ static void attach_tasks(struct lb_env *env) - - rq_unlock(env->dst_rq, &rf); - } -+#endif - - #ifdef CONFIG_NO_HZ_COMMON - static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) -@@ -8024,6 +8462,7 @@ static inline void update_blocked_load_tick(struct rq *rq) {} - static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {} - #endif - -+#if !defined(CONFIG_CACULE_RDB) - static bool __update_blocked_others(struct rq *rq, bool *done) - { - const struct sched_class *curr_class; -@@ -8049,9 +8488,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done) - - return decayed; - } -+#endif - - #ifdef CONFIG_FAIR_GROUP_SCHED - -+#if !defined(CONFIG_CACULE_RDB) - static bool __update_blocked_fair(struct rq *rq, bool *done) - { - struct cfs_rq *cfs_rq, *pos; -@@ -8091,6 +8532,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done) - - return decayed; - } -+#endif - - /* - * Compute the hierarchical load factor for cfs_rq and all its ascendants. -@@ -8157,6 +8599,7 @@ static unsigned long task_h_load(struct task_struct *p) - } - #endif - -+#if !defined(CONFIG_CACULE_RDB) - static void update_blocked_averages(int cpu) - { - bool decayed = false, done = true; -@@ -8175,6 +8618,7 @@ static void update_blocked_averages(int cpu) - cpufreq_update_util(rq, 0); - rq_unlock_irqrestore(rq, &rf); - } -+#endif - - /********** Helpers for find_busiest_group ************************/ - -@@ -9278,6 +9722,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s - * different in groups. - */ - -+#if !defined(CONFIG_CACULE_RDB) - /** - * find_busiest_group - Returns the busiest group within the sched_domain - * if there is an imbalance. -@@ -9546,6 +9991,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, - - return busiest; - } -+#endif - - /* - * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but -@@ -9582,6 +10028,7 @@ imbalanced_active_balance(struct lb_env *env) - return 0; - } - -+#if !defined(CONFIG_CACULE_RDB) - static int need_active_balance(struct lb_env *env) - { - struct sched_domain *sd = env->sd; -@@ -9914,6 +10361,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, - out: - return ld_moved; - } -+#endif - - static inline unsigned long - get_sd_balance_interval(struct sched_domain *sd, int cpu_busy) -@@ -9952,6 +10400,7 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance) - *next_balance = next; - } - -+#if !defined(CONFIG_CACULE_RDB) - /* - * active_load_balance_cpu_stop is run by the CPU stopper. It pushes - * running tasks off the busiest CPU onto idle CPUs. 
It requires at -@@ -10037,6 +10486,7 @@ static int active_load_balance_cpu_stop(void *data) - } - - static DEFINE_SPINLOCK(balancing); -+#endif - - /* - * Scale the max load_balance interval with the number of CPUs in the system. -@@ -10047,6 +10497,7 @@ void update_max_interval(void) - max_load_balance_interval = HZ*num_online_cpus()/10; - } - -+#if !defined(CONFIG_CACULE_RDB) - /* - * It checks each scheduling domain to see if it is due to be balanced, - * and initiates a balancing operation if so. -@@ -10139,6 +10590,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) - rq->next_balance = next_balance; - - } -+#endif - - static inline int on_null_domain(struct rq *rq) - { -@@ -10172,6 +10624,7 @@ static inline int find_new_ilb(void) - return nr_cpu_ids; - } - -+#if !defined(CONFIG_CACULE_RDB) - /* - * Kick a CPU to do the nohz balancing, if it is time for it. We pick any - * idle CPU in the HK_FLAG_MISC housekeeping set (if there is one). -@@ -10322,6 +10775,7 @@ static void nohz_balancer_kick(struct rq *rq) - if (flags) - kick_ilb(flags); - } -+#endif /* CONFIG_CACULE_RDB */ - - static void set_cpu_sd_state_busy(int cpu) - { -@@ -10442,11 +10896,17 @@ static bool update_nohz_stats(struct rq *rq) - if (!time_after(jiffies, READ_ONCE(rq->last_blocked_load_update_tick))) - return true; - -+#if !defined(CONFIG_CACULE_RDB) - update_blocked_averages(cpu); -+#endif - - return rq->has_blocked_load; - } - -+#ifdef CONFIG_CACULE_RDB -+static int idle_try_pull_any(struct cfs_rq *cfs_rq); -+#endif -+ - /* - * Internal function that runs load balance for all idle cpus. The load balance - * can be a simple update of blocked load or a complete load balance with -@@ -10516,7 +10976,11 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags, - rq_unlock_irqrestore(rq, &rf); - - if (flags & NOHZ_BALANCE_KICK) -+#if !defined(CONFIG_CACULE_RDB) - rebalance_domains(rq, CPU_IDLE); -+#else -+ idle_try_pull_any(&rq->cfs); -+#endif - } - - if (time_after(next_balance, rq->next_balance)) { -@@ -10542,6 +11006,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags, - WRITE_ONCE(nohz.has_blocked, 1); - } - -+#if !defined(CONFIG_CACULE_RDB) - /* - * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the - * rebalancing for all the cpus for whom scheduler ticks are stopped. -@@ -10562,6 +11027,7 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) - - return true; - } -+#endif - - /* - * Check if we need to run the ILB for updating blocked load before entering -@@ -10592,9 +11058,11 @@ static void nohz_newidle_balance(struct rq *this_rq) - if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED)) - return; - -+#if !defined(CONFIG_CACULE_SCHED) - /* Will wake up very soon. 
No time for doing anything else*/ - if (this_rq->avg_idle < sysctl_sched_migration_cost) - return; -+#endif - - /* Don't need to update blocked load of idle CPUs*/ - if (!READ_ONCE(nohz.has_blocked) || -@@ -10609,6 +11077,7 @@ static void nohz_newidle_balance(struct rq *this_rq) - } - - #else /* !CONFIG_NO_HZ_COMMON */ -+#if !defined(CONFIG_CACULE_RDB) - static inline void nohz_balancer_kick(struct rq *rq) { } - - static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) -@@ -10617,8 +11086,134 @@ static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle - } - - static inline void nohz_newidle_balance(struct rq *this_rq) { } -+#endif -+ - #endif /* CONFIG_NO_HZ_COMMON */ - -+#ifdef CONFIG_CACULE_RDB -+static int -+can_migrate_task(struct task_struct *p, int dst_cpu, struct rq *src_rq) -+{ -+ if (task_running(src_rq, p)) -+ return 0; -+ -+ /* Disregard pcpu kthreads; they are where they need to be. */ -+ if (kthread_is_per_cpu(p)) -+ return 0; -+ -+ if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr)) -+ return 0; -+ -+ if (p->se.exec_start == 0) -+ return 0; -+ -+ return 1; -+} -+ -+static void push_to_unlock(struct rq *this_rq, -+ struct rq *dst_rq, -+ struct task_struct *p, -+ int dst_cpu) -+{ -+ struct rq_flags rf; -+ -+ // detach task -+ deactivate_task(this_rq, p, DEQUEUE_NOCLOCK); -+ set_task_cpu(p, dst_cpu); -+ -+ // unlock this rq -+ raw_spin_unlock(&this_rq->lock); -+ -+ /* push to */ -+ rq_lock_irqsave(dst_rq, &rf); -+ update_rq_clock(dst_rq); -+ -+ activate_task(dst_rq, p, ENQUEUE_NOCLOCK); -+ check_preempt_curr(dst_rq, p, 0); -+ -+ // unlock src rq -+ rq_unlock(dst_rq, &rf); -+ local_irq_restore(rf.flags); -+} -+ -+static void pull_from_unlock(struct rq *this_rq, -+ struct rq *src_rq, -+ struct rq_flags *rf, -+ struct task_struct *p, -+ int dst_cpu) -+{ -+ // detach task -+ deactivate_task(src_rq, p, DEQUEUE_NOCLOCK); -+ set_task_cpu(p, dst_cpu); -+ -+ // unlock src rq -+ rq_unlock(src_rq, rf); -+ local_irq_restore(rf->flags); -+ -+ // lock this rq -+ raw_spin_lock(&this_rq->lock); -+ update_rq_clock(this_rq); -+ -+ activate_task(this_rq, p, ENQUEUE_NOCLOCK); -+ check_preempt_curr(this_rq, p, 0); -+ -+ // unlock this rq -+ raw_spin_unlock(&this_rq->lock); -+} -+ -+static inline struct rq * -+find_max_IS_rq(struct rq *this_rq, int dst_cpu) -+{ -+ struct rq *tmp_rq, *max_rq = NULL; -+ int cpu; -+ unsigned int max_IS = this_rq->max_IS_score; -+ unsigned int local_IS; -+ -+ // find max hrrn -+ for_each_online_cpu(cpu) { -+ if (cpu == dst_cpu) -+ continue; -+ -+ tmp_rq = cpu_rq(cpu); -+ -+ if (tmp_rq->nr_running < 2 || !(READ_ONCE(tmp_rq->to_migrate_task))) -+ continue; -+ -+ local_IS = READ_ONCE(tmp_rq->max_IS_score); -+ -+ if (local_IS < max_IS) { -+ max_IS = local_IS; -+ max_rq = tmp_rq; -+ } -+ } -+ -+ return max_rq; -+} -+ -+static int try_pull_from(struct rq *src_rq, struct rq *this_rq) -+{ -+ struct rq_flags rf; -+ int dst_cpu = cpu_of(this_rq); -+ struct task_struct *p; -+ -+ rq_lock_irqsave(src_rq, &rf); -+ update_rq_clock(src_rq); -+ -+ if (src_rq->to_migrate_task && src_rq->nr_running > 1) { -+ p = src_rq->to_migrate_task; -+ -+ if (can_migrate_task(p, dst_cpu, src_rq)) { -+ pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu); -+ return 1; -+ } -+ } -+ -+ rq_unlock(src_rq, &rf); -+ local_irq_restore(rf.flags); -+ -+ return 0; -+} -+ - /* - * newidle_balance is called by schedule() if this_cpu is about to become - * idle. Attempts to pull tasks from other CPUs. 
-@@ -10629,6 +11224,111 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } - * > 0 - success, new (fair) tasks present - */ - static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) -+{ -+ int this_cpu = this_rq->cpu; -+ struct task_struct *p = NULL; -+ struct rq *src_rq; -+ int src_cpu; -+ struct rq_flags src_rf; -+ int pulled_task = 0; -+ int cores_round = 1; -+ -+ update_misfit_status(NULL, this_rq); -+ /* -+ * We must set idle_stamp _before_ calling idle_balance(), such that we -+ * measure the duration of idle_balance() as idle time. -+ */ -+ this_rq->idle_stamp = rq_clock(this_rq); -+ -+ /* -+ * Do not pull tasks towards !active CPUs... -+ */ -+ if (!cpu_active(this_cpu)) -+ return 0; -+ -+ /* -+ * This is OK, because current is on_cpu, which avoids it being picked -+ * for load-balance and preemption/IRQs are still disabled avoiding -+ * further scheduler activity on it and we're being very careful to -+ * re-start the picking loop. -+ */ -+ rq_unpin_lock(this_rq, rf); -+ raw_spin_unlock(&this_rq->lock); -+ -+again: -+ for_each_online_cpu(src_cpu) { -+ -+ if (src_cpu == this_cpu) -+ continue; -+ -+ if (cores_round && !cpus_share_cache(src_cpu, this_cpu)) -+ continue; -+ -+ src_rq = cpu_rq(src_cpu); -+ -+ if (src_rq->nr_running < 2 -+ || !(READ_ONCE(src_rq->to_migrate_task))) -+ continue; -+ -+ rq_lock_irqsave(src_rq, &src_rf); -+ update_rq_clock(src_rq); -+ -+ if (src_rq->nr_running < 2 || !(src_rq->to_migrate_task)) -+ goto next; -+ -+ p = src_rq->to_migrate_task; -+ -+ if (can_migrate_task(p, this_cpu, src_rq)) { -+ pull_from_unlock(this_rq, src_rq, &src_rf, p, this_cpu); -+ -+ pulled_task = 1; -+ goto out; -+ } -+ -+next: -+ rq_unlock(src_rq, &src_rf); -+ local_irq_restore(src_rf.flags); -+ -+ /* -+ * Stop searching for tasks to pull if there are -+ * now runnable tasks on this rq. -+ */ -+ if (pulled_task || this_rq->nr_running > 0) -+ goto out; -+ } -+ -+ if (cores_round) { -+ // now search for all cpus -+ cores_round = 0; -+ goto again; -+ } -+ -+out: -+ raw_spin_lock(&this_rq->lock); -+ -+ /* -+ * While browsing the domains, we released the rq lock, a task could -+ * have been enqueued in the meantime. Since we're not going idle, -+ * pretend we pulled a task. -+ */ -+ if (this_rq->cfs.h_nr_running && !pulled_task) -+ pulled_task = 1; -+ -+ /* Is there a task of a high priority class? 
*/ -+ if (this_rq->nr_running != this_rq->cfs.h_nr_running) -+ pulled_task = -1; -+ -+ if (pulled_task) -+ this_rq->idle_stamp = 0; -+ else -+ nohz_newidle_balance(this_rq); -+ -+ rq_repin_lock(this_rq, rf); -+ -+ return pulled_task; -+} -+#else -+static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) - { - unsigned long next_balance = jiffies + HZ; - int this_cpu = this_rq->cpu; -@@ -10657,7 +11357,10 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) - */ - rq_unpin_lock(this_rq, rf); - -- if (this_rq->avg_idle < sysctl_sched_migration_cost || -+ if ( -+#if !defined(CONFIG_CACULE_SCHED) -+ this_rq->avg_idle < sysctl_sched_migration_cost || -+#endif - !READ_ONCE(this_rq->rd->overload)) { - - rcu_read_lock(); -@@ -10782,6 +11485,217 @@ void trigger_load_balance(struct rq *rq) - - nohz_balancer_kick(rq); - } -+#endif -+ -+#ifdef CONFIG_CACULE_RDB -+static int -+idle_try_pull_any(struct cfs_rq *cfs_rq) -+{ -+ struct task_struct *p = NULL; -+ struct rq *this_rq = rq_of(cfs_rq), *src_rq; -+ int dst_cpu = cpu_of(this_rq); -+ int src_cpu; -+ struct rq_flags rf; -+ int pulled = 0; -+ int cores_round = 1; -+ -+again: -+ for_each_online_cpu(src_cpu) { -+ -+ if (src_cpu == dst_cpu) -+ continue; -+ -+ if (cores_round && !cpus_share_cache(src_cpu, dst_cpu)) -+ continue; -+ -+ src_rq = cpu_rq(src_cpu); -+ -+ if (src_rq->nr_running < 2 -+ || !(READ_ONCE(src_rq->to_migrate_task))) -+ continue; -+ -+ rq_lock_irqsave(src_rq, &rf); -+ update_rq_clock(src_rq); -+ -+ if (src_rq->nr_running < 2 || !(src_rq->to_migrate_task)) -+ goto next; -+ -+ p = src_rq->to_migrate_task; -+ -+ if (can_migrate_task(p, dst_cpu, src_rq)) { -+ pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu); -+ pulled = 1; -+ goto out; -+ } -+ -+next: -+ rq_unlock(src_rq, &rf); -+ local_irq_restore(rf.flags); -+ } -+ -+ if (cores_round) { -+ // now search for all cpus -+ cores_round = 0; -+ goto again; -+ } -+ -+out: -+ return pulled; -+} -+ -+ -+static int -+try_pull_higher_IS(struct rq *this_rq) -+{ -+ struct rq *max_rq; -+ int dst_cpu = cpu_of(this_rq); -+ -+ max_rq = find_max_IS_rq(this_rq, dst_cpu); -+ -+ if (!max_rq) -+ return 0; -+ -+ if (try_pull_from(max_rq, this_rq)) -+ return 1; -+ -+ return 0; -+} -+ -+static void try_push_any(struct rq *this_rq) -+{ -+ struct task_struct *p = NULL; -+ struct rq *dst_rq; -+ int dst_cpu; -+ int src_cpu = cpu_of(this_rq); -+ int cores_round = 1; -+ -+again: -+ for_each_online_cpu(dst_cpu) { -+ -+ if (dst_cpu == src_cpu) -+ continue; -+ -+ if (cores_round && !cpus_share_cache(src_cpu, dst_cpu)) -+ continue; -+ -+ dst_rq = cpu_rq(dst_cpu); -+ -+ if (dst_rq->nr_running >= this_rq->nr_running - 1) -+ continue; -+ -+ // lock this rq -+ raw_spin_lock(&this_rq->lock); -+ update_rq_clock(this_rq); -+ -+ if (!this_rq->to_migrate_task) { -+ // unlock this rq -+ raw_spin_unlock(&this_rq->lock); -+ return; -+ } -+ -+ p = this_rq->to_migrate_task; -+ -+ if (can_migrate_task(p, dst_cpu, this_rq)) { -+ push_to_unlock(this_rq, dst_rq, p, dst_cpu); -+ return; -+ } -+ -+ // unlock this rq -+ raw_spin_unlock(&this_rq->lock); -+ } -+ -+ if (cores_round) { -+ // now search for all cpus -+ cores_round = 0; -+ goto again; -+ } -+} -+ -+static void try_pull_any(struct rq *this_rq) -+{ -+ struct task_struct *p = NULL; -+ struct rq *src_rq; -+ int dst_cpu = cpu_of(this_rq); -+ int src_cpu; -+ struct rq_flags src_rf; -+ int cores_round = 1; -+ unsigned int this_max_IS = this_rq->max_IS_score; -+ -+again: -+ for_each_online_cpu(src_cpu) { -+ -+ if (src_cpu == dst_cpu) -+ continue; -+ -+ if 
(cores_round && !cpus_share_cache(src_cpu, dst_cpu)) -+ continue; -+ -+ src_rq = cpu_rq(src_cpu); -+ -+ p = READ_ONCE(src_rq->to_migrate_task); -+ if (src_rq->nr_running < 2 || !p -+ || READ_ONCE(src_rq->max_IS_score) >= this_max_IS) -+ continue; -+ -+ rq_lock_irqsave(src_rq, &src_rf); -+ update_rq_clock(src_rq); -+ -+ if (src_rq->nr_running < 2 || !(src_rq->to_migrate_task) -+ || src_rq->max_IS_score >= this_max_IS) -+ goto next; -+ -+ p = src_rq->to_migrate_task; -+ -+ if (can_migrate_task(p, dst_cpu, src_rq)) { -+ pull_from_unlock(this_rq, src_rq, &src_rf, p, dst_cpu); -+ return; -+ } -+ -+next: -+ rq_unlock(src_rq, &src_rf); -+ local_irq_restore(src_rf.flags); -+ } -+ -+ if (cores_round) { -+ // now search for all cpus -+ cores_round = 0; -+ goto again; -+ } -+} -+ -+static inline void -+active_balance(struct rq *rq) -+{ -+ if (rq->nr_running < 2) -+ try_pull_higher_IS(rq); -+ else { -+ try_push_any(rq); -+ try_pull_any(rq); -+ } -+} -+ -+void trigger_load_balance(struct rq *rq) -+{ -+ unsigned long interval; -+ -+#ifdef CONFIG_RDB_INTERVAL -+ if (time_before(jiffies, rq->next_balance)) -+ return; -+#endif -+ -+ if (rq->idle_balance) -+ idle_try_pull_any(&rq->cfs); -+ else { -+ active_balance(rq); -+ -+#ifdef CONFIG_RDB_INTERVAL -+ /* scale ms to jiffies */ -+ interval = msecs_to_jiffies(CONFIG_RDB_INTERVAL); -+ rq->next_balance = jiffies + interval; -+#endif -+ } -+} -+#endif - - static void rq_online_fair(struct rq *rq) - { -@@ -10818,6 +11732,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) - entity_tick(cfs_rq, se, queued); - } - -+#ifdef CONFIG_CACULE_RDB -+ update_IS(rq); -+#endif -+ - if (static_branch_unlikely(&sched_numa_balancing)) - task_tick_numa(rq, curr); - -@@ -10825,11 +11743,28 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) - update_overutilized_status(task_rq(curr)); - } - -+#ifdef CONFIG_CACULE_SCHED - /* - * called on fork with the child task as argument from the parent's context - * - child not yet on the tasklist - * - preemption disabled - */ -+ static void task_fork_fair(struct task_struct *p) -+{ -+ struct cfs_rq *cfs_rq; -+ struct rq *rq = this_rq(); -+ struct rq_flags rf; -+ -+ rq_lock(rq, &rf); -+ update_rq_clock(rq); -+ -+ cfs_rq = task_cfs_rq(current); -+ if (cfs_rq->curr) -+ update_curr(cfs_rq); -+ -+ rq_unlock(rq, &rf); -+} -+#else - static void task_fork_fair(struct task_struct *p) - { - struct cfs_rq *cfs_rq; -@@ -10860,6 +11795,7 @@ static void task_fork_fair(struct task_struct *p) - se->vruntime -= cfs_rq->min_vruntime; - rq_unlock(rq, &rf); - } -+#endif /* CONFIG_CACULE_SCHED */ - - /* - * Priority of the task has changed. 
Check to see if we preempt -@@ -10978,6 +11914,8 @@ static void attach_entity_cfs_rq(struct sched_entity *se) - static void detach_task_cfs_rq(struct task_struct *p) - { - struct sched_entity *se = &p->se; -+ -+#if !defined(CONFIG_CACULE_SCHED) - struct cfs_rq *cfs_rq = cfs_rq_of(se); - - if (!vruntime_normalized(p)) { -@@ -10988,6 +11926,7 @@ static void detach_task_cfs_rq(struct task_struct *p) - place_entity(cfs_rq, se, 0); - se->vruntime -= cfs_rq->min_vruntime; - } -+#endif - - detach_entity_cfs_rq(se); - } -@@ -10995,12 +11934,17 @@ static void detach_task_cfs_rq(struct task_struct *p) - static void attach_task_cfs_rq(struct task_struct *p) - { - struct sched_entity *se = &p->se; -+ -+#if !defined(CONFIG_CACULE_SCHED) - struct cfs_rq *cfs_rq = cfs_rq_of(se); -+#endif - - attach_entity_cfs_rq(se); - -+#if !defined(CONFIG_CACULE_SCHED) - if (!vruntime_normalized(p)) - se->vruntime += cfs_rq->min_vruntime; -+#endif - } - - static void switched_from_fair(struct rq *rq, struct task_struct *p) -@@ -11056,13 +12000,22 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) - void init_cfs_rq(struct cfs_rq *cfs_rq) - { - cfs_rq->tasks_timeline = RB_ROOT_CACHED; -+ -+#if !defined(CONFIG_CACULE_SCHED) - cfs_rq->min_vruntime = (u64)(-(1LL << 20)); - #ifndef CONFIG_64BIT - cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; - #endif -+#endif /* CONFIG_CACULE_SCHED */ -+ - #ifdef CONFIG_SMP - raw_spin_lock_init(&cfs_rq->removed.lock); - #endif -+ -+#ifdef CONFIG_CACULE_SCHED -+ cfs_rq->head = NULL; -+ cfs_rq->tail = NULL; -+#endif - } - - #ifdef CONFIG_FAIR_GROUP_SCHED -@@ -11387,7 +12340,9 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) - __init void init_sched_fair_class(void) - { - #ifdef CONFIG_SMP -+#if !defined(CONFIG_CACULE_RDB) - open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); -+#endif - - #ifdef CONFIG_NO_HZ_COMMON - nohz.next_balance = jiffies; -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 35f7efed75c4..6ab803743b40 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -159,6 +159,11 @@ extern void call_trace_sched_update_nr_running(struct rq *rq, int count); - */ - #define RUNTIME_INF ((u64)~0ULL) - -+#ifdef CONFIG_CACULE_SCHED -+#define YIELD_MARK 0x8000000000000000ULL -+#define YIELD_UNMARK 0x7FFFFFFFFFFFFFFFULL -+#endif -+ - static inline int idle_policy(int policy) - { - return policy == SCHED_IDLE; -@@ -525,10 +530,13 @@ struct cfs_rq { - unsigned int idle_h_nr_running; /* SCHED_IDLE */ - - u64 exec_clock; -+ -+#if !defined(CONFIG_CACULE_SCHED) - u64 min_vruntime; - #ifndef CONFIG_64BIT - u64 min_vruntime_copy; - #endif -+#endif /* CONFIG_CACULE_SCHED */ - - struct rb_root_cached tasks_timeline; - -@@ -537,9 +545,14 @@ struct cfs_rq { - * It is set to NULL otherwise (i.e when none are currently running). 
- */ - struct sched_entity *curr; -+#ifdef CONFIG_CACULE_SCHED -+ struct cacule_node *head; -+ struct cacule_node *tail; -+#else - struct sched_entity *next; - struct sched_entity *last; - struct sched_entity *skip; -+#endif // CONFIG_CACULE_SCHED - - #ifdef CONFIG_SCHED_DEBUG - unsigned int nr_spread_over; -@@ -943,6 +956,11 @@ struct rq { - struct rt_rq rt; - struct dl_rq dl; - -+#ifdef CONFIG_CACULE_RDB -+ unsigned int max_IS_score; -+ struct task_struct *to_migrate_task; -+#endif -+ - #ifdef CONFIG_FAIR_GROUP_SCHED - /* list of leaf cfs_rq on this CPU: */ - struct list_head leaf_cfs_rq_list; -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index d4a78e08f6d8..d85615ec6cb9 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -1736,6 +1736,59 @@ static struct ctl_table kern_table[] = { - .mode = 0644, - .proc_handler = proc_dointvec, - }, -+#ifdef CONFIG_CACULE_SCHED -+ { -+ .procname = "sched_interactivity_factor", -+ .data = &interactivity_factor, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ { -+ .procname = "sched_max_lifetime_ms", -+ .data = &cacule_max_lifetime, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ { -+ .procname = "sched_cache_factor", -+ .data = &cache_factor, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ { -+ .procname = "sched_cache_divisor", -+ .data = &cache_divisor, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ { -+ .procname = "sched_starve_factor", -+ .data = &starve_factor, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ { -+ .procname = "sched_starve_divisor", -+ .data = &starve_divisor, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ { -+ .procname = "sched_cacule_yield", -+ .data = &cacule_yield, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = &one_ul, -+ }, -+#endif - #ifdef CONFIG_SCHEDSTATS - { - .procname = "sched_schedstats", diff --git a/cacule-5.14.patch b/cacule-5.14.patch new file mode 100644 index 000000000000..050fa065ddc7 --- /dev/null +++ b/cacule-5.14.patch @@ -0,0 +1,1232 @@ +diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst +index 426162009ce9..f15a268936bf 100644 +--- a/Documentation/admin-guide/sysctl/kernel.rst ++++ b/Documentation/admin-guide/sysctl/kernel.rst +@@ -1084,6 +1084,10 @@ reboot-cmd (SPARC only) + ROM/Flash boot loader. Maybe to tell it what to do after + rebooting. ??? + ++sched_interactivity_factor (CacULE scheduler only) ++================================================== ++Sets the value *m* for interactivity score calculations. See ++Figure 1 in https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf + + sched_energy_aware + ================== +diff --git a/Documentation/scheduler/sched-CacULE.rst b/Documentation/scheduler/sched-CacULE.rst +new file mode 100644 +index 000000000000..82b0847c468a +--- /dev/null ++++ b/Documentation/scheduler/sched-CacULE.rst +@@ -0,0 +1,76 @@ ++====================================== ++The CacULE Scheduler by Hamad Al Marri. ++====================================== ++ ++1. Overview ++============= ++ ++The CacULE CPU scheduler is based on interactivity score mechanism. ++The interactivity score is inspired by the ULE scheduler (FreeBSD ++scheduler). 
++
++1.1 About CacULE Scheduler
++--------------------------
++
++  - Each CPU has its own runqueue.
++
++  - The NORMAL runqueue is a linked list of sched_entities (instead of an RB-Tree).
++
++  - RT and the other runqueues are just the same as in CFS.
++
++  - A waking task preempts the currently running task if it has a better
++    (lower) interactivity score.
++
++
++1.2 Complexity
++--------------
++
++The complexity of enqueueing and dequeuing a task is O(1).
++
++The complexity of picking the next task is O(n), where n is the number of tasks
++in a runqueue (each CPU has its own runqueue).
++
++Note: O(n) sounds scary, but on a typical 4-CPU machine running desktop or
++mobile workloads the number of runnable tasks rarely exceeds 10 at pick-next
++time; sleeping tasks are excluded, since they are dequeued when they sleep and
++enqueued again when they wake up.
++
++
++2. The CacULE Interactivity Score
++=================================
++
++The interactivity score is inspired by the ULE scheduler (FreeBSD scheduler).
++For more information see: https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
++CacULE does not replace CFS with ULE; it only replaces CFS's pick-next-task
++mechanism with ULE's interactivity-score mechanism for choosing the next task
++to run.
++
++
++2.1 sched_interactivity_factor
++------------------------------
++Sets the value *m* for interactivity score calculations. See Figure 1 in
++https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
++The default value in CacULE is 10, which means that the Maximum Interactive
++Score is 20 (since m = Maximum Interactive Score / 2).
++You can tune sched_interactivity_factor with the sysctl command:
++
++	sysctl kernel.sched_interactivity_factor=50
++
++This command changes sched_interactivity_factor from 10 to 50.
++
++
++3. Scheduling policies
++======================
++
++CacULE, same as CFS, implements three scheduling policies:
++
++  - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
++    policy that is used for regular tasks.
++
++  - SCHED_BATCH: Does not preempt nearly as often as regular tasks
++    would, thereby allowing tasks to run longer and make better use of
++    caches, but at the cost of interactivity. This is well suited for
++    batch jobs.
++
++  - SCHED_IDLE: This is even weaker than nice 19, but it is not a true
++    idle timer scheduler, in order to avoid getting into priority
++    inversion problems which would deadlock the machine.
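For illustration, the score described above is the one computed by calc_interactivity() in the kernel/sched/fair.c hunks further down in this patch. The following standalone userspace C sketch models that formula; it is not part of the patch, the name calc_score and the sample lifetime/vruntime values are illustrative only, and it uses the interactivity_factor default of 32768 that the patch actually sets in fair.c (the documentation's value of 10 is a smaller example).

/*
 * Editorial sketch (not part of the patch): models the score formula
 * from calc_interactivity(). Lower scores mean "more interactive"
 * (mostly-sleeping tasks); scores approach 2*m for pure CPU burners.
 */
#include <stdio.h>
#include <stdint.h>

static const uint64_t m = 32768;	/* interactivity_factor default in this patch */

static unsigned int calc_score(uint64_t life_ns, uint64_t vruntime_ns)
{
	uint64_t vr = vruntime_ns | 1;		/* OR with 1 avoids a zero divisor */
	uint64_t sleep = 1;

	if (life_ns > vr)
		sleep = (life_ns - vr) | 1;	/* lifetime not spent running */

	if (sleep >= vr)			/* slept at least as long as it ran */
		return m / (sleep / vr);	/* 0 .. m   (interactive)  */
	return (2 * m) - (m / (vr / sleep));	/* m .. 2m  (CPU-bound)    */
}

int main(void)
{
	/* Illustrative values: 1 second of lifetime, varying CPU usage. */
	printf("mostly sleeping: %u\n", calc_score(1000000000ULL, 10000000ULL));
	printf("half sleeping:   %u\n", calc_score(1000000000ULL, 500000000ULL));
	printf("mostly running:  %u\n", calc_score(1000000000ULL, 990000000ULL));
	return 0;
}

A mostly-sleeping task scores in the low hundreds, a half-busy task lands near m, and a near-constant runner approaches 2*m, which is the ordering entity_before() relies on when comparing two entities.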
+diff --git a/include/linux/sched.h b/include/linux/sched.h +index ec8d07d88641..9998b59cda03 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -462,10 +462,22 @@ struct sched_statistics { + #endif + }; + ++#ifdef CONFIG_CACULE_SCHED ++struct cacule_node { ++ struct cacule_node* next; ++ struct cacule_node* prev; ++ u64 cacule_start_time; ++ u64 vruntime; ++}; ++#endif ++ + struct sched_entity { + /* For load-balancing: */ + struct load_weight load; + struct rb_node run_node; ++#ifdef CONFIG_CACULE_SCHED ++ struct cacule_node cacule_node; ++#endif + struct list_head group_node; + unsigned int on_rq; + +diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h +index db2c0f34aaaf..c8ba145ea8aa 100644 +--- a/include/linux/sched/sysctl.h ++++ b/include/linux/sched/sysctl.h +@@ -32,6 +32,12 @@ extern unsigned int sysctl_sched_latency; + extern unsigned int sysctl_sched_min_granularity; + extern unsigned int sysctl_sched_wakeup_granularity; + ++#ifdef CONFIG_CACULE_SCHED ++extern unsigned int interactivity_factor; ++extern unsigned int cacule_max_lifetime; ++extern int cacule_yield; ++#endif ++ + enum sched_tunable_scaling { + SCHED_TUNABLESCALING_NONE, + SCHED_TUNABLESCALING_LOG, +diff --git a/init/Kconfig b/init/Kconfig +index 55f9f7738ebb..e8fb30865407 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -837,6 +837,16 @@ config UCLAMP_BUCKETS_COUNT + + endmenu + ++config CACULE_SCHED ++ bool "CacULE CPU scheduler" ++ default y ++ help ++ The CacULE CPU scheduler is based on interactivity score mechanism. ++ The interactivity score is inspired by the ULE scheduler (FreeBSD ++ scheduler). ++ ++ If unsure, say Y here. ++ + # + # For architectures that want to enable the support for NUMA-affine scheduler + # balancing logic: +@@ -1234,6 +1244,7 @@ config SCHED_AUTOGROUP + select CGROUPS + select CGROUP_SCHED + select FAIR_GROUP_SCHED ++ default y + help + This option optimizes the scheduler for common desktop workloads by + automatically creating and populating task groups. 
This separation +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index f3b27c6c5153..c5a31f210bcf 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3943,6 +3943,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) + p->se.prev_sum_exec_runtime = 0; + p->se.nr_migrations = 0; + p->se.vruntime = 0; ++ ++#ifdef CONFIG_CACULE_SCHED ++ p->se.cacule_node.vruntime = 0; ++#endif ++ + INIT_LIST_HEAD(&p->se.group_node); + + #ifdef CONFIG_FAIR_GROUP_SCHED +@@ -4215,6 +4220,10 @@ void wake_up_new_task(struct task_struct *p) + update_rq_clock(rq); + post_init_entity_util_avg(p); + ++#ifdef CONFIG_CACULE_SCHED ++ p->se.cacule_node.cacule_start_time = sched_clock(); ++#endif ++ + activate_task(rq, p, ENQUEUE_NOCLOCK); + trace_sched_wakeup_new(p); + check_preempt_curr(rq, p, WF_FORK); +@@ -5026,7 +5035,9 @@ static void sched_tick_remote(struct work_struct *work) + struct rq *rq = cpu_rq(cpu); + struct task_struct *curr; + struct rq_flags rf; ++#if !defined(CONFIG_CACULE_SCHED) + u64 delta; ++#endif + int os; + + /* +@@ -5046,6 +5057,7 @@ static void sched_tick_remote(struct work_struct *work) + + update_rq_clock(rq); + ++#if !defined(CONFIG_CACULE_SCHED) + if (!is_idle_task(curr)) { + /* + * Make sure the next tick runs within a reasonable +@@ -5054,6 +5066,8 @@ static void sched_tick_remote(struct work_struct *work) + delta = rq_clock_task(rq) - curr->se.exec_start; + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + } ++#endif ++ + curr->sched_class->task_tick(rq, curr, 0); + + calc_load_nohz_remote(rq); +@@ -8980,6 +8994,10 @@ void __init sched_init(void) + BUG_ON(&dl_sched_class + 1 != &stop_sched_class); + #endif + ++#ifdef CONFIG_CACULE_SCHED ++ printk(KERN_INFO "CacULE CPU scheduler v5.14 by Hamad Al Marri."); ++#endif ++ + wait_bit_init(); + + #ifdef CONFIG_FAIR_GROUP_SCHED +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index 0c5ec2776ddf..ee8d4fb3d80f 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -560,8 +560,11 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) + + void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) + { +- s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, +- spread, rq0_min_vruntime, spread0; ++ s64 MIN_vruntime = -1, max_vruntime = -1, ++#if !defined(CONFIG_CACULE_SCHED) ++ min_vruntime, rq0_min_vruntime, spread0, ++#endif ++ spread; + struct rq *rq = cpu_rq(cpu); + struct sched_entity *last; + unsigned long flags; +@@ -582,21 +585,27 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) + last = __pick_last_entity(cfs_rq); + if (last) + max_vruntime = last->vruntime; ++#if !defined(CONFIG_CACULE_SCHED) + min_vruntime = cfs_rq->min_vruntime; + rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; ++#endif + raw_spin_rq_unlock_irqrestore(rq, flags); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", + SPLIT_NS(MIN_vruntime)); ++#if !defined(CONFIG_CACULE_SCHED) + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", + SPLIT_NS(min_vruntime)); ++#endif + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime", + SPLIT_NS(max_vruntime)); + spread = max_vruntime - MIN_vruntime; + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", + SPLIT_NS(spread)); ++#if !defined(CONFIG_CACULE_SCHED) + spread0 = min_vruntime - rq0_min_vruntime; + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", + SPLIT_NS(spread0)); ++#endif + SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", + cfs_rq->nr_spread_over); + SEQ_printf(m, " .%-30s: %d\n", "nr_running", 
cfs_rq->nr_running); +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 44c452072a1b..dfb7437e7718 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -19,9 +19,21 @@ + * + * Adaptive scheduling granularity, math enhancements by Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra ++ * ++ * CacULE enhancements CPU cache and scheduler based on ++ * Interactivity Score. ++ * (C) 2020 Hamad Al Marri <hamad.s.almarri@gmail.com> + */ + #include "sched.h" + ++#ifdef CONFIG_CACULE_SCHED ++unsigned int __read_mostly cacule_max_lifetime = 22000; // in ms ++unsigned int __read_mostly interactivity_factor = 32768; ++int __read_mostly cacule_yield = 1; ++#define YIELD_MARK 0x8000000000000000ULL ++#define YIELD_UNMARK 0x7FFFFFFFFFFFFFFFULL ++#endif ++ + /* + * Targeted preemption latency for CPU-bound tasks: + * +@@ -82,7 +94,11 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; + unsigned int sysctl_sched_wakeup_granularity = 1000000UL; + static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; + ++#ifdef CONFIG_CACULE_SCHED ++const_debug unsigned int sysctl_sched_migration_cost = 200000UL; ++#else + const_debug unsigned int sysctl_sched_migration_cost = 500000UL; ++#endif + + int sched_thermal_decay_shift; + static int __init setup_sched_thermal_decay_shift(char *str) +@@ -263,6 +279,14 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight + + const struct sched_class fair_sched_class; + ++ ++#ifdef CONFIG_CACULE_SCHED ++static inline struct sched_entity *se_of(struct cacule_node *cn) ++{ ++ return container_of(cn, struct sched_entity, cacule_node); ++} ++#endif ++ + /************************************************************** + * CFS operations on generic schedulable entities: + */ +@@ -476,7 +500,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec); + /************************************************************** + * Scheduling class tree data structure manipulation methods: + */ +- ++#if !defined(CONFIG_CACULE_SCHED) + static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime) + { + s64 delta = (s64)(vruntime - max_vruntime); +@@ -539,7 +563,111 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) + { + return entity_before(__node_2_se(a), __node_2_se(b)); + } ++#endif /* CONFIG_CACULE_SCHED */ ++ ++#ifdef CONFIG_CACULE_SCHED ++static unsigned int ++calc_interactivity(u64 now, struct cacule_node *se) ++{ ++ u64 l_se, vr_se, sleep_se = 1ULL, u64_factor_m, _2m; ++ unsigned int score_se; ++ ++ /* ++ * in case of vruntime==0, logical OR with 1 would ++ * make sure that the least sig. bit is 1 ++ */ ++ l_se = now - se->cacule_start_time; ++ vr_se = se->vruntime | 1; ++ u64_factor_m = interactivity_factor; ++ _2m = u64_factor_m << 1; ++ ++ /* safety check */ ++ if (likely(l_se > vr_se)) ++ sleep_se = (l_se - vr_se) | 1; ++ ++ if (sleep_se >= vr_se) ++ score_se = u64_factor_m / (sleep_se / vr_se); ++ else ++ score_se = _2m - (u64_factor_m / (vr_se / sleep_se)); ++ ++ return score_se; ++} ++ ++/* ++ * Does se have lower interactivity score value (i.e. interactive) than curr? 
If yes, return 1, ++ * otherwise return -1 ++ * se is before curr if se has lower interactivity score value ++ * the lower score, the more interactive ++ */ ++static inline int ++entity_before(u64 now, struct cacule_node *curr, struct cacule_node *se) ++{ ++ unsigned int score_curr, score_se; ++ int diff; ++ ++ score_curr = calc_interactivity(now, curr); ++ score_se = calc_interactivity(now, se); ++ diff = score_se - score_curr; ++ ++ if (diff < 0) ++ return 1; ++ ++ return -1; ++} ++ ++/* ++ * Enqueue an entity ++ */ ++static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se) ++{ ++ struct cacule_node *se = &(_se->cacule_node); ++ ++ se->next = NULL; ++ se->prev = NULL; ++ ++ if (cfs_rq->head) { ++ // insert se at head ++ se->next = cfs_rq->head; ++ cfs_rq->head->prev = se; ++ ++ // lastly reset the head ++ cfs_rq->head = se; ++ } else { ++ // if empty rq ++ cfs_rq->head = se; ++ } ++} ++ ++static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se) ++{ ++ struct cacule_node *se = &(_se->cacule_node); ++ ++ // if only one se in rq ++ if (cfs_rq->head->next == NULL) { ++ cfs_rq->head = NULL; ++ } else if (se == cfs_rq->head) { ++ // if it is the head ++ cfs_rq->head = cfs_rq->head->next; ++ cfs_rq->head->prev = NULL; ++ } else { ++ // if in the middle ++ struct cacule_node *prev = se->prev; ++ struct cacule_node *next = se->next; ++ ++ prev->next = next; ++ if (next) ++ next->prev = prev; ++ } ++} ++ ++struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) ++{ ++ if (!cfs_rq->head) ++ return NULL; + ++ return se_of(cfs_rq->head); ++} ++#else + /* + * Enqueue an entity into the rb-tree: + */ +@@ -572,16 +700,24 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) + + return __node_2_se(next); + } ++#endif /* CONFIG_CACULE_SCHED */ + + #ifdef CONFIG_SCHED_DEBUG + struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) + { ++#ifdef CONFIG_CACULE_SCHED ++ if (!cfs_rq->head) ++ return NULL; ++ ++ return se_of(cfs_rq->head); ++#else + struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); + + if (!last) + return NULL; + + return __node_2_se(last); ++#endif /* CONFIG_CACULE_SCHED */ + } + + /************************************************************** +@@ -671,6 +807,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) + return slice; + } + ++#if !defined(CONFIG_CACULE_SCHED) + /* + * We calculate the vruntime slice of a to-be-inserted task. + * +@@ -680,6 +817,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) + { + return calc_delta_fair(sched_slice(cfs_rq, se), se); + } ++#endif /* CONFIG_CACULE_SCHED */ + + #include "pelt.h" + #ifdef CONFIG_SMP +@@ -787,14 +925,55 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) + } + #endif /* CONFIG_SMP */ + ++#ifdef CONFIG_CACULE_SCHED ++static void normalize_lifetime(u64 now, struct sched_entity *se) ++{ ++ struct cacule_node *cn = &se->cacule_node; ++ u64 max_life_ns, life_time, old_hrrn_x; ++ s64 diff; ++ ++ /* ++ * left shift 20 bits is approximately = * 1000000 ++ * we don't need the precision of life time ++ * Ex. for 30s, with left shift (20bits) == 31.457s ++ */ ++ max_life_ns = ((u64) cacule_max_lifetime) << 20; ++ life_time = now - cn->cacule_start_time; ++ diff = life_time - max_life_ns; ++ ++ if (diff > 0) { ++ // unmark YIELD. 
No need to check or remark since ++ // this normalize action doesn't happen very often ++ cn->vruntime &= YIELD_UNMARK; ++ ++ // multiply life_time by 1024 for more precision ++ old_hrrn_x = (life_time << 7) / ((cn->vruntime >> 3) | 1); ++ ++ // reset life to half max_life (i.e ~15s) ++ cn->cacule_start_time = now - (max_life_ns >> 1); ++ ++ // avoid division by zero ++ if (old_hrrn_x == 0) old_hrrn_x = 1; ++ ++ // reset vruntime based on old hrrn ratio ++ cn->vruntime = (max_life_ns << 9) / old_hrrn_x; ++ } ++} ++#endif /* CONFIG_CACULE_SCHED */ ++ + /* + * Update the current task's runtime statistics. + */ + static void update_curr(struct cfs_rq *cfs_rq) + { + struct sched_entity *curr = cfs_rq->curr; ++#ifdef CONFIG_CACULE_SCHED ++ u64 now = sched_clock(); ++ u64 delta_exec, delta_fair; ++#else + u64 now = rq_clock_task(rq_of(cfs_rq)); + u64 delta_exec; ++#endif + + if (unlikely(!curr)) + return; +@@ -811,8 +990,15 @@ static void update_curr(struct cfs_rq *cfs_rq) + curr->sum_exec_runtime += delta_exec; + schedstat_add(cfs_rq->exec_clock, delta_exec); + ++#ifdef CONFIG_CACULE_SCHED ++ delta_fair = calc_delta_fair(delta_exec, curr); ++ curr->vruntime += delta_fair; ++ curr->cacule_node.vruntime += delta_fair; ++ normalize_lifetime(now, curr); ++#else + curr->vruntime += calc_delta_fair(delta_exec, curr); + update_min_vruntime(cfs_rq); ++#endif + + if (entity_is_task(curr)) { + struct task_struct *curtask = task_of(curr); +@@ -1015,7 +1201,11 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) + /* + * We are starting a new run period: + */ ++#ifdef CONFIG_CACULE_SCHED ++ se->exec_start = sched_clock(); ++#else + se->exec_start = rq_clock_task(rq_of(cfs_rq)); ++#endif + } + + /************************************************** +@@ -4148,7 +4338,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} + + static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) + { +-#ifdef CONFIG_SCHED_DEBUG ++#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_CACULE_SCHED) + s64 d = se->vruntime - cfs_rq->min_vruntime; + + if (d < 0) +@@ -4159,6 +4349,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) + #endif + } + ++#if !defined(CONFIG_CACULE_SCHED) + static void + place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) + { +@@ -4190,6 +4381,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) + /* ensure we never gain time by being placed backwards. */ + se->vruntime = max_vruntime(se->vruntime, vruntime); + } ++#endif /* CONFIG_CACULE_SCHED */ + + static void check_enqueue_throttle(struct cfs_rq *cfs_rq); + +@@ -4248,18 +4440,23 @@ static inline bool cfs_bandwidth_used(void); + static void + enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + { ++#if !defined(CONFIG_CACULE_SCHED) + bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED); ++#endif + bool curr = cfs_rq->curr == se; + ++#if !defined(CONFIG_CACULE_SCHED) + /* + * If we're the current task, we must renormalise before calling + * update_curr(). + */ + if (renorm && curr) + se->vruntime += cfs_rq->min_vruntime; ++#endif + + update_curr(cfs_rq); + ++#if !defined(CONFIG_CACULE_SCHED) + /* + * Otherwise, renormalise after, such that we're placed at the current + * moment in time, instead of some random moment in the past. 
Being +@@ -4268,6 +4465,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + */ + if (renorm && !curr) + se->vruntime += cfs_rq->min_vruntime; ++#endif + + /* + * When enqueuing a sched_entity, we must: +@@ -4282,8 +4480,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + update_cfs_group(se); + account_entity_enqueue(cfs_rq, se); + ++#if !defined(CONFIG_CACULE_SCHED) + if (flags & ENQUEUE_WAKEUP) + place_entity(cfs_rq, se, 0); ++#endif + + check_schedstat_required(); + update_stats_enqueue(cfs_rq, se, flags); +@@ -4304,6 +4504,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + check_enqueue_throttle(cfs_rq); + } + ++#if !defined(CONFIG_CACULE_SCHED) + static void __clear_buddies_last(struct sched_entity *se) + { + for_each_sched_entity(se) { +@@ -4348,6 +4549,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) + if (cfs_rq->skip == se) + __clear_buddies_skip(se); + } ++#endif /* !CONFIG_CACULE_SCHED */ + + static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); + +@@ -4372,13 +4574,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + + update_stats_dequeue(cfs_rq, se, flags); + ++#if !defined(CONFIG_CACULE_SCHED) + clear_buddies(cfs_rq, se); ++#endif + + if (se != cfs_rq->curr) + __dequeue_entity(cfs_rq, se); + se->on_rq = 0; + account_entity_dequeue(cfs_rq, se); + ++#if !defined(CONFIG_CACULE_SCHED) + /* + * Normalize after update_curr(); which will also have moved + * min_vruntime if @se is the one holding it back. But before doing +@@ -4387,12 +4592,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + */ + if (!(flags & DEQUEUE_SLEEP)) + se->vruntime -= cfs_rq->min_vruntime; ++#endif + + /* return excess runtime on last dequeue */ + return_cfs_rq_runtime(cfs_rq); + + update_cfs_group(se); + ++#if !defined(CONFIG_CACULE_SCHED) + /* + * Now advance min_vruntime if @se was the entity holding it back, + * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be +@@ -4401,8 +4608,23 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + */ + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) + update_min_vruntime(cfs_rq); ++#endif + } + ++#ifdef CONFIG_CACULE_SCHED ++static struct sched_entity * ++pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr); ++ ++/* ++ * Preempt the current task with a newly woken task if needed: ++ */ ++static void ++check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ++{ ++ if (pick_next_entity(cfs_rq, curr) != curr) ++ resched_curr(rq_of(cfs_rq)); ++} ++#else + /* + * Preempt the current task with a newly woken task if needed: + */ +@@ -4442,11 +4664,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) + if (delta > ideal_runtime) + resched_curr(rq_of(cfs_rq)); + } ++#endif /* CONFIG_CACULE_SCHED */ + + static void + set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) + { ++#if !defined(CONFIG_CACULE_SCHED) + clear_buddies(cfs_rq, se); ++#endif + + /* 'current' is not kept within the tree. 
*/ + if (se->on_rq) { +@@ -4478,6 +4703,31 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) + se->prev_sum_exec_runtime = se->sum_exec_runtime; + } + ++#ifdef CONFIG_CACULE_SCHED ++static struct sched_entity * ++pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) ++{ ++ struct cacule_node *se = cfs_rq->head; ++ struct cacule_node *next; ++ u64 now = sched_clock(); ++ ++ if (!se) ++ return curr; ++ ++ next = se->next; ++ while (next) { ++ if (entity_before(now, se, next) == 1) ++ se = next; ++ ++ next = next->next; ++ } ++ ++ if (curr && entity_before(now, se, &curr->cacule_node) == 1) ++ return curr; ++ ++ return se_of(se); ++} ++#else + static int + wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); + +@@ -4536,6 +4786,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) + + return se; + } ++#endif /* CONFIG_CACULE_SCHED */ + + static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); + +@@ -5644,7 +5895,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) + hrtick_update(rq); + } + ++#if !defined(CONFIG_CACULE_SCHED) + static void set_next_buddy(struct sched_entity *se); ++#endif + + /* + * The dequeue_task method is called before nr_running is +@@ -5676,12 +5929,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) + if (cfs_rq->load.weight) { + /* Avoid re-evaluating load for this entity: */ + se = parent_entity(se); ++#if !defined(CONFIG_CACULE_SCHED) + /* + * Bias pick_next to pick a task from this cfs_rq, as + * p is sleeping when it is within its sched_slice. + */ + if (task_sleep && se && !throttled_hierarchy(cfs_rq)) + set_next_buddy(se); ++#endif + break; + } + flags |= DEQUEUE_SLEEP; +@@ -5797,6 +6052,7 @@ static unsigned long capacity_of(int cpu) + return cpu_rq(cpu)->cpu_capacity; + } + ++#if !defined(CONFIG_CACULE_SCHED) + static void record_wakee(struct task_struct *p) + { + /* +@@ -5843,6 +6099,7 @@ static int wake_wide(struct task_struct *p) + return 0; + return 1; + } ++#endif /* CONFIG_CACULE_SCHED */ + + /* + * The purpose of wake_affine() is to quickly determine on which CPU we can run +@@ -6572,6 +6829,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) + return min_t(unsigned long, util, capacity_orig_of(cpu)); + } + ++#if !defined(CONFIG_CACULE_SCHED) + /* + * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued) + * to @dst_cpu. 
+@@ -6836,6 +7094,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + + return target; + } ++#endif /* CONFIG_CACULE_SCHED */ + + /* + * select_task_rq_fair: Select target runqueue for the waking task in domains +@@ -6862,6 +7121,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) + * required for stable ->cpus_allowed + */ + lockdep_assert_held(&p->pi_lock); ++ ++#if !defined(CONFIG_CACULE_SCHED) + if (wake_flags & WF_TTWU) { + record_wakee(p); + +@@ -6874,6 +7135,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) + + want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); + } ++#endif /* CONFIG_CACULE_SCHED */ + + rcu_read_lock(); + for_each_domain(cpu, tmp) { +@@ -6920,6 +7182,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); + */ + static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) + { ++#if !defined(CONFIG_CACULE_SCHED) + /* + * As blocked tasks retain absolute vruntime the migration needs to + * deal with this by subtracting the old and adding the new +@@ -6945,6 +7208,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) + + se->vruntime -= min_vruntime; + } ++#endif /* CONFIG_CACULE_SCHED */ + + if (p->on_rq == TASK_ON_RQ_MIGRATING) { + /* +@@ -6990,6 +7254,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) + } + #endif /* CONFIG_SMP */ + ++#if !defined(CONFIG_CACULE_SCHED) + static unsigned long wakeup_gran(struct sched_entity *se) + { + unsigned long gran = sysctl_sched_wakeup_granularity; +@@ -7068,6 +7333,7 @@ static void set_skip_buddy(struct sched_entity *se) + for_each_sched_entity(se) + cfs_rq_of(se)->skip = se; + } ++#endif /* CONFIG_CACULE_SCHED */ + + /* + * Preempt the current task with a newly woken task if needed: +@@ -7076,9 +7342,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + { + struct task_struct *curr = rq->curr; + struct sched_entity *se = &curr->se, *pse = &p->se; ++ ++#if !defined(CONFIG_CACULE_SCHED) + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + int scale = cfs_rq->nr_running >= sched_nr_latency; + int next_buddy_marked = 0; ++#endif /* CONFIG_CACULE_SCHED */ + + if (unlikely(se == pse)) + return; +@@ -7092,10 +7361,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) + return; + ++#if !defined(CONFIG_CACULE_SCHED) + if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) { + set_next_buddy(pse); + next_buddy_marked = 1; + } ++#endif /* CONFIG_CACULE_SCHED */ + + /* + * We can come here with TIF_NEED_RESCHED already set from new task +@@ -7125,6 +7396,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + find_matching_se(&se, &pse); + update_curr(cfs_rq_of(se)); + BUG_ON(!pse); ++ ++#ifdef CONFIG_CACULE_SCHED ++ if (entity_before(sched_clock(), &se->cacule_node, &pse->cacule_node) == 1) ++ goto preempt; ++#else + if (wakeup_preempt_entity(se, pse) == 1) { + /* + * Bias pick_next to pick the sched entity that is +@@ -7134,11 +7410,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + set_next_buddy(pse); + goto preempt; + } ++#endif /* CONFIG_CACULE_SCHED */ + + return; + + preempt: + resched_curr(rq); ++ ++#if !defined(CONFIG_CACULE_SCHED) + /* + * Only set the backward buddy when the current task is still + * on the rq. 
This can happen when a wakeup gets interleaved +@@ -7153,6 +7432,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + + if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) + set_last_buddy(se); ++#endif /* CONFIG_CACULE_SCHED */ + } + + #ifdef CONFIG_SMP +@@ -7290,6 +7570,10 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf + p = task_of(se); + + done: __maybe_unused; ++#ifdef CONFIG_CACULE_SCHED ++ if (prev) ++ prev->se.cacule_node.vruntime &= YIELD_UNMARK; ++#endif + #ifdef CONFIG_SMP + /* + * Move the next running task to the front of +@@ -7360,7 +7644,15 @@ static void yield_task_fair(struct rq *rq) + { + struct task_struct *curr = rq->curr; + struct cfs_rq *cfs_rq = task_cfs_rq(curr); ++ ++#ifdef CONFIG_CACULE_SCHED ++ struct cacule_node *cn = &curr->se.cacule_node; ++ ++ if (cacule_yield) ++ cn->vruntime |= YIELD_MARK; ++#else + struct sched_entity *se = &curr->se; ++#endif + + /* + * Are we the only task in the tree? +@@ -7368,7 +7660,9 @@ static void yield_task_fair(struct rq *rq) + if (unlikely(rq->nr_running == 1)) + return; + ++#if !defined(CONFIG_CACULE_SCHED) + clear_buddies(cfs_rq, se); ++#endif + + if (curr->policy != SCHED_BATCH) { + update_rq_clock(rq); +@@ -7384,7 +7678,9 @@ static void yield_task_fair(struct rq *rq) + rq_clock_skip_update(rq); + } + ++#if !defined(CONFIG_CACULE_SCHED) + set_skip_buddy(se); ++#endif + } + + static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) +@@ -7395,8 +7691,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) + if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se))) + return false; + ++#if !defined(CONFIG_CACULE_SCHED) + /* Tell the scheduler that we'd really like pse to run next. */ + set_next_buddy(se); ++#endif + + yield_task_fair(rq); + +@@ -7623,6 +7921,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) + if (env->sd->flags & SD_SHARE_CPUCAPACITY) + return 0; + ++#if !defined(CONFIG_CACULE_SCHED) + /* + * Buddy candidates are cache hot: + */ +@@ -7630,6 +7929,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) + (&p->se == cfs_rq_of(&p->se)->next || + &p->se == cfs_rq_of(&p->se)->last)) + return 1; ++#endif + + if (sysctl_sched_migration_cost == -1) + return 1; +@@ -10651,9 +10951,11 @@ static void nohz_newidle_balance(struct rq *this_rq) + if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED)) + return; + ++#if !defined(CONFIG_CACULE_SCHED) + /* Will wake up very soon. No time for doing anything else*/ + if (this_rq->avg_idle < sysctl_sched_migration_cost) + return; ++#endif + + /* Don't need to update blocked load of idle CPUs*/ + if (!READ_ONCE(nohz.has_blocked) || +@@ -10724,7 +11026,10 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) + */ + rq_unpin_lock(this_rq, rf); + +- if (this_rq->avg_idle < sysctl_sched_migration_cost || ++ if ( ++#if !defined(CONFIG_CACULE_SCHED) ++ this_rq->avg_idle < sysctl_sched_migration_cost || ++#endif + !READ_ONCE(this_rq->rd->overload)) { + + rcu_read_lock(); +@@ -10917,7 +11222,9 @@ static void se_fi_update(struct sched_entity *se, unsigned int fi_seq, bool forc + cfs_rq->forceidle_seq = fi_seq; + } + ++#if !defined(CONFIG_CACULE_SCHED) + cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime; ++#endif + } + } + +@@ -10972,8 +11279,12 @@ bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) + * min_vruntime_fi, which would have been updated in prior calls + * to se_fi_update(). 
+ */ ++#ifdef CONFIG_CACULE_SCHED ++ delta = (s64)(sea->vruntime - seb->vruntime); ++#else + delta = (s64)(sea->vruntime - seb->vruntime) + + (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi); ++#endif + + return delta > 0; + } +@@ -11008,11 +11319,28 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) + task_tick_core(rq, curr); + } + ++#ifdef CONFIG_CACULE_SCHED + /* + * called on fork with the child task as argument from the parent's context + * - child not yet on the tasklist + * - preemption disabled + */ ++ static void task_fork_fair(struct task_struct *p) ++{ ++ struct cfs_rq *cfs_rq; ++ struct rq *rq = this_rq(); ++ struct rq_flags rf; ++ ++ rq_lock(rq, &rf); ++ update_rq_clock(rq); ++ ++ cfs_rq = task_cfs_rq(current); ++ if (cfs_rq->curr) ++ update_curr(cfs_rq); ++ ++ rq_unlock(rq, &rf); ++} ++#else + static void task_fork_fair(struct task_struct *p) + { + struct cfs_rq *cfs_rq; +@@ -11043,6 +11371,7 @@ static void task_fork_fair(struct task_struct *p) + se->vruntime -= cfs_rq->min_vruntime; + rq_unlock(rq, &rf); + } ++#endif /* CONFIG_CACULE_SCHED */ + + /* + * Priority of the task has changed. Check to see if we preempt +@@ -11161,6 +11490,8 @@ static void attach_entity_cfs_rq(struct sched_entity *se) + static void detach_task_cfs_rq(struct task_struct *p) + { + struct sched_entity *se = &p->se; ++ ++#if !defined(CONFIG_CACULE_SCHED) + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + if (!vruntime_normalized(p)) { +@@ -11171,6 +11502,7 @@ static void detach_task_cfs_rq(struct task_struct *p) + place_entity(cfs_rq, se, 0); + se->vruntime -= cfs_rq->min_vruntime; + } ++#endif + + detach_entity_cfs_rq(se); + } +@@ -11178,12 +11510,17 @@ static void detach_task_cfs_rq(struct task_struct *p) + static void attach_task_cfs_rq(struct task_struct *p) + { + struct sched_entity *se = &p->se; ++ ++#if !defined(CONFIG_CACULE_SCHED) + struct cfs_rq *cfs_rq = cfs_rq_of(se); ++#endif + + attach_entity_cfs_rq(se); + ++#if !defined(CONFIG_CACULE_SCHED) + if (!vruntime_normalized(p)) + se->vruntime += cfs_rq->min_vruntime; ++#endif + } + + static void switched_from_fair(struct rq *rq, struct task_struct *p) +@@ -11239,13 +11576,21 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) + void init_cfs_rq(struct cfs_rq *cfs_rq) + { + cfs_rq->tasks_timeline = RB_ROOT_CACHED; ++ ++#if !defined(CONFIG_CACULE_SCHED) + cfs_rq->min_vruntime = (u64)(-(1LL << 20)); + #ifndef CONFIG_64BIT + cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; + #endif ++#endif /* CONFIG_CACULE_SCHED */ ++ + #ifdef CONFIG_SMP + raw_spin_lock_init(&cfs_rq->removed.lock); + #endif ++ ++#ifdef CONFIG_CACULE_SCHED ++ cfs_rq->head = NULL; ++#endif + } + + #ifdef CONFIG_FAIR_GROUP_SCHED +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index ddefb0419d7a..ad7bc718033b 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -526,6 +526,12 @@ struct cfs_rq { + unsigned int idle_h_nr_running; /* SCHED_IDLE */ + + u64 exec_clock; ++ ++#ifdef CONFIG_CACULE_SCHED ++#ifdef CONFIG_SCHED_CORE ++ unsigned int forceidle_seq; ++#endif ++#else + u64 min_vruntime; + #ifdef CONFIG_SCHED_CORE + unsigned int forceidle_seq; +@@ -535,6 +541,7 @@ struct cfs_rq { + #ifndef CONFIG_64BIT + u64 min_vruntime_copy; + #endif ++#endif /* CONFIG_CACULE_SCHED */ + + struct rb_root_cached tasks_timeline; + +@@ -543,9 +550,13 @@ struct cfs_rq { + * It is set to NULL otherwise (i.e when none are currently running). 
+ */ + struct sched_entity *curr; ++#ifdef CONFIG_CACULE_SCHED ++ struct cacule_node *head; ++#else + struct sched_entity *next; + struct sched_entity *last; + struct sched_entity *skip; ++#endif // CONFIG_CACULE_SCHED + + #ifdef CONFIG_SCHED_DEBUG + unsigned int nr_spread_over; +diff --git a/kernel/sysctl.c b/kernel/sysctl.c +index 272f4a272f8c..76ff95cacaa6 100644 +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -1737,6 +1737,31 @@ static struct ctl_table kern_table[] = { + .mode = 0644, + .proc_handler = proc_dointvec, + }, ++#ifdef CONFIG_CACULE_SCHED ++ { ++ .procname = "sched_interactivity_factor", ++ .data = &interactivity_factor, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_max_lifetime_ms", ++ .data = &cacule_max_lifetime, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_cacule_yield", ++ .data = &cacule_yield, ++ .maxlen = sizeof (int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &one_ul, ++ }, ++#endif + #ifdef CONFIG_SCHEDSTATS + { + .procname = "sched_schedstats", @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 5.13.14-hardened1 Kernel Configuration +# Linux/x86 5.13.16-hardened1 Kernel Configuration # CONFIG_CC_VERSION_TEXT="gcc (GCC) 11.1.0" CONFIG_CC_IS_GCC=y |