path: root/cacule-5.11.patch
author	ptr1337	2021-04-10 02:52:34 +0200
committer	ptr1337	2021-04-10 02:52:34 +0200
commit	5fbe22ddfb838a3b85d4a6a09bd8d557597014cd (patch)
tree	3bc4f42cfef0ceba2291ed39100f49df9404205b /cacule-5.11.patch
download	aur-5fbe22ddfb838a3b85d4a6a09bd8d557597014cd.tar.gz
First Push
Diffstat (limited to 'cacule-5.11.patch')
-rw-r--r--	cacule-5.11.patch	2368
1 file changed, 2368 insertions, 0 deletions
diff --git a/cacule-5.11.patch b/cacule-5.11.patch
new file mode 100644
index 000000000000..e437547eb20f
--- /dev/null
+++ b/cacule-5.11.patch
@@ -0,0 +1,2368 @@
+diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
+index 1d56a6b73a4e..4d55ff02310c 100644
+--- a/Documentation/admin-guide/sysctl/kernel.rst
++++ b/Documentation/admin-guide/sysctl/kernel.rst
+@@ -1087,6 +1087,10 @@ Model available). If your platform happens to meet the
+ requirements for EAS but you do not want to use it, change
+ this value to 0.
+
++sched_interactivity_factor (CacULE scheduler only)
++==================================================
++Sets the value *m* for interactivity score calculations. See
++Figure 1 in https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
+
+ sched_schedstats
+ ================
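For reference, the tunable documented above can be made persistent with a standard sysctl drop-in file; the value 20 below is only an example, not a recommendation made by this patch:

    # /etc/sysctl.d/99-cacule.conf (example only; default is 10, see
    # Documentation/scheduler/sched-CacULE.rst added below)
    kernel.sched_interactivity_factor = 20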
+diff --git a/Documentation/scheduler/sched-CacULE.rst b/Documentation/scheduler/sched-CacULE.rst
+new file mode 100644
+index 000000000000..82b0847c468a
+--- /dev/null
++++ b/Documentation/scheduler/sched-CacULE.rst
+@@ -0,0 +1,76 @@
++======================================
++The CacULE Scheduler by Hamad Al Marri.
++======================================
++
++1. Overview
++=============
++
++The CacULE CPU scheduler is based on the interactivity score mechanism.
++The interactivity score is inspired by the ULE scheduler (FreeBSD
++scheduler).
++
++1.1 About CacULE Scheduler
++--------------------------
++
++ - Each CPU has its own runqueue.
++
++ - NORMAL runqueue is a linked list of sched_entities (instead of RB-Tree).
++
++ - RT and other runqueues are just the same as the CFS's.
++
++ - A waking task preempts the currently running task if it is more interactive
++ (i.e. it has a lower interactivity score).
++
++
++1.2. Complexity
++----------------
++
++The complexity of enqueueing and dequeueing a task is O(1).
++
++The complexity of picking the next task is O(n), where n is the number of tasks
++in a runqueue (each CPU has its own runqueue).
++
++Note: O(n) sounds scary, but on a typical 4-CPU machine used for desktop or
++mobile jobs, the maximum number of runnable tasks rarely exceeds 10 (at
++pick-next time) - idle (sleeping) tasks are excluded since they are dequeued
++when they sleep and enqueued again when they wake up.
++
++
++2. The CacULE Interactivity Score
++=======================================================
++
++The interactivity score is inspired by the ULE scheduler (FreeBSD scheduler).
++For more information see: https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
++CacULE doesn't replace CFS with ULE; it only changes CFS's pick-next-task
++mechanism to ULE's interactivity score mechanism for picking the next task to run.
++
++
++2.3 sched_interactivity_factor
++==============================
++Sets the value *m* for interactivity score calculations. See Figure 1 in
++https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
++The default value in CacULE is 10, which means that the Maximum Interactive
++Score is 20 (since m = Maximum Interactive Score / 2).
++You can tune sched_interactivity_factor with the sysctl command:
++
++ sysctl kernel.sched_interactivity_factor=50
++
++This command changes the sched_interactivity_factor from 10 to 50.
++
++
++3. Scheduling policies
++=======================
++
++CacULE, same as CFS, implements three scheduling policies:
++
++ - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
++ policy that is used for regular tasks.
++
++ - SCHED_BATCH: Does not preempt nearly as often as regular tasks
++ would, thereby allowing tasks to run longer and make better use of
++ caches but at the cost of interactivity. This is well suited for
++ batch jobs.
++
++ - SCHED_IDLE: This is even weaker than nice 19, but it's not a true
++ idle timer scheduler in order to avoid getting into priority
++ inversion problems which would deadlock the machine.
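To make the interactivity score described above concrete, here is a minimal userspace sketch that mirrors the calc_interactivity() helper added to kernel/sched/fair.c later in this patch. The factor constant is the interactivity_factor default from that hunk; main() and its sample numbers are illustrative only.

    /* Illustrative sketch of the interactivity score; not part of the patch. */
    #include <stdio.h>
    #include <stdint.h>

    #define INTERACTIVITY_FACTOR 32768ULL  /* interactivity_factor default in this patch */

    /* life_ns: time since the task was created; vruntime_ns: weighted CPU time
     * it has consumed. A task that slept a lot relative to its runtime gets a
     * low score (more interactive); a CPU hog gets a high score. */
    static uint64_t interactivity_score(uint64_t life_ns, uint64_t vruntime_ns)
    {
            uint64_t vr = vruntime_ns | 1;          /* keep the divisor non-zero */
            uint64_t sleep = 1;

            if (life_ns > vr)
                    sleep = (life_ns - vr) | 1;

            if (sleep >= vr)
                    return INTERACTIVITY_FACTOR / (sleep / vr);

            return (INTERACTIVITY_FACTOR << 1) -
                   (INTERACTIVITY_FACTOR / (vr / sleep));
    }

    int main(void)
    {
            /* A task alive for 1s that ran 10ms vs. one that ran 900ms. */
            printf("mostly sleeping: %llu\n", (unsigned long long)
                   interactivity_score(1000000000ULL, 10000000ULL));
            printf("cpu hog:         %llu\n", (unsigned long long)
                   interactivity_score(1000000000ULL, 900000000ULL));
            return 0;
    }

The first task prints a small score and the second a large one, matching "the lower score, the more interactive" above.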
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 6e3a5eeec509..e5da9a62fe4e 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -450,16 +450,29 @@ struct sched_statistics {
+ #endif
+ };
+
++#ifdef CONFIG_CACULE_SCHED
++struct cacule_node {
++ struct cacule_node* next;
++ struct cacule_node* prev;
++ u64 cacule_start_time;
++ u64 vruntime;
++};
++#endif
++
+ struct sched_entity {
+ /* For load-balancing: */
+ struct load_weight load;
+ struct rb_node run_node;
++#ifdef CONFIG_CACULE_SCHED
++ struct cacule_node cacule_node;
++#else
++ u64 vruntime;
++#endif
+ struct list_head group_node;
+ unsigned int on_rq;
+
+ u64 exec_start;
+ u64 sum_exec_runtime;
+- u64 vruntime;
+ u64 prev_sum_exec_runtime;
+
+ u64 nr_migrations;
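The embedded cacule_node above is mapped back to its owning sched_entity with container_of() (the se_of() helper added further down in this patch). A minimal userspace illustration of that pattern, with the structs trimmed down for the example:

    /* Userspace illustration of the embedded-node pattern; not part of the patch. */
    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    struct cacule_node {
            struct cacule_node *next;
            struct cacule_node *prev;
            uint64_t cacule_start_time;
            uint64_t vruntime;
    };

    struct sched_entity {
            struct cacule_node cacule_node;
            unsigned int on_rq;
    };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    /* Same idea as se_of() in this patch: recover the entity from its node. */
    static struct sched_entity *se_of(struct cacule_node *cn)
    {
            return container_of(cn, struct sched_entity, cacule_node);
    }

    int main(void)
    {
            struct sched_entity se = { .cacule_node = { .vruntime = 42 } };
            struct cacule_node *cn = &se.cacule_node;

            printf("recovered vruntime: %llu\n",
                   (unsigned long long)se_of(cn)->cacule_node.vruntime);
            return 0;
    }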
+diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
+index 3c31ba88aca5..774de59e8111 100644
+--- a/include/linux/sched/sysctl.h
++++ b/include/linux/sched/sysctl.h
+@@ -31,6 +31,12 @@ extern unsigned int sysctl_sched_min_granularity;
+ extern unsigned int sysctl_sched_wakeup_granularity;
+ extern unsigned int sysctl_sched_child_runs_first;
+
++#ifdef CONFIG_CACULE_SCHED
++extern int interactivity_factor;
++extern int cacule_max_lifetime;
++extern int cacule_harsh_mode;
++#endif
++
+ enum sched_tunable_scaling {
+ SCHED_TUNABLESCALING_NONE,
+ SCHED_TUNABLESCALING_LOG,
+@@ -46,6 +52,11 @@ extern unsigned int sysctl_numa_balancing_scan_size;
+
+ #ifdef CONFIG_SCHED_DEBUG
+ extern __read_mostly unsigned int sysctl_sched_migration_cost;
++#elif CONFIG_CACULE_RDB
++extern unsigned int sysctl_sched_migration_cost;
++#endif
++
++#ifdef CONFIG_SCHED_DEBUG
+ extern __read_mostly unsigned int sysctl_sched_nr_migrate;
+
+ int sched_proc_update_handler(struct ctl_table *table, int write,
+diff --git a/init/Kconfig b/init/Kconfig
+index b7d3c6a12196..cae5b7447f48 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -825,6 +825,27 @@ config UCLAMP_BUCKETS_COUNT
+
+ endmenu
+
++config CACULE_SCHED
++ bool "CacULE CPU scheduler"
++ default y
++ help
++ The CacULE CPU scheduler is based on the interactivity score mechanism.
++ The interactivity score is inspired by the ULE scheduler (FreeBSD
++ scheduler).
++
++ If unsure, say Y here.
++
++config CACULE_RDB
++ bool "RDB (Response Driven Balancer)"
++ default n
++ depends on CACULE_SCHED
++ help
++ This is an experimental load balancer for CacULE. It is a lightweight
++ load balancer that replaces the CFS load balancer. It migrates
++ tasks based on their interactivity scores.
++
++ If unsure, say N.
++
+ #
+ # For architectures that want to enable the support for NUMA-affine scheduler
+ # balancing logic:
+@@ -942,6 +963,7 @@ config CGROUP_WRITEBACK
+
+ menuconfig CGROUP_SCHED
+ bool "CPU controller"
++ depends on !CACULE_RDB
+ default n
+ help
+ This feature lets CPU scheduler recognize task groups and control CPU
+@@ -1205,6 +1227,7 @@ config CHECKPOINT_RESTORE
+
+ config SCHED_AUTOGROUP
+ bool "Automatic process group scheduling"
++ depends on !CACULE_RDB
+ select CGROUPS
+ select CGROUP_SCHED
+ select FAIR_GROUP_SCHED
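With the defaults above, a .config built against this patch would normally contain the fragment below (RDB stays off unless selected explicitly, and the CGROUP_SCHED/SCHED_AUTOGROUP options above only remain selectable while it is off). Shown for orientation; it is not part of the patch:

    CONFIG_CACULE_SCHED=y
    # CONFIG_CACULE_RDB is not set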
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index f0056507a373..7b643bc0a281 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3554,7 +3554,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ p->se.sum_exec_runtime = 0;
+ p->se.prev_sum_exec_runtime = 0;
+ p->se.nr_migrations = 0;
++
++#ifdef CONFIG_CACULE_SCHED
++ p->se.cacule_node.vruntime = 0;
++#else
+ p->se.vruntime = 0;
++#endif
++
+ INIT_LIST_HEAD(&p->se.group_node);
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -3840,6 +3846,13 @@ void wake_up_new_task(struct task_struct *p)
+ update_rq_clock(rq);
+ post_init_entity_util_avg(p);
+
++#ifdef CONFIG_CACULE_SCHED
++ if (cacule_harsh_mode)
++ p->se.cacule_node.cacule_start_time = p->start_time;
++ else
++ p->se.cacule_node.cacule_start_time = sched_clock();
++#endif
++
+ activate_task(rq, p, ENQUEUE_NOCLOCK);
+ trace_sched_wakeup_new(p);
+ check_preempt_curr(rq, p, WF_FORK);
+@@ -7727,6 +7740,14 @@ void __init sched_init(void)
+ BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
+ #endif
+
++#if defined(CONFIG_CACULE_SCHED) && !defined(CONFIG_CACULE_RDB)
++ printk(KERN_INFO "CacULE CPU scheduler v5.11 by Hamad Al Marri.");
++#endif
++
++#ifdef CONFIG_CACULE_RDB
++ printk(KERN_INFO "CacULE CPU scheduler (RDB) v5.11 by Hamad Al Marri.");
++#endif
++
+ wait_bit_init();
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index 2357921580f9..fb4ef69724c3 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -439,7 +439,11 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
+ return;
+
+ PN(se->exec_start);
++#ifdef CONFIG_CACULE_SCHED
++ PN(se->cacule_node.vruntime);
++#else
+ PN(se->vruntime);
++#endif
+ PN(se->sum_exec_runtime);
+
+ if (schedstat_enabled()) {
+@@ -493,7 +497,11 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
+
+ SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
+ p->comm, task_pid_nr(p),
++#ifdef CONFIG_CACULE_SCHED
++ SPLIT_NS(p->se.cacule_node.vruntime),
++#else
+ SPLIT_NS(p->se.vruntime),
++#endif
+ (long long)(p->nvcsw + p->nivcsw),
+ p->prio);
+
+@@ -535,8 +543,12 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
+
+ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+ {
+- s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
+- spread, rq0_min_vruntime, spread0;
++ s64 MIN_vruntime = -1,
++#if !defined(CONFIG_CACULE_SCHED)
++ min_vruntime, rq0_min_vruntime,
++ spread0,
++#endif
++ max_vruntime = -1, spread;
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_entity *last;
+ unsigned long flags;
+@@ -553,25 +565,41 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ if (rb_first_cached(&cfs_rq->tasks_timeline))
++#ifdef CONFIG_CACULE_SCHED
++ MIN_vruntime = (__pick_first_entity(cfs_rq))->cacule_node.vruntime;
++#else
+ MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
++#endif
++
+ last = __pick_last_entity(cfs_rq);
+ if (last)
++#ifdef CONFIG_CACULE_SCHED
++ max_vruntime = last->cacule_node.vruntime;
++#else
+ max_vruntime = last->vruntime;
++#endif
++
++#if !defined(CONFIG_CACULE_SCHED)
+ min_vruntime = cfs_rq->min_vruntime;
+ rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
++#endif
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
+ SPLIT_NS(MIN_vruntime));
++#if !defined(CONFIG_CACULE_SCHED)
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
+ SPLIT_NS(min_vruntime));
++#endif
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime",
+ SPLIT_NS(max_vruntime));
+ spread = max_vruntime - MIN_vruntime;
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread",
+ SPLIT_NS(spread));
++#if !defined(CONFIG_CACULE_SCHED)
+ spread0 = min_vruntime - rq0_min_vruntime;
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
+ SPLIT_NS(spread0));
++#endif
+ SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
+ cfs_rq->nr_spread_over);
+ SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
+@@ -928,7 +956,11 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ #define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
+
+ PN(se.exec_start);
++#ifdef CONFIG_CACULE_SCHED
++ PN(se.cacule_node.vruntime);
++#else
+ PN(se.vruntime);
++#endif
+ PN(se.sum_exec_runtime);
+
+ nr_switches = p->nvcsw + p->nivcsw;
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index bbc78794224a..c99fc326ec24 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -19,6 +19,10 @@
+ *
+ * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
++ *
++ * CacULE enhancements CPU cache and scheduler based on
++ * Interactivity Score.
++ * (C) 2020 Hamad Al Marri <hamad.s.almarri@gmail.com>
+ */
+ #include "sched.h"
+
+@@ -82,7 +86,15 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
+ unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
+ static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
+
+-const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
++#ifdef CONFIG_CACULE_RDB
++#ifdef CONFIG_SCHED_DEBUG
++const_debug unsigned int sysctl_sched_migration_cost = 750000UL;
++#else
++unsigned int sysctl_sched_migration_cost = 750000UL;
++#endif
++#else
++const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
++#endif
+
+ int sched_thermal_decay_shift;
+ static int __init setup_sched_thermal_decay_shift(char *str)
+@@ -113,6 +125,11 @@ int __weak arch_asym_cpu_priority(int cpu)
+ */
+ #define fits_capacity(cap, max) ((cap) * 1280 < (max) * 1024)
+
++#endif
++#ifdef CONFIG_CACULE_SCHED
++int cacule_max_lifetime = 30000; // in ms
++int cacule_harsh_mode = 0;
++int interactivity_factor = 32768;
+ #endif
+
+ #ifdef CONFIG_CFS_BANDWIDTH
+@@ -253,6 +270,14 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight
+
+ const struct sched_class fair_sched_class;
+
++
++#ifdef CONFIG_CACULE_SCHED
++static inline struct sched_entity *se_of(struct cacule_node *cn)
++{
++ return container_of(cn, struct sched_entity, cacule_node);
++}
++#endif
++
+ /**************************************************************
+ * CFS operations on generic schedulable entities:
+ */
+@@ -512,7 +537,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);
+ /**************************************************************
+ * Scheduling class tree data structure manipulation methods:
+ */
+-
++#if !defined(CONFIG_CACULE_SCHED)
+ static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime)
+ {
+ s64 delta = (s64)(vruntime - max_vruntime);
+@@ -568,7 +593,166 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
+ cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+ #endif
+ }
++#endif /* CONFIG_CACULE_SCHED */
++
++#ifdef CONFIG_CACULE_SCHED
++
++static unsigned int
++calc_interactivity(u64 now, struct cacule_node *se)
++{
++ u64 l_se, vr_se, sleep_se = 1ULL, u64_factor;
++ unsigned int score_se;
++
++ /*
++ * in case of vruntime==0, logical OR with 1 would
++ * make sure that the least sig. bit is 1
++ */
++ l_se = now - se->cacule_start_time;
++ vr_se = se->vruntime | 1;
++ u64_factor = interactivity_factor;
++
++ /* safety check */
++ if (likely(l_se > vr_se))
++ sleep_se = (l_se - vr_se) | 1;
++
++ if (sleep_se >= vr_se)
++ score_se = u64_factor / (sleep_se / vr_se);
++ else
++ score_se = (u64_factor << 1) - (u64_factor / (vr_se / sleep_se));
++
++ return score_se;
++}
++
++static inline int
++entity_before_cached(u64 now, unsigned int score_curr, struct cacule_node *se)
++{
++ unsigned int score_se;
++ int diff;
++
++ score_se = calc_interactivity(now, se);
++ diff = score_se - score_curr;
++
++ if (diff <= 0)
++ return 1;
++
++ return -1;
++}
++
++/*
++ * Does se have lower interactivity score value (i.e. interactive) than curr? If yes, return 1,
++ * otherwise return -1
++ * se is before curr if se has lower interactivity score value
++ * the lower score, the more interactive
++ */
++static inline int
++entity_before(u64 now, struct cacule_node *curr, struct cacule_node *se)
++{
++ unsigned int score_curr, score_se;
++ int diff;
++
++ score_curr = calc_interactivity(now, curr);
++ score_se = calc_interactivity(now, se);
++
++ diff = score_se - score_curr;
++
++ if (diff < 0)
++ return 1;
++
++ return -1;
++}
++
++/*
++ * Enqueue an entity
++ */
++static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se)
++{
++ struct cacule_node *se = &(_se->cacule_node);
++ struct cacule_node *iter, *next = NULL;
++ u64 now = sched_clock();
++ unsigned int score_se = calc_interactivity(now, se);
++
++ se->next = NULL;
++ se->prev = NULL;
++
++ if (likely(cfs_rq->head)) {
++
++ // start from tail
++ iter = cfs_rq->tail;
++
++ // does se have higher IS than iter?
++ while (iter && entity_before_cached(now, score_se, iter) == -1) {
++ next = iter;
++ iter = iter->prev;
++ }
++
++ // se in tail position
++ if (iter == cfs_rq->tail) {
++ cfs_rq->tail->next = se;
++ se->prev = cfs_rq->tail;
++
++ cfs_rq->tail = se;
++ }
++ // else if neither head nor tail, insert se after iter
++ else if (iter) {
++ se->next = next;
++ se->prev = iter;
++
++ iter->next = se;
++ next->prev = se;
++ }
++ // insert se at head
++ else {
++ se->next = cfs_rq->head;
++ cfs_rq->head->prev = se;
++
++ // lastly reset the head
++ cfs_rq->head = se;
++ }
++ } else {
++ // if empty rq
++ cfs_rq->head = se;
++ cfs_rq->tail = se;
++ }
++}
++
++static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se)
++{
++ struct cacule_node *se = &(_se->cacule_node);
++
++ // if only one se in rq
++ if (cfs_rq->head == cfs_rq->tail) {
++ cfs_rq->head = NULL;
++ cfs_rq->tail = NULL;
++
++#ifdef CONFIG_CACULE_RDB
++ WRITE_ONCE(cfs_rq->IS_head, 0);
++#endif
++
++ } else if (se == cfs_rq->head) {
++ // if it is the head
++ cfs_rq->head = cfs_rq->head->next;
++ cfs_rq->head->prev = NULL;
++ } else if (se == cfs_rq->tail) {
++ // if it is the tail
++ cfs_rq->tail = cfs_rq->tail->prev;
++ cfs_rq->tail->next = NULL;
++ } else {
++ // if in the middle
++ struct cacule_node *prev = se->prev;
++ struct cacule_node *next = se->next;
++
++ prev->next = next;
++
++ if (next)
++ next->prev = prev;
++ }
++}
+
++struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
++{
++ return se_of(cfs_rq->head);
++}
++#else
+ /*
+ * Enqueue an entity into the rb-tree:
+ */
+@@ -626,16 +810,29 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se)
+
+ return rb_entry(next, struct sched_entity, run_node);
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ #ifdef CONFIG_SCHED_DEBUG
+ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+ {
++#ifdef CONFIG_CACULE_SCHED
++ struct cacule_node *cn = cfs_rq->head;
++
++ if (!cn)
++ return NULL;
++
++ while (cn->next)
++ cn = cn->next;
++
++ return se_of(cn);
++#else
+ struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
+
+ if (!last)
+ return NULL;
+
+ return rb_entry(last, struct sched_entity, run_node);
++#endif /* CONFIG_CACULE_SCHED */
+ }
+
+ /**************************************************************
+@@ -720,6 +917,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ return slice;
+ }
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * We calculate the vruntime slice of a to-be-inserted task.
+ *
+@@ -729,6 +927,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+ return calc_delta_fair(sched_slice(cfs_rq, se), se);
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ #include "pelt.h"
+ #ifdef CONFIG_SMP
+@@ -836,13 +1035,49 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
+ }
+ #endif /* CONFIG_SMP */
+
++#ifdef CONFIG_CACULE_SCHED
++static void reset_lifetime(u64 now, struct sched_entity *se)
++{
++ struct cacule_node *cn;
++ u64 max_life_ns, life_time;
++ s64 diff;
++
++ /*
++ * left shift 20 bits is approximately = * 1000000
++ * we don't need the precision of life time
++ * Ex. for 30s, with left shift (20bits) == 31.457s
++ */
++ max_life_ns = ((u64) cacule_max_lifetime) << 20;
++
++ for_each_sched_entity(se) {
++ cn = &se->cacule_node;
++ life_time = now - cn->cacule_start_time;
++ diff = life_time - max_life_ns;
++
++ if (unlikely(diff > 0)) {
++ // multiply life_time by 8 for more precision
++ u64 old_hrrn_x8 = life_time / ((cn->vruntime >> 3) | 1);
++
++ // reset life to half max_life (i.e ~15s)
++ cn->cacule_start_time = now - (max_life_ns >> 1);
++
++ // avoid division by zero
++ if (old_hrrn_x8 == 0) old_hrrn_x8 = 1;
++
++ // reset vruntime based on old hrrn ratio
++ cn->vruntime = (max_life_ns << 2) / old_hrrn_x8;
++ }
++ }
++}
++#endif /* CONFIG_CACULE_SCHED */
++
+ /*
+ * Update the current task's runtime statistics.
+ */
+ static void update_curr(struct cfs_rq *cfs_rq)
+ {
+ struct sched_entity *curr = cfs_rq->curr;
+- u64 now = rq_clock_task(rq_of(cfs_rq));
++ u64 now = sched_clock();
+ u64 delta_exec;
+
+ if (unlikely(!curr))
+@@ -860,13 +1095,23 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ curr->sum_exec_runtime += delta_exec;
+ schedstat_add(cfs_rq->exec_clock, delta_exec);
+
++
++#ifdef CONFIG_CACULE_SCHED
++ curr->cacule_node.vruntime += calc_delta_fair(delta_exec, curr);
++ reset_lifetime(now, curr);
++#else
+ curr->vruntime += calc_delta_fair(delta_exec, curr);
+ update_min_vruntime(cfs_rq);
++#endif
+
+ if (entity_is_task(curr)) {
+ struct task_struct *curtask = task_of(curr);
+
+- trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
++#ifdef CONFIG_CACULE_SCHED
++ trace_sched_stat_runtime(curtask, delta_exec, curr->cacule_node.vruntime);
++#else
++ trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
++#endif
+ cgroup_account_cputime(curtask, delta_exec);
+ account_group_exec_runtime(curtask, delta_exec);
+ }
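The "<< 20" shorthand in reset_lifetime() above trades precision for a cheap shift when converting cacule_max_lifetime (milliseconds) to nanoseconds. A quick userspace check of that approximation, illustrative only:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t max_lifetime_ms = 30000;               /* patch default */
            uint64_t exact_ns  = max_lifetime_ms * 1000000ULL;
            uint64_t approx_ns = max_lifetime_ms << 20;     /* 30000 * 1048576 */

            printf("exact : %llu ns (%.3f s)\n",
                   (unsigned long long)exact_ns,  exact_ns / 1e9);
            printf("approx: %llu ns (%.3f s)\n",
                   (unsigned long long)approx_ns, approx_ns / 1e9);
            /* ~30.000 s vs ~31.457 s: the error is acceptable because the
             * lifetime reset only needs a rough bound, not ns precision. */
            return 0;
    }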
+@@ -882,6 +1127,7 @@ static void update_curr_fair(struct rq *rq)
+ static inline void
+ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ u64 wait_start, prev_wait_start;
+
+ if (!schedstat_enabled())
+@@ -895,11 +1141,13 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ wait_start -= prev_wait_start;
+
+ __schedstat_set(se->statistics.wait_start, wait_start);
++#endif
+ }
+
+ static inline void
+ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct task_struct *p;
+ u64 delta;
+
+@@ -936,11 +1184,13 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ __schedstat_inc(se->statistics.wait_count);
+ __schedstat_add(se->statistics.wait_sum, delta);
+ __schedstat_set(se->statistics.wait_start, 0);
++#endif
+ }
+
+ static inline void
+ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct task_struct *tsk = NULL;
+ u64 sleep_start, block_start;
+
+@@ -1004,6 +1254,7 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ account_scheduler_latency(tsk, delta >> 10, 0);
+ }
+ }
++#endif
+ }
+
+ /*
+@@ -1012,6 +1263,7 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ static inline void
+ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ if (!schedstat_enabled())
+ return;
+
+@@ -1024,12 +1276,13 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+
+ if (flags & ENQUEUE_WAKEUP)
+ update_stats_enqueue_sleeper(cfs_rq, se);
++#endif
+ }
+
+ static inline void
+ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
+-
++#if !defined(CONFIG_CACULE_RDB)
+ if (!schedstat_enabled())
+ return;
+
+@@ -1050,6 +1303,7 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ __schedstat_set(se->statistics.block_start,
+ rq_clock(rq_of(cfs_rq)));
+ }
++#endif
+ }
+
+ /*
+@@ -1061,7 +1315,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ /*
+ * We are starting a new run period:
+ */
+- se->exec_start = rq_clock_task(rq_of(cfs_rq));
++ se->exec_start = sched_clock();
+ }
+
+ /**************************************************
+@@ -3076,15 +3330,19 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ static inline void
+ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ cfs_rq->avg.load_avg += se->avg.load_avg;
+ cfs_rq->avg.load_sum += se_weight(se) * se->avg.load_sum;
++#endif
+ }
+
+ static inline void
+ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
+ sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
++#endif
+ }
+ #else
+ static inline void
+@@ -3339,6 +3597,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq)
+ void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ u64 p_last_update_time;
+ u64 n_last_update_time;
+
+@@ -3378,6 +3637,7 @@ void set_task_rq_fair(struct sched_entity *se,
+ #endif
+ __update_load_avg_blocked_se(p_last_update_time, se);
+ se->avg.last_update_time = n_last_update_time;
++#endif
+ }
+
+
+@@ -3657,6 +3917,9 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
+ static inline int
+ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+ {
++#ifdef CONFIG_CACULE_RDB
++ return 0;
++#else
+ unsigned long removed_load = 0, removed_util = 0, removed_runnable = 0;
+ struct sched_avg *sa = &cfs_rq->avg;
+ int decayed = 0;
+@@ -3702,8 +3965,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+ #endif
+
+ return decayed;
++#endif
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ /**
+ * attach_entity_load_avg - attach this entity to its cfs_rq load avg
+ * @cfs_rq: cfs_rq to attach to
+@@ -3781,6 +4046,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
+
+ trace_pelt_cfs_tp(cfs_rq);
+ }
++#endif
+
+ /*
+ * Optional action to be done while updating the load average
+@@ -3792,6 +4058,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
+ /* Update task and its cfs_rq load average */
+ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ u64 now = cfs_rq_clock_pelt(cfs_rq);
+ int decayed;
+
+@@ -3823,8 +4090,10 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
+ if (flags & UPDATE_TG)
+ update_tg_load_avg(cfs_rq);
+ }
++#endif
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ #ifndef CONFIG_64BIT
+ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+ {
+@@ -3845,6 +4114,7 @@ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+ return cfs_rq->avg.last_update_time;
+ }
+ #endif
++#endif
+
+ /*
+ * Synchronize entity load avg of dequeued entity without locking
+@@ -3852,11 +4122,13 @@ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+ */
+ static void sync_entity_load_avg(struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ u64 last_update_time;
+
+ last_update_time = cfs_rq_last_update_time(cfs_rq);
+ __update_load_avg_blocked_se(last_update_time, se);
++#endif
+ }
+
+ /*
+@@ -3865,6 +4137,7 @@ static void sync_entity_load_avg(struct sched_entity *se)
+ */
+ static void remove_entity_load_avg(struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ unsigned long flags;
+
+@@ -3882,6 +4155,7 @@ static void remove_entity_load_avg(struct sched_entity *se)
+ cfs_rq->removed.load_avg += se->avg.load_avg;
+ cfs_rq->removed.runnable_avg += se->avg.runnable_avg;
+ raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
++#endif
+ }
+
+ static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq)
+@@ -4115,7 +4389,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
+
+ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+-#ifdef CONFIG_SCHED_DEBUG
++#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_CACULE_SCHED)
+ s64 d = se->vruntime - cfs_rq->min_vruntime;
+
+ if (d < 0)
+@@ -4126,6 +4400,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ #endif
+ }
+
++#if !defined(CONFIG_CACULE_SCHED)
+ static void
+ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ {
+@@ -4157,11 +4432,15 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ /* ensure we never gain time by being placed backwards. */
+ se->vruntime = max_vruntime(se->vruntime, vruntime);
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
++#if !defined(CONFIG_CACULE_RDB)
+ static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
++#endif
+
+ static inline void check_schedstat_required(void)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ #ifdef CONFIG_SCHEDSTATS
+ if (schedstat_enabled())
+ return;
+@@ -4178,6 +4457,7 @@ static inline void check_schedstat_required(void)
+ "kernel.sched_schedstats=1\n");
+ }
+ #endif
++#endif
+ }
+
+ static inline bool cfs_bandwidth_used(void);
+@@ -4215,18 +4495,23 @@ static inline bool cfs_bandwidth_used(void);
+ static void
+ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
++#if !defined(CONFIG_CACULE_SCHED)
+ bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
++#endif
+ bool curr = cfs_rq->curr == se;
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * If we're the current task, we must renormalise before calling
+ * update_curr().
+ */
+ if (renorm && curr)
+ se->vruntime += cfs_rq->min_vruntime;
++#endif
+
+ update_curr(cfs_rq);
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * Otherwise, renormalise after, such that we're placed at the current
+ * moment in time, instead of some random moment in the past. Being
+@@ -4235,6 +4520,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ */
+ if (renorm && !curr)
+ se->vruntime += cfs_rq->min_vruntime;
++#endif
+
+ /*
+ * When enqueuing a sched_entity, we must:
+@@ -4249,8 +4535,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ update_cfs_group(se);
+ account_entity_enqueue(cfs_rq, se);
+
++#if !defined(CONFIG_CACULE_SCHED)
+ if (flags & ENQUEUE_WAKEUP)
+ place_entity(cfs_rq, se, 0);
++#endif
+
+ check_schedstat_required();
+ update_stats_enqueue(cfs_rq, se, flags);
+@@ -4259,6 +4547,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ __enqueue_entity(cfs_rq, se);
+ se->on_rq = 1;
+
++#if !defined(CONFIG_CACULE_RDB)
+ /*
+ * When bandwidth control is enabled, cfs might have been removed
+ * because of a parent been throttled but cfs->nr_running > 1. Try to
+@@ -4269,8 +4558,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+
+ if (cfs_rq->nr_running == 1)
+ check_enqueue_throttle(cfs_rq);
++#endif
+ }
+
++#if !defined(CONFIG_CACULE_SCHED)
+ static void __clear_buddies_last(struct sched_entity *se)
+ {
+ for_each_sched_entity(se) {
+@@ -4315,6 +4606,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ if (cfs_rq->skip == se)
+ __clear_buddies_skip(se);
+ }
++#endif // !CONFIG_CACULE_SCHED
+
+ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+
+@@ -4339,13 +4631,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+
+ update_stats_dequeue(cfs_rq, se, flags);
+
++#if !defined(CONFIG_CACULE_SCHED)
+ clear_buddies(cfs_rq, se);
++#endif
+
+ if (se != cfs_rq->curr)
+ __dequeue_entity(cfs_rq, se);
+ se->on_rq = 0;
+ account_entity_dequeue(cfs_rq, se);
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * Normalize after update_curr(); which will also have moved
+ * min_vruntime if @se is the one holding it back. But before doing
+@@ -4354,12 +4649,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ */
+ if (!(flags & DEQUEUE_SLEEP))
+ se->vruntime -= cfs_rq->min_vruntime;
++#endif
+
+ /* return excess runtime on last dequeue */
+ return_cfs_rq_runtime(cfs_rq);
+
+ update_cfs_group(se);
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * Now advance min_vruntime if @se was the entity holding it back,
+ * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
+@@ -4368,8 +4665,23 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ */
+ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
+ update_min_vruntime(cfs_rq);
++#endif
+ }
+
++#ifdef CONFIG_CACULE_SCHED
++/*
++ * Preempt the current task with a newly woken task if needed:
++ */
++static void
++check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
++{
++ u64 now = sched_clock();
++
++ // does head have higher IS than curr
++ if (entity_before(now, &curr->cacule_node, cfs_rq->head) == 1)
++ resched_curr(rq_of(cfs_rq));
++}
++#else
+ /*
+ * Preempt the current task with a newly woken task if needed:
+ */
+@@ -4409,6 +4721,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ if (delta > ideal_runtime)
+ resched_curr(rq_of(cfs_rq));
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ static void
+ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+@@ -4443,6 +4756,21 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ se->prev_sum_exec_runtime = se->sum_exec_runtime;
+ }
+
++#ifdef CONFIG_CACULE_SCHED
++static struct sched_entity *
++pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
++{
++ struct cacule_node *se = cfs_rq->head;
++
++ if (unlikely(!se))
++ se = &curr->cacule_node;
++ else if (unlikely(curr
++ && entity_before(sched_clock(), se, &curr->cacule_node) == 1))
++ se = &curr->cacule_node;
++
++ return se_of(se);
++}
++#else
+ static int
+ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
+
+@@ -4503,6 +4831,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+
+ return se;
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+
+@@ -4762,6 +5091,9 @@ static int tg_throttle_down(struct task_group *tg, void *data)
+
+ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ {
++#ifdef CONFIG_CACULE_RDB
++ return false;
++#else
+ struct rq *rq = rq_of(cfs_rq);
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+ struct sched_entity *se;
+@@ -4839,10 +5171,12 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ cfs_rq->throttled = 1;
+ cfs_rq->throttled_clock = rq_clock(rq);
+ return true;
++#endif
+ }
+
+ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct rq *rq = rq_of(cfs_rq);
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+ struct sched_entity *se;
+@@ -4924,6 +5258,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+ /* Determine whether we need to wake up potentially idle CPU: */
+ if (rq->curr == rq->idle && rq->cfs.nr_running)
+ resched_curr(rq);
++#endif
+ }
+
+ static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
+@@ -5376,7 +5711,11 @@ static inline bool cfs_bandwidth_used(void)
+
+ static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
+ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
++
++#if !defined(CONFIG_CACULE_RDB)
+ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
++#endif
++
+ static inline void sync_throttle(struct task_group *tg, int cpu) {}
+ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+
+@@ -5507,7 +5846,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ {
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
++#if !defined(CONFIG_CACULE_RDB)
+ int idle_h_nr_running = task_has_idle_policy(p);
++#endif
+ int task_new = !(flags & ENQUEUE_WAKEUP);
+
+ /*
+@@ -5526,6 +5867,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ if (p->in_iowait)
+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
+
++#ifdef CONFIG_CACULE_RDB
++ if (!se->on_rq) {
++ cfs_rq = cfs_rq_of(se);
++ enqueue_entity(cfs_rq, se, flags);
++ cfs_rq->h_nr_running++;
++ }
++#else
+ for_each_sched_entity(se) {
+ if (se->on_rq)
+ break;
+@@ -5563,6 +5911,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ if (throttled_hierarchy(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
+ }
++#endif
+
+ /* At this point se is NULL and we are at root level*/
+ add_nr_running(rq, 1);
+@@ -5584,6 +5933,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ if (!task_new)
+ update_overutilized_status(rq);
+
++#if !defined(CONFIG_CACULE_RDB)
+ enqueue_throttle:
+ if (cfs_bandwidth_used()) {
+ /*
+@@ -5599,13 +5949,16 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ break;
+ }
+ }
++#endif
+
+ assert_list_leaf_cfs_rq(rq);
+
+ hrtick_update(rq);
+ }
+
++#if !defined(CONFIG_CACULE_SCHED)
+ static void set_next_buddy(struct sched_entity *se);
++#endif
+
+ /*
+ * The dequeue_task method is called before nr_running is
+@@ -5617,6 +5970,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+ int task_sleep = flags & DEQUEUE_SLEEP;
++
++#ifdef CONFIG_CACULE_RDB
++ cfs_rq = cfs_rq_of(se);
++ dequeue_entity(cfs_rq, se, flags);
++ cfs_rq->h_nr_running--;
++#else
+ int idle_h_nr_running = task_has_idle_policy(p);
+ bool was_sched_idle = sched_idle_rq(rq);
+
+@@ -5637,12 +5996,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ if (cfs_rq->load.weight) {
+ /* Avoid re-evaluating load for this entity: */
+ se = parent_entity(se);
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * Bias pick_next to pick a task from this cfs_rq, as
+ * p is sleeping when it is within its sched_slice.
+ */
+ if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+ set_next_buddy(se);
++#endif
+ break;
+ }
+ flags |= DEQUEUE_SLEEP;
+@@ -5663,15 +6024,18 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ goto dequeue_throttle;
+
+ }
++#endif
+
+ /* At this point se is NULL and we are at root level*/
+ sub_nr_running(rq, 1);
+
++#if !defined(CONFIG_CACULE_RDB)
+ /* balance early to pull high priority tasks */
+ if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+ rq->next_balance = jiffies;
+
+ dequeue_throttle:
++#endif
+ util_est_update(&rq->cfs, p, task_sleep);
+ hrtick_update(rq);
+ }
+@@ -5758,6 +6122,7 @@ static unsigned long capacity_of(int cpu)
+ return cpu_rq(cpu)->cpu_capacity;
+ }
+
++#if !defined(CONFIG_CACULE_SCHED)
+ static void record_wakee(struct task_struct *p)
+ {
+ /*
+@@ -5804,7 +6169,9 @@ static int wake_wide(struct task_struct *p)
+ return 0;
+ return 1;
+ }
++#endif
+
++#if !defined(CONFIG_CACULE_RDB)
+ /*
+ * The purpose of wake_affine() is to quickly determine on which CPU we can run
+ * soonest. For the purpose of speed we only consider the waking and previous
+@@ -5906,6 +6273,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+ schedstat_inc(p->se.statistics.nr_wakeups_affine);
+ return target;
+ }
++#endif
+
+ static struct sched_group *
+ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu);
+@@ -6484,6 +6852,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
+ return min_t(unsigned long, util, capacity_orig_of(cpu));
+ }
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
+ * to @dst_cpu.
+@@ -6717,6 +7086,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+
+ return -1;
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ /*
+ * select_task_rq_fair: Select target runqueue for the waking task in domains
+@@ -6733,6 +7103,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+ static int
+ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+ {
++#ifdef CONFIG_CACULE_RDB
++ return select_idle_sibling(p, prev_cpu, prev_cpu);
++#else
+ int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
+ struct sched_domain *tmp, *sd = NULL;
+ int cpu = smp_processor_id();
+@@ -6741,6 +7114,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+ /* SD_flags and WF_flags share the first nibble */
+ int sd_flag = wake_flags & 0xF;
+
++#if !defined(CONFIG_CACULE_SCHED)
+ if (wake_flags & WF_TTWU) {
+ record_wakee(p);
+
+@@ -6753,6 +7127,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+
+ want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
+ }
++#endif
+
+ rcu_read_lock();
+ for_each_domain(cpu, tmp) {
+@@ -6788,9 +7163,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+ rcu_read_unlock();
+
+ return new_cpu;
++#endif
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ static void detach_entity_cfs_rq(struct sched_entity *se);
++#endif
+
+ /*
+ * Called immediately before a task is migrated to a new CPU; task_cpu(p) and
+@@ -6799,6 +7177,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
+ */
+ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
+ {
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * As blocked tasks retain absolute vruntime the migration needs to
+ * deal with this by subtracting the old and adding the new
+@@ -6824,7 +7203,9 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
+
+ se->vruntime -= min_vruntime;
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
++#if !defined(CONFIG_CACULE_RDB)
+ if (p->on_rq == TASK_ON_RQ_MIGRATING) {
+ /*
+ * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
+@@ -6844,6 +7225,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
+ */
+ remove_entity_load_avg(&p->se);
+ }
++#endif
+
+ /* Tell new CPU we are migrated */
+ p->se.avg.last_update_time = 0;
+@@ -6869,6 +7251,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ }
+ #endif /* CONFIG_SMP */
+
++#if !defined(CONFIG_CACULE_SCHED)
+ static unsigned long wakeup_gran(struct sched_entity *se)
+ {
+ unsigned long gran = sysctl_sched_wakeup_granularity;
+@@ -6947,6 +7330,7 @@ static void set_skip_buddy(struct sched_entity *se)
+ for_each_sched_entity(se)
+ cfs_rq_of(se)->skip = se;
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ /*
+ * Preempt the current task with a newly woken task if needed:
+@@ -6955,9 +7339,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+ {
+ struct task_struct *curr = rq->curr;
+ struct sched_entity *se = &curr->se, *pse = &p->se;
++
++#if !defined(CONFIG_CACULE_SCHED)
+ struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+ int scale = cfs_rq->nr_running >= sched_nr_latency;
+ int next_buddy_marked = 0;
++#endif /* CONFIG_CACULE_SCHED */
+
+ if (unlikely(se == pse))
+ return;
+@@ -6971,10 +7358,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+ if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
+ return;
+
++#if !defined(CONFIG_CACULE_SCHED)
+ if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
+ set_next_buddy(pse);
+ next_buddy_marked = 1;
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ /*
+ * We can come here with TIF_NEED_RESCHED already set from new task
+@@ -7004,6 +7393,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+ find_matching_se(&se, &pse);
+ update_curr(cfs_rq_of(se));
+ BUG_ON(!pse);
++
++#ifdef CONFIG_CACULE_SCHED
++ if (entity_before(sched_clock(), &se->cacule_node, &pse->cacule_node) == 1)
++ goto preempt;
++#else
+ if (wakeup_preempt_entity(se, pse) == 1) {
+ /*
+ * Bias pick_next to pick the sched entity that is
+@@ -7013,11 +7407,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+ set_next_buddy(pse);
+ goto preempt;
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ return;
+
+ preempt:
+ resched_curr(rq);
++
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * Only set the backward buddy when the current task is still
+ * on the rq. This can happen when a wakeup gets interleaved
+@@ -7032,6 +7429,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+
+ if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
+ set_last_buddy(se);
++#endif /* CONFIG_CACULE_SCHED */
+ }
+
+ struct task_struct *
+@@ -7127,11 +7525,23 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
+ if (prev)
+ put_prev_task(rq, prev);
+
++#ifdef CONFIG_CACULE_RDB
++ se = pick_next_entity(cfs_rq, NULL);
++ set_next_entity(cfs_rq, se);
++
++ if (cfs_rq->head) {
++ unsigned int IS_head = calc_interactivity(sched_clock(), cfs_rq->head);
++ WRITE_ONCE(cfs_rq->IS_head, IS_head);
++ } else {
++ WRITE_ONCE(cfs_rq->IS_head, 0);
++ }
++#else
+ do {
+ se = pick_next_entity(cfs_rq, NULL);
+ set_next_entity(cfs_rq, se);
+ cfs_rq = group_cfs_rq(se);
+ } while (cfs_rq);
++#endif
+
+ p = task_of(se);
+
+@@ -7153,6 +7563,10 @@ done: __maybe_unused;
+ return p;
+
+ idle:
++#ifdef CONFIG_CACULE_RDB
++ WRITE_ONCE(cfs_rq->IS_head, 0);
++#endif
++
+ if (!rf)
+ return NULL;
+
+@@ -7206,7 +7620,10 @@ static void yield_task_fair(struct rq *rq)
+ {
+ struct task_struct *curr = rq->curr;
+ struct cfs_rq *cfs_rq = task_cfs_rq(curr);
++
++#if !defined(CONFIG_CACULE_SCHED)
+ struct sched_entity *se = &curr->se;
++#endif
+
+ /*
+ * Are we the only task in the tree?
+@@ -7214,7 +7631,9 @@ static void yield_task_fair(struct rq *rq)
+ if (unlikely(rq->nr_running == 1))
+ return;
+
++#if !defined(CONFIG_CACULE_SCHED)
+ clear_buddies(cfs_rq, se);
++#endif
+
+ if (curr->policy != SCHED_BATCH) {
+ update_rq_clock(rq);
+@@ -7230,7 +7649,9 @@ static void yield_task_fair(struct rq *rq)
+ rq_clock_skip_update(rq);
+ }
+
++#if !defined(CONFIG_CACULE_SCHED)
+ set_skip_buddy(se);
++#endif
+ }
+
+ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
+@@ -7241,8 +7662,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
+ if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
+ return false;
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /* Tell the scheduler that we'd really like pse to run next. */
+ set_next_buddy(se);
++#endif
+
+ yield_task_fair(rq);
+
+@@ -7451,6 +7874,34 @@ struct lb_env {
+ struct list_head tasks;
+ };
+
++#ifdef CONFIG_CACULE_RDB
++static int task_hot(struct rq *src_rq)
++{
++ s64 delta;
++ struct task_struct *p;
++ struct cacule_node *cn = src_rq->cfs.head;
++
++ if (!cn)
++ return 0;
++
++ p = task_of(se_of(cn));
++
++ if (p->sched_class != &fair_sched_class)
++ return 0;
++
++ if (unlikely(task_has_idle_policy(p)))
++ return 0;
++
++ if (sysctl_sched_migration_cost == -1)
++ return 1;
++ if (sysctl_sched_migration_cost == 0)
++ return 0;
++
++ delta = sched_clock() - p->se.exec_start;
++
++ return delta < (s64)sysctl_sched_migration_cost;
++}
++#else
+ /*
+ * Is this task likely cache-hot:
+ */
+@@ -7470,6 +7921,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
+ if (env->sd->flags & SD_SHARE_CPUCAPACITY)
+ return 0;
+
++#if !defined(CONFIG_CACULE_SCHED)
+ /*
+ * Buddy candidates are cache hot:
+ */
+@@ -7477,6 +7929,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
+ (&p->se == cfs_rq_of(&p->se)->next ||
+ &p->se == cfs_rq_of(&p->se)->last))
+ return 1;
++#endif
+
+ if (sysctl_sched_migration_cost == -1)
+ return 1;
+@@ -7851,6 +8304,7 @@ static void attach_tasks(struct lb_env *env)
+
+ rq_unlock(env->dst_rq, &rf);
+ }
++#endif
+
+ #ifdef CONFIG_NO_HZ_COMMON
+ static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
+@@ -7896,6 +8350,7 @@ static inline bool others_have_blocked(struct rq *rq) { return false; }
+ static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {}
+ #endif
+
++#if !defined(CONFIG_CACULE_RDB)
+ static bool __update_blocked_others(struct rq *rq, bool *done)
+ {
+ const struct sched_class *curr_class;
+@@ -7921,6 +8376,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
+
+ return decayed;
+ }
++#endif
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+
+@@ -8028,6 +8484,7 @@ static unsigned long task_h_load(struct task_struct *p)
+ cfs_rq_load_avg(cfs_rq) + 1);
+ }
+ #else
++#if !defined(CONFIG_CACULE_RDB)
+ static bool __update_blocked_fair(struct rq *rq, bool *done)
+ {
+ struct cfs_rq *cfs_rq = &rq->cfs;
+@@ -8039,6 +8496,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
+
+ return decayed;
+ }
++#endif
+
+ static unsigned long task_h_load(struct task_struct *p)
+ {
+@@ -8048,6 +8506,7 @@ static unsigned long task_h_load(struct task_struct *p)
+
+ static void update_blocked_averages(int cpu)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ bool decayed = false, done = true;
+ struct rq *rq = cpu_rq(cpu);
+ struct rq_flags rf;
+@@ -8062,6 +8521,7 @@ static void update_blocked_averages(int cpu)
+ if (decayed)
+ cpufreq_update_util(rq, 0);
+ rq_unlock_irqrestore(rq, &rf);
++#endif
+ }
+
+ /********** Helpers for find_busiest_group ************************/
+@@ -9224,6 +9684,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+ * different in groups.
+ */
+
++#if !defined(CONFIG_CACULE_RDB)
+ /**
+ * find_busiest_group - Returns the busiest group within the sched_domain
+ * if there is an imbalance.
+@@ -9489,6 +9950,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
+
+ return busiest;
+ }
++#endif
+
+ /*
+ * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
+@@ -9535,6 +9997,7 @@ voluntary_active_balance(struct lb_env *env)
+ return 0;
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ static int need_active_balance(struct lb_env *env)
+ {
+ struct sched_domain *sd = env->sd;
+@@ -9856,6 +10319,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
+ out:
+ return ld_moved;
+ }
++#endif
+
+ static inline unsigned long
+ get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
+@@ -9894,6 +10358,7 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
+ *next_balance = next;
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ /*
+ * active_load_balance_cpu_stop is run by the CPU stopper. It pushes
+ * running tasks off the busiest CPU onto idle CPUs. It requires at
+@@ -9985,6 +10450,7 @@ static int active_load_balance_cpu_stop(void *data)
+ }
+
+ static DEFINE_SPINLOCK(balancing);
++#endif
+
+ /*
+ * Scale the max load_balance interval with the number of CPUs in the system.
+@@ -9995,6 +10461,7 @@ void update_max_interval(void)
+ max_load_balance_interval = HZ*num_online_cpus()/10;
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ /*
+ * It checks each scheduling domain to see if it is due to be balanced,
+ * and initiates a balancing operation if so.
+@@ -10100,6 +10567,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
+ #endif
+ }
+ }
++#endif
+
+ static inline int on_null_domain(struct rq *rq)
+ {
+@@ -10133,6 +10601,7 @@ static inline int find_new_ilb(void)
+ return nr_cpu_ids;
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ /*
+ * Kick a CPU to do the nohz balancing, if it is time for it. We pick any
+ * idle CPU in the HK_FLAG_MISC housekeeping set (if there is one).
+@@ -10283,6 +10752,7 @@ static void nohz_balancer_kick(struct rq *rq)
+ if (flags)
+ kick_ilb(flags);
+ }
++#endif
+
+ static void set_cpu_sd_state_busy(int cpu)
+ {
+@@ -10390,6 +10860,7 @@ void nohz_balance_enter_idle(int cpu)
+ WRITE_ONCE(nohz.has_blocked, 1);
+ }
+
++#if !defined(CONFIG_CACULE_RDB)
+ /*
+ * Internal function that runs load balance for all idle cpus. The load balance
+ * can be a simple update of blocked load or a complete load balance with
+@@ -10550,8 +11021,10 @@ static void nohz_newidle_balance(struct rq *this_rq)
+ kick_ilb(NOHZ_STATS_KICK);
+ raw_spin_lock(&this_rq->lock);
+ }
++#endif
+
+ #else /* !CONFIG_NO_HZ_COMMON */
++#if !defined(CONFIG_CACULE_RDB)
+ static inline void nohz_balancer_kick(struct rq *rq) { }
+
+ static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
+@@ -10560,8 +11033,108 @@ static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle
+ }
+
+ static inline void nohz_newidle_balance(struct rq *this_rq) { }
++#endif
++
+ #endif /* CONFIG_NO_HZ_COMMON */
+
++#ifdef CONFIG_CACULE_RDB
++static int
++can_migrate_task(struct task_struct *p, int dst_cpu, struct rq *src_rq)
++{
++ if (task_running(src_rq, p))
++ return 0;
++
++ if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr))
++ return 0;
++
++ if (p->se.exec_start == 0)
++ return 0;
++
++ return 1;
++}
++
++static void pull_from_unlock(struct rq *this_rq,
++ struct rq *src_rq,
++ struct rq_flags *rf,
++ struct task_struct *p,
++ int dst_cpu)
++{
++ // detach task
++ deactivate_task(src_rq, p, DEQUEUE_NOCLOCK);
++ set_task_cpu(p, dst_cpu);
++
++ // unlock src rq
++ rq_unlock(src_rq, rf);
++ local_irq_restore(rf->flags);
++
++ // lock this rq
++ raw_spin_lock(&this_rq->lock);
++ update_rq_clock(this_rq);
++
++ activate_task(this_rq, p, ENQUEUE_NOCLOCK);
++ check_preempt_curr(this_rq, p, 0);
++
++ // unlock this rq
++ raw_spin_unlock(&this_rq->lock);
++}
++
++static inline struct rq *
++find_max_IS_rq(struct cfs_rq *cfs_rq, int dst_cpu)
++{
++ struct rq *tmp_rq, *max_rq = NULL;
++ int cpu;
++ u32 max_IS = cfs_rq->IS_head;
++ u32 local_IS;
++
++ // find the runqueue with the maximum IS_head
++ for_each_online_cpu(cpu) {
++ if (cpu == dst_cpu)
++ continue;
++
++ tmp_rq = cpu_rq(cpu);
++
++ if (tmp_rq->cfs.nr_running < 2 || !tmp_rq->cfs.head)
++ continue;
++
++ /* check if cache hot */
++ if (!cpus_share_cache(cpu, dst_cpu) && task_hot(tmp_rq))
++ continue;
++
++ local_IS = READ_ONCE(tmp_rq->cfs.IS_head);
++
++ if (local_IS > max_IS) {
++ max_IS = local_IS;
++ max_rq = tmp_rq;
++ }
++ }
++
++ return max_rq;
++}
++
++static int try_pull_from(struct rq *src_rq, struct rq *this_rq)
++{
++ struct rq_flags rf;
++ int dst_cpu = cpu_of(this_rq);
++ struct task_struct *p;
++
++ rq_lock_irqsave(src_rq, &rf);
++ update_rq_clock(src_rq);
++
++ if (src_rq->cfs.head && src_rq->cfs.nr_running > 1) {
++ p = task_of(se_of(src_rq->cfs.head));
++
++ if (can_migrate_task(p, dst_cpu, src_rq)) {
++ pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu);
++ return 1;
++ }
++ }
++
++ rq_unlock(src_rq, &rf);
++ local_irq_restore(rf.flags);
++
++ return 0;
++}
++
+ /*
+ * newidle_balance is called by schedule() if this_cpu is about to become
+ * idle. Attempts to pull tasks from other CPUs.
+@@ -10572,6 +11145,105 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
+ * > 0 - success, new (fair) tasks present
+ */
+ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
++{
++ int this_cpu = this_rq->cpu;
++ struct task_struct *p = NULL;
++ struct rq *src_rq;
++ int src_cpu;
++ struct rq_flags src_rf;
++ int pulled_task = 0;
++ int cores_round = 1;
++
++ update_misfit_status(NULL, this_rq);
++ /*
++ * We must set idle_stamp _before_ calling idle_balance(), such that we
++ * measure the duration of idle_balance() as idle time.
++ */
++ this_rq->idle_stamp = rq_clock(this_rq);
++
++ /*
++ * Do not pull tasks towards !active CPUs...
++ */
++ if (!cpu_active(this_cpu))
++ return 0;
++
++ /*
++ * This is OK, because current is on_cpu, which avoids it being picked
++ * for load-balance and preemption/IRQs are still disabled avoiding
++ * further scheduler activity on it and we're being very careful to
++ * re-start the picking loop.
++ */
++ rq_unpin_lock(this_rq, rf);
++ raw_spin_unlock(&this_rq->lock);
++
++again:
++ for_each_online_cpu(src_cpu) {
++
++ if (src_cpu == this_cpu)
++ continue;
++
++ if (cores_round && !cpus_share_cache(src_cpu, this_cpu))
++ continue;
++
++ src_rq = cpu_rq(src_cpu);
++
++ rq_lock_irqsave(src_rq, &src_rf);
++ update_rq_clock(src_rq);
++
++ if (src_rq->cfs.nr_running < 2 || !(src_rq->cfs.head))
++ goto next;
++
++ p = task_of(se_of(src_rq->cfs.head));
++
++ if (can_migrate_task(p, this_cpu, src_rq)) {
++ pull_from_unlock(this_rq, src_rq, &src_rf, p, this_cpu);
++
++ pulled_task = 1;
++ goto out;
++ }
++
++next:
++ rq_unlock(src_rq, &src_rf);
++ local_irq_restore(src_rf.flags);
++
++ /*
++ * Stop searching for tasks to pull if there are
++ * now runnable tasks on this rq.
++ */
++ if (pulled_task || this_rq->nr_running > 0)
++ goto out;
++ }
++
++ if (cores_round) {
++ // now search for all cpus
++ cores_round = 0;
++ goto again;
++ }
++
++out:
++ raw_spin_lock(&this_rq->lock);
++
++ /*
++ * While browsing the domains, we released the rq lock, a task could
++ * have been enqueued in the meantime. Since we're not going idle,
++ * pretend we pulled a task.
++ */
++ if (this_rq->cfs.h_nr_running && !pulled_task)
++ pulled_task = 1;
++
++ /* Is there a task of a high priority class? */
++ if (this_rq->nr_running != this_rq->cfs.h_nr_running)
++ pulled_task = -1;
++
++ if (pulled_task)
++ this_rq->idle_stamp = 0;
++
++ rq_repin_lock(this_rq, rf);
++
++ return pulled_task;
++}
++#else
++static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+ {
+ unsigned long next_balance = jiffies + HZ;
+ int this_cpu = this_rq->cpu;
+@@ -10722,6 +11394,167 @@ void trigger_load_balance(struct rq *rq)
+
+ nohz_balancer_kick(rq);
+ }
++#endif
++
++#ifdef CONFIG_CACULE_RDB
++static int
++idle_try_pull_any(struct cfs_rq *cfs_rq)
++{
++ struct task_struct *p = NULL;
++ struct rq *this_rq = rq_of(cfs_rq), *src_rq;
++ int dst_cpu = cpu_of(this_rq);
++ int src_cpu;
++ struct rq_flags rf;
++ int pulled = 0;
++ int cores_round = 1;
++
++again:
++ for_each_online_cpu(src_cpu) {
++
++ if (src_cpu == dst_cpu)
++ continue;
++
++ if (cores_round && !cpus_share_cache(src_cpu, dst_cpu))
++ continue;
++
++ src_rq = cpu_rq(src_cpu);
++
++ rq_lock_irqsave(src_rq, &rf);
++ update_rq_clock(src_rq);
++
++ if (src_rq->cfs.nr_running < 2 || !(src_rq->cfs.head))
++ goto next;
++
++ p = task_of(se_of(src_rq->cfs.head));
++
++ if (can_migrate_task(p, dst_cpu, src_rq)) {
++ pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu);
++ pulled = 1;
++ goto out;
++ }
++
++next:
++ rq_unlock(src_rq, &rf);
++ local_irq_restore(rf.flags);
++ }
++
++ if (cores_round) {
++ /* now search all online CPUs */
++ cores_round = 0;
++ goto again;
++ }
++
++out:
++ return pulled;
++}
++
++
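++/*
++ * Pull one task from the runqueue whose cached head interactivity
++ * score (IS_head) is the highest, as selected by find_max_IS_rq().
++ * Returns 1 if a task was pulled, 0 otherwise.
++ */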
++static int
++try_pull_higher_IS(struct cfs_rq *cfs_rq)
++{
++ struct rq *this_rq = rq_of(cfs_rq), *max_rq;
++ int dst_cpu = cpu_of(this_rq);
++
++ max_rq = find_max_IS_rq(cfs_rq, dst_cpu);
++
++ if (!max_rq)
++ return 0;
++
++ if (try_pull_from(max_rq, this_rq))
++ return 1;
++
++ return 0;
++}
++
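++/*
++ * Busy-CPU balancing: pull at most one head task from a runqueue
++ * that has more runnable CFS tasks than this one. CPUs sharing a
++ * cache are scanned first; on the second round, remote runqueues
++ * that are still cache hot are skipped.
++ */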
++static void try_pull_any(struct rq *this_rq)
++{
++ struct task_struct *p = NULL;
++ struct rq *src_rq;
++ int dst_cpu = cpu_of(this_rq);
++ int src_cpu;
++ struct rq_flags src_rf;
++ int cores_round = 1;
++
++again:
++ for_each_online_cpu(src_cpu) {
++
++ if (src_cpu == dst_cpu)
++ continue;
++
++ src_rq = cpu_rq(src_cpu);
++
++ if (cores_round) {
++ if (!cpus_share_cache(src_cpu, dst_cpu))
++ continue;
++ } else if (!cpus_share_cache(src_cpu, dst_cpu) && task_hot(src_rq)) {
++ /* second round: also skip remote runqueues that are still cache hot */
++ continue;
++ }
++
++ if (src_rq->cfs.nr_running < 2 || !(src_rq->cfs.head)
++ || src_rq->cfs.nr_running <= this_rq->cfs.nr_running)
++ continue;
++
++ rq_lock_irqsave(src_rq, &src_rf);
++ update_rq_clock(src_rq);
++
++ if (src_rq->cfs.nr_running < 2 || !(src_rq->cfs.head)
++ || src_rq->cfs.nr_running <= this_rq->cfs.nr_running)
++ goto next;
++
++ p = task_of(se_of(src_rq->cfs.head));
++
++ if (can_migrate_task(p, dst_cpu, src_rq)) {
++ pull_from_unlock(this_rq, src_rq, &src_rf, p, dst_cpu);
++ return;
++ }
++
++next:
++ rq_unlock(src_rq, &src_rf);
++ local_irq_restore(src_rf.flags);
++ }
++
++ if (cores_round) {
++ /* now search all online CPUs */
++ cores_round = 0;
++ goto again;
++ }
++}
++
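++/*
++ * Periodic balancing for a non-idle CPU: if this runqueue has no
++ * queued head or fewer than two runnable tasks, pull from the
++ * runqueue with the highest head interactivity score; otherwise pull
++ * from any busier runqueue.
++ */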
++static inline void
++active_balance(struct rq *rq)
++{
++ struct cfs_rq *cfs_rq = &rq->cfs;
++
++ if (!cfs_rq->head || cfs_rq->nr_running < 2)
++ try_pull_higher_IS(&rq->cfs);
++ else
++ try_pull_any(rq);
++}
++
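++/*
++ * RDB replacement for the CFS trigger_load_balance(): the
++ * SCHED_SOFTIRQ rebalance path is not registered (see
++ * init_sched_fair_class()). An idle CPU attempts idle_try_pull_any();
++ * a busy CPU runs active_balance() and re-arms the 3 ms rebalance
++ * interval.
++ */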
++void trigger_load_balance(struct rq *rq)
++{
++ unsigned long interval = 3UL;
++
++ /* Don't need to rebalance while attached to NULL domain */
++ if (unlikely(on_null_domain(rq)))
++ return;
++
++ if (time_before(jiffies, rq->next_balance))
++ return;
++
++ if (rq->idle_balance) {
++ idle_try_pull_any(&rq->cfs);
++ } else {
++ active_balance(rq);
++
++ /* scale ms to jiffies */
++ interval = msecs_to_jiffies(interval);
++ rq->next_balance = jiffies + interval;
++ }
++}
++#endif
+
+ static void rq_online_fair(struct rq *rq)
+ {
+@@ -10765,11 +11598,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ update_overutilized_status(task_rq(curr));
+ }
+
++#ifdef CONFIG_CACULE_SCHED
+ /*
+ * called on fork with the child task as argument from the parent's context
+ * - child not yet on the tasklist
+ * - preemption disabled
+ */
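++/*
++ * CacULE variant: only account the parent's runtime via update_curr();
++ * there is no vruntime placement for the child, since min_vruntime is
++ * not used when CONFIG_CACULE_SCHED is set.
++ */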
++static void task_fork_fair(struct task_struct *p)
++{
++ struct cfs_rq *cfs_rq;
++ struct sched_entity *curr;
++ struct rq *rq = this_rq();
++ struct rq_flags rf;
++
++ rq_lock(rq, &rf);
++ update_rq_clock(rq);
++
++ cfs_rq = task_cfs_rq(current);
++ curr = cfs_rq->curr;
++ if (curr)
++ update_curr(cfs_rq);
++
++ rq_unlock(rq, &rf);
++}
++#else
+ static void task_fork_fair(struct task_struct *p)
+ {
+ struct cfs_rq *cfs_rq;
+@@ -10800,6 +11652,7 @@ static void task_fork_fair(struct task_struct *p)
+ se->vruntime -= cfs_rq->min_vruntime;
+ rq_unlock(rq, &rf);
+ }
++#endif /* CONFIG_CACULE_SCHED */
+
+ /*
+ * Priority of the task has changed. Check to see if we preempt
+@@ -10876,9 +11729,12 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
+ }
+ }
+ #else
++#if !defined(CONFIG_CACULE_RDB)
+ static void propagate_entity_cfs_rq(struct sched_entity *se) { }
+ #endif
++#endif
+
++#if !defined(CONFIG_CACULE_RDB)
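++/*
++ * Under CONFIG_CACULE_RDB the cfs_rq detach/attach bookkeeping below
++ * (load-average and propagation updates) is compiled out.
++ */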
+ static void detach_entity_cfs_rq(struct sched_entity *se)
+ {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+@@ -10889,9 +11745,11 @@ static void detach_entity_cfs_rq(struct sched_entity *se)
+ update_tg_load_avg(cfs_rq);
+ propagate_entity_cfs_rq(se);
+ }
++#endif
+
+ static void attach_entity_cfs_rq(struct sched_entity *se)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -10907,11 +11765,15 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
+ attach_entity_load_avg(cfs_rq, se);
+ update_tg_load_avg(cfs_rq);
+ propagate_entity_cfs_rq(se);
++#endif
+ }
+
+ static void detach_task_cfs_rq(struct task_struct *p)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct sched_entity *se = &p->se;
++
++#if !defined(CONFIG_CACULE_SCHED)
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ if (!vruntime_normalized(p)) {
+@@ -10922,19 +11784,28 @@ static void detach_task_cfs_rq(struct task_struct *p)
+ place_entity(cfs_rq, se, 0);
+ se->vruntime -= cfs_rq->min_vruntime;
+ }
++#endif
+
+ detach_entity_cfs_rq(se);
++#endif
+ }
+
+ static void attach_task_cfs_rq(struct task_struct *p)
+ {
++#if !defined(CONFIG_CACULE_RDB)
+ struct sched_entity *se = &p->se;
++
++#if !defined(CONFIG_CACULE_SCHED)
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
++#endif
+
+ attach_entity_cfs_rq(se);
+
++#if !defined(CONFIG_CACULE_SCHED)
+ if (!vruntime_normalized(p))
+ se->vruntime += cfs_rq->min_vruntime;
++#endif
++#endif
+ }
+
+ static void switched_from_fair(struct rq *rq, struct task_struct *p)
+@@ -10990,13 +11861,22 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
+ void init_cfs_rq(struct cfs_rq *cfs_rq)
+ {
+ cfs_rq->tasks_timeline = RB_ROOT_CACHED;
++
++#if !defined(CONFIG_CACULE_SCHED)
+ cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+ #ifndef CONFIG_64BIT
+ cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+ #endif
++#endif /* CONFIG_CACULE_SCHED */
++
+ #ifdef CONFIG_SMP
+ raw_spin_lock_init(&cfs_rq->removed.lock);
+ #endif
++
++#ifdef CONFIG_CACULE_SCHED
++ cfs_rq->head = NULL;
++ cfs_rq->tail = NULL;
++#endif
+ }
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -11321,7 +12201,9 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
+ __init void init_sched_fair_class(void)
+ {
+ #ifdef CONFIG_SMP
++#if !defined(CONFIG_CACULE_RDB)
+ open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
++#endif
+
+ #ifdef CONFIG_NO_HZ_COMMON
+ nohz.next_balance = jiffies;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 282a6bbaacd7..ee0e31c1ce16 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -516,10 +516,13 @@ struct cfs_rq {
+ unsigned int idle_h_nr_running; /* SCHED_IDLE */
+
+ u64 exec_clock;
++
++#if !defined(CONFIG_CACULE_SCHED)
+ u64 min_vruntime;
+ #ifndef CONFIG_64BIT
+ u64 min_vruntime_copy;
+ #endif
++#endif /* CONFIG_CACULE_SCHED */
+
+ struct rb_root_cached tasks_timeline;
+
+@@ -528,9 +531,19 @@ struct cfs_rq {
+ * It is set to NULL otherwise (i.e when none are currently running).
+ */
+ struct sched_entity *curr;
++#ifdef CONFIG_CACULE_SCHED
++ struct cacule_node *head;
++ struct cacule_node *tail;
++
++#ifdef CONFIG_CACULE_RDB
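++ /* head task's interactivity score, read locklessly by the RDB balancer */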
++ unsigned int IS_head;
++#endif
++
++#else
+ struct sched_entity *next;
+ struct sched_entity *last;
+ struct sched_entity *skip;
++#endif /* CONFIG_CACULE_SCHED */
+
+ #ifdef CONFIG_SCHED_DEBUG
+ unsigned int nr_spread_over;
+@@ -2094,7 +2107,12 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+
+ extern const_debug unsigned int sysctl_sched_nr_migrate;
++
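++/*
++ * Dropping const_debug keeps sched_migration_cost_ns writable even
++ * when SCHED_DEBUG is off; the matching sysctl entry is registered
++ * in kernel/sysctl.c for CONFIG_CACULE_RDB || CONFIG_SCHED_DEBUG.
++ */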
++#ifdef CONFIG_CACULE_RDB
++extern unsigned int sysctl_sched_migration_cost;
++#else
+ extern const_debug unsigned int sysctl_sched_migration_cost;
++#endif
+
+ #ifdef CONFIG_SCHED_HRTICK
+
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 62fbd09b5dc1..c6b24b552656 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1659,6 +1659,38 @@ static struct ctl_table kern_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
++#ifdef CONFIG_CACULE_SCHED
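++ /* CacULE tunables: interactivity factor, max task lifetime (ms), harsh mode */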
++ {
++ .procname = "sched_interactivity_factor",
++ .data = &interactivity_factor,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++ {
++ .procname = "sched_max_lifetime_ms",
++ .data = &cacule_max_lifetime,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++ {
++ .procname = "sched_harsh_mode_enabled",
++ .data = &cacule_harsh_mode,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++#endif
++#if defined(CONFIG_CACULE_RDB) || defined(CONFIG_SCHED_DEBUG)
++ {
++ .procname = "sched_migration_cost_ns",
++ .data = &sysctl_sched_migration_cost,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++#endif
+ #ifdef CONFIG_SCHED_DEBUG
+ {
+ .procname = "sched_min_granularity_ns",
+@@ -1697,13 +1729,6 @@ static struct ctl_table kern_table[] = {
+ .extra1 = &min_sched_tunable_scaling,
+ .extra2 = &max_sched_tunable_scaling,
+ },
+- {
+- .procname = "sched_migration_cost_ns",
+- .data = &sysctl_sched_migration_cost,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec,
+- },
+ {
+ .procname = "sched_nr_migrate",
+ .data = &sysctl_sched_nr_migrate,