author     Scott B    2022-02-10 06:43:47 -0800
committer  Scott B    2022-02-11 01:16:47 -0800
commit     9b76b5b3d4d1ba39610587853b053e471e51137c (patch)
tree       a0f83afc66666bdba9f60cddec6e6b1bf128d6a6
parent     5dac791642c88b593c71bfd50c7bf568aa6e5f83 (diff)
download   aur-9b76b5b3d4d1ba39610587853b053e471e51137c.tar.gz
pkg: revert Xanmod EAS scheduler changes
-rw-r--r--   .SRCINFO                                                           2
-rw-r--r--   PKGBUILD                                                           4
-rw-r--r--   Revert-XANMOD-fair-Remove-all-energy-efficiency-functions.patch   304
3 files changed, 310 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -25,6 +25,7 @@ pkgbase = linux-xanmod-rog
 	source = Bluetooth-btintel-Fix-bdaddress-comparison-with-garb.patch
 	source = Bluetooth-Read-codec-capabilities-only-if-supported.patch
 	source = Bluetooth-fix-deadlock-for-RFCOMM-sk-state-change.patch
+	source = Revert-XANMOD-fair-Remove-all-energy-efficiency-functions.patch
 	source = udp-ipv6-optimisations-v2-net-next.patch
 	source = af_unix-Replace-unix_table_lock-with-per-hash-locks.patch
 	source = CONFIG_RCU_FAST_NO_HZ-removal-for-v5.17.patch
@@ -54,6 +55,7 @@ pkgbase = linux-xanmod-rog
 	sha256sums = 241f01f06849fcec462d72355ca3ab6bd34931731dec89876d785912ac532398
 	sha256sums = dd01bd3f774c3a9af42b6d89f534f39c4a5f200db32cd6d4b72a29325645100e
 	sha256sums = a9647897e59b04cb883dcf649b3108e9397d5a6c672bc545ea0c6bb7bb30d5a9
+	sha256sums = 3bb1cf422c64b4eea324b71048d0bdee04b5f9132136c6a4774e5205e45c46f1
 	sha256sums = 56f8f93a38ed7236c2504c79645a33123ee7bdf3c0cbb97dfd90600df06be7dd
 	sha256sums = eac568d7f2b369a8a2b685fae236388c3516938000d4d6b2116b5874c4f93430
 	sha256sums = e74649fb883f2c8c3703c730e45119fca7126dc0599d6bc814de6dcf5a07e7cb
diff --git a/PKGBUILD b/PKGBUILD
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -114,6 +114,9 @@ source=("https://cdn.kernel.org/pub/linux/kernel/v${_branch}/linux-${_major}.tar
         "Bluetooth-Read-codec-capabilities-only-if-supported.patch"
         "Bluetooth-fix-deadlock-for-RFCOMM-sk-state-change.patch"
 
+        # Revert Xanmod scheduler power efficiency removal
+        "Revert-XANMOD-fair-Remove-all-energy-efficiency-functions.patch"
+
         # amd-pstate included in Xanmod
         # multigenerational lru included in Xanmod
         # 5.17 TCP Optimizations included in Xanmod
@@ -176,6 +179,7 @@ sha256sums=('027d7e8988bb69ac12ee92406c3be1fe13f990b1ca2249e226225cd1573308bb'
            '241f01f06849fcec462d72355ca3ab6bd34931731dec89876d785912ac532398'
            'dd01bd3f774c3a9af42b6d89f534f39c4a5f200db32cd6d4b72a29325645100e'
            'a9647897e59b04cb883dcf649b3108e9397d5a6c672bc545ea0c6bb7bb30d5a9'
+           '3bb1cf422c64b4eea324b71048d0bdee04b5f9132136c6a4774e5205e45c46f1'
            '56f8f93a38ed7236c2504c79645a33123ee7bdf3c0cbb97dfd90600df06be7dd'
            'eac568d7f2b369a8a2b685fae236388c3516938000d4d6b2116b5874c4f93430'
            'e74649fb883f2c8c3703c730e45119fca7126dc0599d6bc814de6dcf5a07e7cb'
diff --git a/Revert-XANMOD-fair-Remove-all-energy-efficiency-functions.patch b/Revert-XANMOD-fair-Remove-all-energy-efficiency-functions.patch
new file mode 100644
index 000000000000..25ca555df848
--- /dev/null
+++ b/Revert-XANMOD-fair-Remove-all-energy-efficiency-functions.patch
@@ -0,0 +1,304 @@
+From fb398de362e0dd83013990ceebe79d9b4e2438bd Mon Sep 17 00:00:00 2001
+From: Scott B <arglebargle@arglebargle.dev>
+Date: Thu, 10 Feb 2022 05:48:41 -0800
+Subject: [PATCH] Revert "XANMOD: fair: Remove all energy efficiency functions"
+
+This reverts commit 05bdfcdb4122ff03c18c3f3e9ba5c59684484ef8.
+---
+ kernel/sched/fair.c | 273 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 273 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index b9f607aeee97..069e01772d92 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -6609,6 +6609,271 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
+ 	return min_t(unsigned long, util, capacity_orig_of(cpu));
+ }
+ 
++/*
++ * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
++ * to @dst_cpu.
++ */
++static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
++{
++	struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
++	unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
++
++	/*
++	 * If @p migrates from @cpu to another, remove its contribution. Or,
++	 * if @p migrates from another CPU to @cpu, add its contribution. In
++	 * the other cases, @cpu is not impacted by the migration, so the
++	 * util_avg should already be correct.
++	 */
++	if (task_cpu(p) == cpu && dst_cpu != cpu)
++		lsub_positive(&util, task_util(p));
++	else if (task_cpu(p) != cpu && dst_cpu == cpu)
++		util += task_util(p);
++
++	if (sched_feat(UTIL_EST)) {
++		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
++
++		/*
++		 * During wake-up, the task isn't enqueued yet and doesn't
++		 * appear in the cfs_rq->avg.util_est.enqueued of any rq,
++		 * so just add it (if needed) to "simulate" what will be
++		 * cpu_util() after the task has been enqueued.
++		 */
++		if (dst_cpu == cpu)
++			util_est += _task_util_est(p);
++
++		util = max(util, util_est);
++	}
++
++	return min(util, capacity_orig_of(cpu));
++}
++
++/*
++ * compute_energy(): Estimates the energy that @pd would consume if @p was
++ * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
++ * landscape of @pd's CPUs after the task migration, and uses the Energy Model
++ * to compute what would be the energy if we decided to actually migrate that
++ * task.
++ */
++static long
++compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
++{
++	struct cpumask *pd_mask = perf_domain_span(pd);
++	unsigned long cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
++	unsigned long max_util = 0, sum_util = 0;
++	unsigned long _cpu_cap = cpu_cap;
++	int cpu;
++
++	_cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask));
++
++	/*
++	 * The capacity state of CPUs of the current rd can be driven by CPUs
++	 * of another rd if they belong to the same pd. So, account for the
++	 * utilization of these CPUs too by masking pd with cpu_online_mask
++	 * instead of the rd span.
++	 *
++	 * If an entire pd is outside of the current rd, it will not appear in
++	 * its pd list and will not be accounted by compute_energy().
++	 */
++	for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
++		unsigned long util_freq = cpu_util_next(cpu, p, dst_cpu);
++		unsigned long cpu_util, util_running = util_freq;
++		struct task_struct *tsk = NULL;
++
++		/*
++		 * When @p is placed on @cpu:
++		 *
++		 * util_running = max(cpu_util, cpu_util_est) +
++		 *		  max(task_util, _task_util_est)
++		 *
++		 * while cpu_util_next is: max(cpu_util + task_util,
++		 *			      cpu_util_est + _task_util_est)
++		 */
++		if (cpu == dst_cpu) {
++			tsk = p;
++			util_running =
++				cpu_util_next(cpu, p, -1) + task_util_est(p);
++		}
++
++		/*
++		 * Busy time computation: utilization clamping is not
++		 * required since the ratio (sum_util / cpu_capacity)
++		 * is already enough to scale the EM reported power
++		 * consumption at the (eventually clamped) cpu_capacity.
++		 */
++		cpu_util = effective_cpu_util(cpu, util_running, cpu_cap,
++					      ENERGY_UTIL, NULL);
++
++		sum_util += min(cpu_util, _cpu_cap);
++
++		/*
++		 * Performance domain frequency: utilization clamping
++		 * must be considered since it affects the selection
++		 * of the performance domain frequency.
++		 * NOTE: in case RT tasks are running, by default the
++		 * FREQUENCY_UTIL's utilization can be max OPP.
++		 */
++		cpu_util = effective_cpu_util(cpu, util_freq, cpu_cap,
++					      FREQUENCY_UTIL, tsk);
++		max_util = max(max_util, min(cpu_util, _cpu_cap));
++	}
++
++	return em_cpu_energy(pd->em_pd, max_util, sum_util, _cpu_cap);
++}
++
++/*
++ * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
++ * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
++ * spare capacity in each performance domain and uses it as a potential
++ * candidate to execute the task. Then, it uses the Energy Model to figure
++ * out which of the CPU candidates is the most energy-efficient.
++ *
++ * The rationale for this heuristic is as follows. In a performance domain,
++ * all the most energy efficient CPU candidates (according to the Energy
++ * Model) are those for which we'll request a low frequency. When there are
++ * several CPUs for which the frequency request will be the same, we don't
++ * have enough data to break the tie between them, because the Energy Model
++ * only includes active power costs. With this model, if we assume that
++ * frequency requests follow utilization (e.g. using schedutil), the CPU with
++ * the maximum spare capacity in a performance domain is guaranteed to be among
++ * the best candidates of the performance domain.
++ *
++ * In practice, it could be preferable from an energy standpoint to pack
++ * small tasks on a CPU in order to let other CPUs go in deeper idle states,
++ * but that could also hurt our chances to go cluster idle, and we have no
++ * ways to tell with the current Energy Model if this is actually a good
++ * idea or not. So, find_energy_efficient_cpu() basically favors
++ * cluster-packing, and spreading inside a cluster. That should at least be
++ * a good thing for latency, and this is consistent with the idea that most
++ * of the energy savings of EAS come from the asymmetry of the system, and
++ * not so much from breaking the tie between identical CPUs. That's also the
++ * reason why EAS is enabled in the topology code only for systems where
++ * SD_ASYM_CPUCAPACITY is set.
++ *
++ * NOTE: Forkees are not accepted in the energy-aware wake-up path because
++ * they don't have any useful utilization data yet and it's not possible to
++ * forecast their impact on energy consumption. Consequently, they will be
++ * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
++ * to be energy-inefficient in some use-cases. The alternative would be to
++ * bias new tasks towards specific types of CPUs first, or to try to infer
++ * their util_avg from the parent task, but those heuristics could hurt
++ * other use-cases too. So, until someone finds a better way to solve this,
++ * let's keep things simple by re-using the existing slow path.
++ */
++static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
++{
++	unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
++	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
++	int cpu, best_energy_cpu = prev_cpu, target = -1;
++	unsigned long cpu_cap, util, base_energy = 0;
++	struct sched_domain *sd;
++	struct perf_domain *pd;
++
++	rcu_read_lock();
++	pd = rcu_dereference(rd->pd);
++	if (!pd || READ_ONCE(rd->overutilized))
++		goto unlock;
++
++	/*
++	 * Energy-aware wake-up happens on the lowest sched_domain starting
++	 * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
++	 */
++	sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
++	while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
++		sd = sd->parent;
++	if (!sd)
++		goto unlock;
++
++	target = prev_cpu;
++
++	sync_entity_load_avg(&p->se);
++	if (!task_util_est(p))
++		goto unlock;
++
++	for (; pd; pd = pd->next) {
++		unsigned long cur_delta, spare_cap, max_spare_cap = 0;
++		bool compute_prev_delta = false;
++		unsigned long base_energy_pd;
++		int max_spare_cap_cpu = -1;
++
++		for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
++			if (!cpumask_test_cpu(cpu, p->cpus_ptr))
++				continue;
++
++			util = cpu_util_next(cpu, p, cpu);
++			cpu_cap = capacity_of(cpu);
++			spare_cap = cpu_cap;
++			lsub_positive(&spare_cap, util);
++
++			/*
++			 * Skip CPUs that cannot satisfy the capacity request.
++			 * IOW, placing the task there would make the CPU
++			 * overutilized. Take uclamp into account to see how
++			 * much capacity we can get out of the CPU; this is
++			 * aligned with sched_cpu_util().
++			 */
++			util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
++			if (!fits_capacity(util, cpu_cap))
++				continue;
++
++			if (cpu == prev_cpu) {
++				/* Always use prev_cpu as a candidate. */
++				compute_prev_delta = true;
++			} else if (spare_cap > max_spare_cap) {
++				/*
++				 * Find the CPU with the maximum spare capacity
++				 * in the performance domain.
++				 */
++				max_spare_cap = spare_cap;
++				max_spare_cap_cpu = cpu;
++			}
++		}
++
++		if (max_spare_cap_cpu < 0 && !compute_prev_delta)
++			continue;
++
++		/* Compute the 'base' energy of the pd, without @p */
++		base_energy_pd = compute_energy(p, -1, pd);
++		base_energy += base_energy_pd;
++
++		/* Evaluate the energy impact of using prev_cpu. */
++		if (compute_prev_delta) {
++			prev_delta = compute_energy(p, prev_cpu, pd);
++			if (prev_delta < base_energy_pd)
++				goto unlock;
++			prev_delta -= base_energy_pd;
++			best_delta = min(best_delta, prev_delta);
++		}
++
++		/* Evaluate the energy impact of using max_spare_cap_cpu. */
++		if (max_spare_cap_cpu >= 0) {
++			cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
++			if (cur_delta < base_energy_pd)
++				goto unlock;
++			cur_delta -= base_energy_pd;
++			if (cur_delta < best_delta) {
++				best_delta = cur_delta;
++				best_energy_cpu = max_spare_cap_cpu;
++			}
++		}
++	}
++	rcu_read_unlock();
++
++	/*
++	 * Pick the best CPU if prev_cpu cannot be used, or if it saves at
++	 * least 6% of the energy used by prev_cpu.
++	 */
++	if ((prev_delta == ULONG_MAX) ||
++	    (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
++		target = best_energy_cpu;
++
++	return target;
++
++unlock:
++	rcu_read_unlock();
++
++	return target;
++}
++
+ /*
+  * select_task_rq_fair: Select target runqueue for the waking task in domains
+  * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
+@@ -6636,6 +6901,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+ 	lockdep_assert_held(&p->pi_lock);
+ 	if (wake_flags & WF_TTWU) {
+ 		record_wakee(p);
++
++		if (sched_energy_enabled()) {
++			new_cpu = find_energy_efficient_cpu(p, prev_cpu);
++			if (new_cpu >= 0)
++				return new_cpu;
++			new_cpu = prev_cpu;
++		}
++
+ 		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
+ 	}
+ 
+-- 
+2.35.1
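
Reviewer note on the restored compute_energy(): it feeds two different aggregates into em_cpu_energy() because they answer two different questions. max_util selects the operating point (frequency follows the busiest CPU of the domain), while sum_util scales the cost (total busy time of the domain at that operating point). The standalone C program below is a simplified analogue of that interplay, not kernel code; the OPP table and all numbers are invented for illustration.

	#include <stdio.h>

	/* Toy operating-point table (invented numbers): capacity vs. cost. */
	struct perf_state { unsigned long cap; unsigned long cost; };

	static const struct perf_state opp[] = {
		{  256,  64 },
		{  512, 192 },
		{ 1024, 768 },
	};

	/*
	 * Simplified analogue of em_cpu_energy(): pick the lowest OPP whose
	 * capacity covers the busiest CPU (max_util), then scale its cost by
	 * the domain's total busy time (sum_util).
	 */
	static unsigned long domain_energy(unsigned long max_util,
					   unsigned long sum_util)
	{
		size_t i;

		for (i = 0; i + 1 < sizeof(opp) / sizeof(opp[0]); i++)
			if (opp[i].cap >= max_util)
				break;
		return opp[i].cost * sum_util / opp[i].cap;
	}

	int main(void)
	{
		/* Same total work (sum_util = 600), different peak per-CPU load: */
		printf("%lu\n", domain_energy(300, 600)); /* OPP 512:  192*600/512  = 225 */
		printf("%lu\n", domain_energy(600, 600)); /* OPP 1024: 768*600/1024 = 450 */
		return 0;
	}

With the same total work, keeping every individual CPU's utilization low lands the domain on a cheaper operating point, which is why find_energy_efficient_cpu() hunts for the CPU with maximum spare capacity in each performance domain.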
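
A second note, on the tail of find_energy_efficient_cpu(): the ">> 4" in the final comparison divides by 16, so the in-code comment's "at least 6%" is really a 6.25% hysteresis margin, measured against (prev_delta + base_energy), i.e. the estimated total energy if the task stays on prev_cpu. A minimal standalone sketch of just that check; pick_target() and the sample numbers are hypothetical, for illustration only:

	#include <limits.h>
	#include <stdio.h>

	/*
	 * Hypothetical standalone version of the final check above.
	 * prev_delta/best_delta are the summed per-domain energy deltas of
	 * placing the task on prev_cpu vs. the best candidate; base_energy is
	 * the total estimate without the task, so (prev_delta + base_energy)
	 * approximates the total energy if the task stays put. ">> 4" divides
	 * by 16: migrate only for a saving above ~6.25%.
	 */
	static int pick_target(unsigned long prev_delta, unsigned long best_delta,
			       unsigned long base_energy, int prev_cpu, int best_cpu)
	{
		/* prev_cpu was never a usable candidate: take the best CPU found. */
		if (prev_delta == ULONG_MAX)
			return best_cpu;

		/* Otherwise the saving must clear the hysteresis margin. */
		if (prev_delta - best_delta > ((prev_delta + base_energy) >> 4))
			return best_cpu;

		return prev_cpu;
	}

	int main(void)
	{
		/* Margin is (300 + 1300) >> 4 = 100 energy units. */
		printf("%d\n", pick_target(300, 150, 1300, 1, 4)); /* saves 150 > 100 -> CPU 4 */
		printf("%d\n", pick_target(300, 250, 1300, 1, 4)); /* saves 50 <= 100 -> CPU 1 */
		return 0;
	}

The margin keeps wake-ups from ping-ponging between CPUs when the predicted savings are within the Energy Model's noise.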