From 6c03ea3961fd475bd19ef61665c58006c1e86d72 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Feb 2022 17:16:34 +0100 Subject: [PATCH 5/9] tick/rcu: Stop allowing RCU_SOFTIRQ in idle RCU_SOFTIRQ used to be special in that it could be raised on purpose within the idle path to prevent from stopping the tick. Some code still prevents from unnecessary warnings related to this specific behaviour while entering in dynticks-idle mode. However the nohz layout has changed quite a bit in ten years, and the removal of CONFIG_RCU_FAST_NO_HZ has been the final straw to this safe-conduct. Now the RCU_SOFTIRQ vector is expected to be raised from sane places. A remaining corner case is admitted though when the vector is invoked in fragile hotplug path. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Paul Menzel --- include/linux/interrupt.h | 8 ++++++- kernel/time/tick-sched.c | 50 +++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c..9613326d2f8a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,13 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * Ignoring the RCU vector after ksoftirqd is parked is fine + * because: + * 1) rcutree_migrate_callbacks() takes care of the queue. + * 2) rcu_report_dead() reports the final quiescent states. + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4c62fa0f70f1..3506f6ed790c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -999,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) __tick_nohz_full_update_tick(ts, ktime_get()); } +/* + * A pending softirq outside an IRQ (or softirq disabled section) context + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't + * reach here due to the need_resched() early check in can_stop_idle_tick(). + * + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, + * triggering the below since wakep_softirqd() is ignored. + * + */ +static bool report_idle_softirq(void) +{ + static int ratelimit; + unsigned int pending = local_softirq_pending(); + + if (likely(!pending)) + return false; + + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ + if (!cpu_active(smp_processor_id())) { + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; + if (!pending) + return false; + } + + if (ratelimit < 10) + return false; + + /* On RT, softirqs handling may be waiting on some lock */ + if (!local_bh_blocked()) + return false; + + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + + return true; +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -1025,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (need_resched()) return false; - if (unlikely(local_softirq_pending())) { - static int ratelimit; - - if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } + if (unlikely(report_idle_softirq())) return false; - } if (tick_nohz_full_enabled()) { /* -- 2.36.0