From 5a67018a9b1ba4b86296e8835c3a7eba4a3b2251 Mon Sep 17 00:00:00 2001
From: Scott B
Date: Tue, 8 Feb 2022 03:49:03 -0800
Subject: [PATCH] implement threaded console printing

see: https://lore.kernel.org/lkml/20220207194323.273637-1-john.ogness@linutronix.de/

Squashed commit of the following:

commit a3c7ea4824b8fdfe376e560bb58935882173c6cf
Author: John Ogness
Date: Mon Feb 7 20:49:23 2022 +0106

console: introduce CON_MIGHT_SLEEP for vt

Deadlocks and the framebuffer console have been a recurring issue that is
getting worse. Daniel Vetter suggested [0] that fbcon->write() should no
longer be called from an atomic context.

Introduce a new console flag CON_MIGHT_SLEEP for a console driver to specify
that it is only called from sleepable contexts. Set the fbcon to use this
new flag.

[0] https://lore.kernel.org/all/YYuS1uNhxWOEX1Ci@phenom.ffwll.local

Signed-off-by: John Ogness

commit 53c5471906324493450e76a8f8b63aae8d289a89
Author: John Ogness
Date: Mon Feb 7 20:49:22 2022 +0106

printk: remove @console_locked

The static global variable @console_locked is used to help debug VT code to
make sure that certain code paths are running with the console_lock held.

However, this information is also available with the static global variable
@consoles_paused (for locking via console_lock()), and the static global
variable @console_lock_count (for locking via console_trylock()).

Remove @console_locked and update is_console_locked() to use the alternative
variables.

Signed-off-by: John Ogness

commit ec942196284e6e5cef26634c78d894d78f7f3f3c
Author: John Ogness
Date: Mon Feb 7 20:49:21 2022 +0106

printk: reimplement console_lock for proper kthread support

With non-threaded console printers preemption is disabled while holding the
console lock in order to avoid the situation where the console printer is
scheduled away and no other task can lock the console (for printing or
otherwise). Disabling preemption is necessary because the console lock is
implemented purely as a semaphore, which has no owner.

Like non-threaded console printers, kthread printers use the console lock to
synchronize during printing. However, since they use console_lock() instead
of a best-effort console_trylock(), it is not possible to disable preemption
upon locking. Therefore an alternative for synchronizing and avoiding the
above mentioned situation is needed.

The kthread printers do not need to synchronize against each other, but they
do need to synchronize against console_lock() callers. To provide this
synchronization, introduce a per-console mutex. The mutex is taken by the
kthread printer during printing and is also taken by console_lock() callers.
Since mutexes have owners, when calling console_lock(), the scheduler is
able to schedule any kthread printers that may have been preempted while
printing.

Rather than console_lock() callers holding the per-console mutex for the
duration of the console lock, the per-console mutex is only taken in order
to set a new CON_PAUSED flag, which is checked by the kthread printers. This
avoids any issues due to nested locking between the various per-console
mutexes.

The kthread printers must also synchronize against console_trylock()
callers. Since console_trylock() is non-blocking, a global atomic counter
will be used to identify if any kthread printers are active. The kthread
printers will also check the atomic counter to identify if the console has
been locked by another task via console_trylock().
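
Before the locking overview below, here is a small standalone model of the
@console_lock_count counter just described, written as plain C11 userspace
code. The helper names mirror the patch's macros, but the atomics, the
assertions, and main() are illustrative assumptions only, not the kernel
implementation.

/* Standalone model of the @console_lock_count protocol (illustrative only). */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* -1: locked via trylock, 0: idle, >0: number of kthread printers active */
static atomic_int console_lock_count = 0;

/* console_trylock() path: succeed only if no printer is active. */
static bool console_excl_trylock(void)
{
        int expected = 0;

        return atomic_compare_exchange_strong(&console_lock_count, &expected, -1);
}

static void console_excl_unlock(void)
{
        int expected = -1;

        atomic_compare_exchange_strong(&console_lock_count, &expected, 0);
}

/* kthread printer path: enter only if the console is not trylocked. */
static bool console_printer_tryenter(void)
{
        int val = atomic_load(&console_lock_count);

        do {
                if (val < 0)
                        return false;
        } while (!atomic_compare_exchange_weak(&console_lock_count, &val, val + 1));

        return true;
}

static void console_printer_exit(void)
{
        atomic_fetch_sub(&console_lock_count, 1);
}

int main(void)
{
        /* A printer and a trylock caller exclude each other. */
        assert(console_printer_tryenter());
        assert(!console_excl_trylock());
        console_printer_exit();

        assert(console_excl_trylock());
        assert(!console_printer_tryenter());
        console_excl_unlock();

        puts("console_lock_count model OK");
        return 0;
}

The CAS loop in console_printer_tryenter() plays the role of
atomic_inc_unless_negative() in the pseudo code that follows.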
A locking overview for console_lock(), console_trylock(), and the kthread
printers is as follows (pseudo code):

console_lock()
{
        down(&console_sem);
        for_each_console(con) {
                mutex_lock(&con->lock);
                con->flags |= CON_PAUSED;
                mutex_unlock(&con->lock);
        }
}

console_trylock()
{
        assert(down_trylock(&console_sem));
        assert(atomic_cmpxchg(&console_lock_count, 0, -1));
}

kthread_printer()
{
        mutex_lock(&con->lock);
        assert(con->flags & CON_PAUSED);
        assert(atomic_inc_unless_negative(&console_lock_count));
        con->write();
        atomic_dec(&console_lock_count);
        mutex_unlock(&con->lock);
}

Also note that the console owner and waiter logic now only applies between
contexts that have both taken the console lock via console_trylock(). This
is for 2 reasons:

1. Contexts that have taken the console lock via console_lock() require a
sleepable context when unlocking to unpause the kthread printers. But a
waiter context has used console_trylock() and may not be sleepable.

2. The kthread printers no longer acquire the console lock, so it is not
possible to hand over the console lock.

This also has implications for console_unlock(), which attempts a
console_trylock() before returning. Introduce console_trylock_sched() to
allow console_unlock() to specify if it is in a sleepable context.

Signed-off-by: John Ogness

commit 1a562cebbb95792d0d616fe6131bdd1dd5536679
Author: John Ogness
Date: Mon Feb 7 20:49:20 2022 +0106

printk: add kthread console printers

Create a kthread for each console to perform console printing. During normal
operation (@system_state == SYSTEM_RUNNING), the kthread printers are
responsible for all printing on their respective consoles.

During non-normal operation, console printing is done as it has been: within
the context of the printk caller or within irq work triggered by the printk
caller.

Console printers synchronize against each other and against console lockers
by taking the console lock for each message that is printed.

Signed-off-by: John Ogness

commit 5e5106fe642456c007ac3c7dfa3bde4839462ad7
Author: John Ogness
Date: Mon Feb 7 20:49:19 2022 +0106

printk: add functions to allow direct printing

Once kthread printing is introduced, console printing will no longer occur
in the context of the printk caller. However, there are some special
contexts where it is desirable for the printk caller to directly print out
kernel messages. Using pr_flush() to wait for threaded printers is only
possible if the caller is in a sleepable context and the kthreads are
active. That is not always the case.

Introduce printk_direct_enter() and printk_direct_exit() functions to
explicitly (and globally) activate/deactivate direct console printing.

Activate direct printing for:
- sysrq
- emergency reboot/shutdown
- cpu and rcu stalls
- hard and soft lockups
- hung tasks
- stack dumps

Signed-off-by: John Ogness

commit 333aa5c4e14c83839ae5455c7ee80ba8668da3eb
Author: John Ogness
Date: Mon Feb 7 20:49:18 2022 +0106

printk: add pr_flush()

Provide a might-sleep function to allow waiting for threaded console
printers to catch up to the latest logged message.

Use pr_flush() whenever it is desirable to get buffered messages printed
before continuing: suspend_console(), resume_console(), console_stop(),
console_start(), console_unblank().

Signed-off-by: John Ogness

commit fd7f3d7adfc7cd8d18b2dc82f110aa6c9e4d0240
Author: John Ogness
Date: Mon Feb 7 20:49:17 2022 +0106

printk: move buffer definitions into console_emit_next_record() caller

Extended consoles print extended messages and do not print messages about
dropped records.
Non-extended consoles print "normal" messages as well as extra messages
about dropped records.

Currently the buffers for these various message types are defined within the
functions that might use them and their usage is based upon the CON_EXTENDED
flag. This will be a problem when moving to kthread printers because each
printer must be able to provide its own buffers.

Move all the message buffer definitions outside of
console_emit_next_record(). The caller knows if extended or dropped messages
should be printed and can specify the appropriate buffers to use. The
console_emit_next_record() and call_console_driver() functions can know what
to print based on whether specified buffers are non-NULL.

With this change, buffer definition/allocation/specification is separated
from the code that does the various types of string printing.

Signed-off-by: John Ogness

commit cf0aefeb7b956ebc9a699e8c31381134ae667773
Author: John Ogness
Date: Mon Feb 7 20:49:16 2022 +0106

printk: refactor and rework printing logic

Refactor/rework printing logic in order to prepare for moving to threaded
console printing.

- Move @console_seq into struct console so that the current "position" of
  each console can be tracked individually.

- Move @console_dropped into struct console so that the current drop count
  of each console can be tracked individually.

- Modify printing logic so that each console independently loads, prepares,
  prints, and delays its next record.

- Remove exclusive_console logic. Since console positions are handled
  independently, replaying past records occurs naturally.

Signed-off-by: John Ogness

commit 04b01131cdddc66e138dbeaab6e18f87cfda65a8
Author: John Ogness
Date: Mon Feb 7 20:49:15 2022 +0106

printk: call boot_delay_msec() in printk_delay()

boot_delay_msec() is always called immediately before printk_delay() so just
call it from within printk_delay().

Signed-off-by: John Ogness

commit b8ddb3cdb9804d25527fd28c90530ddc1242e1c6
Author: John Ogness
Date: Mon Feb 7 20:49:14 2022 +0106

printk: get caller_id/timestamp after migration disable

Currently the local CPU timestamp and caller_id for the record are collected
while migration is enabled. Since this information is CPU-specific, it
should be collected with migration disabled.

Migration is disabled immediately after collecting this information anyway,
so just move the information collection to after the migration disabling.

Signed-off-by: John Ogness

commit 4674d1a71101187a43a54f953af13a54f7c5ddb0
Author: John Ogness
Date: Mon Feb 7 20:49:13 2022 +0106

printk: use percpu flag instead of cpu_online()

The CON_ANYTIME console flag is used to label consoles that will work
correctly before percpu resources are allocated. To check the condition,
cpu_online(raw_smp_processor_id()) was used. However, this is odd because
CPUs can go offline at a later point. Also, the function is forced to use
the raw_ variant because migration is not disabled.

Since commit ab6f762f0f53 ("printk: queue wake_up_klogd irq_work only if
per-CPU areas are ready") there is a variable to identify if percpu
resources have been allocated. Use that variable instead of
cpu_online(raw_smp_processor_id()).

Signed-off-by: John Ogness

commit e635a66758c7419ff56067be17274ab1292337ab
Author: John Ogness
Date: Mon Feb 7 20:49:12 2022 +0106

printk: cpu sync always disable interrupts

The CPU sync functions are a NOP for !CONFIG_SMP. But for !CONFIG_SMP they
still need to disable interrupts in order to preserve context within the CPU
sync sections.
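
The CPU sync sections referred to here are built on a CPU-reentrant spinning
lock (renamed to the *_cpu_sync_* names by the final commit below). The
following is a small userspace sketch of that owner/nesting scheme using C11
atomics, with a plain integer standing in for the CPU id and the interrupt
handling omitted; it models the algorithm visible in the diff further down
and is not the kernel code itself.

/* Userspace model of the printk cpu_sync owner/nesting scheme (sketch only). */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int cpu_sync_owner = -1; /* "CPU" currently owning the lock */
static atomic_int cpu_sync_nested = 0; /* re-entrancy depth of the owner */

/* Try to acquire; re-entrant for the same id. Returns true on success. */
static bool cpu_sync_try_get(int id)
{
        int old = -1;

        if (atomic_compare_exchange_strong(&cpu_sync_owner, &old, id))
                return true;            /* this id is now the owner */

        if (old == id) {
                atomic_fetch_add(&cpu_sync_nested, 1); /* nested acquire */
                return true;
        }

        return false;                   /* owned by another "CPU": caller spins */
}

static void cpu_sync_put(int id)
{
        if (atomic_load(&cpu_sync_nested)) {
                atomic_fetch_sub(&cpu_sync_nested, 1);
                return;
        }
        assert(atomic_load(&cpu_sync_owner) == id);
        atomic_store(&cpu_sync_owner, -1); /* release to the next owner */
}

int main(void)
{
        assert(cpu_sync_try_get(0));
        assert(cpu_sync_try_get(0));    /* nested re-entry succeeds */
        assert(!cpu_sync_try_get(1));   /* other "CPU" must wait */
        cpu_sync_put(0);
        cpu_sync_put(0);
        assert(cpu_sync_try_get(1));
        cpu_sync_put(1);
        return 0;
}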
Signed-off-by: John Ogness commit a7951e00f27943a43c4844e385cd5433f93b0aee Author: John Ogness Date: Mon Feb 7 20:49:11 2022 +0106 printk: rename cpulock functions Since the printk cpulock is CPU-reentrant and since it is used in all contexts, its usage must be carefully considered and most likely will require programming locklessly. To avoid mistaking the printk cpulock as a typical lock, rename it to cpu_sync. The main functions then become: printk_cpu_sync_get_irqsave(flags); printk_cpu_sync_put_irqrestore(flags); Signed-off-by: John Ogness --- drivers/tty/sysrq.c | 2 + drivers/tty/vt/vt.c | 2 +- include/linux/console.h | 22 +- include/linux/printk.h | 70 ++-- kernel/hung_task.c | 11 +- kernel/printk/printk.c | 903 +++++++++++++++++++++++++++++----------- kernel/rcu/tree_stall.h | 2 + kernel/reboot.c | 14 +- kernel/watchdog.c | 4 + kernel/watchdog_hld.c | 4 + lib/dump_stack.c | 6 +- lib/nmi_backtrace.c | 6 +- 12 files changed, 765 insertions(+), 281 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index bbfd004449b5..a809f56c392f 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -594,9 +594,11 @@ void __handle_sysrq(int key, bool check_mask) * should not) and is the invoked operation enabled? */ if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { + printk_direct_enter(); pr_info("%s\n", op_p->action_msg); console_loglevel = orig_log_level; op_p->handler(key); + printk_direct_exit(); } else { pr_info("This sysrq operation is disabled.\n"); console_loglevel = orig_log_level; diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 7359c3e80d63..ab4712cc9327 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -3161,7 +3161,7 @@ static struct console vt_console_driver = { .write = vt_console_print, .device = vt_console_device, .unblank = unblank_screen, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER|CON_MIGHT_SLEEP, .index = -1, }; #endif diff --git a/include/linux/console.h b/include/linux/console.h index a97f277cfdfa..2e65f7e1b5ee 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,6 +16,7 @@ #include #include +#include struct vc_data; struct console_font_op; @@ -133,9 +134,11 @@ static inline int con_debug_leave(void) #define CON_CONSDEV (2) /* Preferred console, /dev/console */ #define CON_ENABLED (4) #define CON_BOOT (8) -#define CON_ANYTIME (16) /* Safe to call when cpu is offline */ +#define CON_ANYTIME (16) /* Safe to call before per-cpu resources ready */ #define CON_BRL (32) /* Used for a braille device */ #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ +#define CON_PAUSED (128) /* Sleep while console is locked */ +#define CON_MIGHT_SLEEP (256) /* Can only be called from sleepable context */ struct console { char name[16]; @@ -151,6 +154,23 @@ struct console { int cflag; uint ispeed; uint ospeed; + u64 seq; + unsigned long dropped; + struct task_struct *thread; + + /* + * The per-console lock is used by printing kthreads to synchronize + * this console with callers of console_lock(). This is necessary in + * order to allow printing kthreads to run in parallel to each other, + * while each safely accessing their own @flags and synchronizing + * against direct printing via console_lock/console_unlock. + * + * Note: For synchronizing against direct printing via + * console_trylock/console_unlock, see the static global + * variable @console_lock_count. 
+ */ + struct mutex lock; + void *data; struct console *next; }; diff --git a/include/linux/printk.h b/include/linux/printk.h index 9497f6b98339..eedf7546ff89 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -170,6 +170,11 @@ extern void __printk_safe_exit(void); #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit +extern void printk_direct_enter(void); +extern void printk_direct_exit(void); + +extern bool pr_flush(int timeout_ms, bool reset_on_progress); + /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use @@ -224,6 +229,19 @@ static inline void printk_deferred_exit(void) { } +static inline void printk_direct_enter(void) +{ +} + +static inline void printk_direct_exit(void) +{ +} + +static inline bool pr_flush(int timeout_ms, bool reset_on_progress) +{ + return true; +} + static inline int printk_ratelimit(void) { return 0; @@ -281,45 +299,45 @@ static inline void printk_trigger_flush(void) #endif #ifdef CONFIG_SMP -extern int __printk_cpu_trylock(void); -extern void __printk_wait_on_cpu_lock(void); -extern void __printk_cpu_unlock(void); +extern int __printk_cpu_sync_try_get(void); +extern void __printk_cpu_sync_wait(void); +extern void __printk_cpu_sync_put(void); + +#else + +#define __printk_cpu_sync_try_get() true +#define __printk_cpu_sync_wait() +#define __printk_cpu_sync_put() +#endif /* CONFIG_SMP */ /** - * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning - * lock and disable interrupts. + * printk_cpu_sync_get_irqsave() - Disable interrupts and acquire the printk + * cpu-reentrant spinning lock. * @flags: Stack-allocated storage for saving local interrupt state, - * to be passed to printk_cpu_unlock_irqrestore(). + * to be passed to printk_cpu_sync_put_irqrestore(). * * If the lock is owned by another CPU, spin until it becomes available. * Interrupts are restored while spinning. */ -#define printk_cpu_lock_irqsave(flags) \ - for (;;) { \ - local_irq_save(flags); \ - if (__printk_cpu_trylock()) \ - break; \ - local_irq_restore(flags); \ - __printk_wait_on_cpu_lock(); \ +#define printk_cpu_sync_get_irqsave(flags) \ + for (;;) { \ + local_irq_save(flags); \ + if (__printk_cpu_sync_try_get()) \ + break; \ + local_irq_restore(flags); \ + __printk_cpu_sync_wait(); \ } /** - * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning - * lock and restore interrupts. - * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave(). + * printk_cpu_sync_put_irqrestore() - Release the printk cpu-reentrant spinning + * lock and restore interrupts. + * @flags: Caller's saved interrupt state, from printk_cpu_sync_get_irqsave(). 
*/ -#define printk_cpu_unlock_irqrestore(flags) \ +#define printk_cpu_sync_put_irqrestore(flags) \ do { \ - __printk_cpu_unlock(); \ + __printk_cpu_sync_put(); \ local_irq_restore(flags); \ - } while (0) \ - -#else - -#define printk_cpu_lock_irqsave(flags) ((void)flags) -#define printk_cpu_unlock_irqrestore(flags) ((void)flags) - -#endif /* CONFIG_SMP */ + } while (0) extern int kptr_restrict; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 9888e2bc8c76..6de7cec90c3b 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -125,6 +125,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) * complain: */ if (sysctl_hung_task_warnings) { + printk_direct_enter(); + if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", @@ -140,6 +142,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_all_cpu_backtrace) hung_task_show_all_bt = true; + + printk_direct_exit(); } touch_nmi_watchdog(); @@ -202,12 +206,17 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } unlock: rcu_read_unlock(); - if (hung_task_show_lock) + if (hung_task_show_lock) { + printk_direct_enter(); debug_show_all_locks(); + printk_direct_exit(); + } if (hung_task_show_all_bt) { hung_task_show_all_bt = false; + printk_direct_enter(); trigger_all_cpu_backtrace(); + printk_direct_exit(); } if (hung_task_call_panic) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 57b132b658e1..6a81b11498e1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -214,6 +214,26 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* Number of registered extended console drivers. */ static int nr_ext_console_drivers; +/* + * Used to synchronize printing kthreads against direct printing via + * console_trylock/console_unlock. + * + * Values: + * -1 = console locked (via trylock), kthreads will not print + * 0 = no kthread printing, console not locked (via trylock) + * >0 = kthread(s) actively printing + * + * Note: For synchronizing against direct printing via + * console_lock/console_unlock, see the @lock variable in + * struct console. + */ +static atomic_t console_lock_count = ATOMIC_INIT(0); + +#define console_excl_trylock() (atomic_cmpxchg(&console_lock_count, 0, -1) == 0) +#define console_excl_unlock() atomic_cmpxchg(&console_lock_count, -1, 0) +#define console_printer_tryenter() atomic_inc_unless_negative(&console_lock_count) +#define console_printer_exit() atomic_dec(&console_lock_count) + /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -257,19 +277,37 @@ static void __up_console_sem(unsigned long ip) #define up_console_sem() __up_console_sem(_RET_IP_) /* - * This is used for debugging the mess that is the VT code by - * keeping track if we have the console semaphore held. It's - * definitely not the perfect debug tool (we don't know if _WE_ - * hold it and are racing, but it helps tracking those weird code - * paths in the console code where we end up in places I want - * locked without the console semaphore held). + * Tracks whether kthread printers are all paused. A value of true implies + * that the console is locked via console_lock() or the console is suspended. + * Reading and writing to this variable requires holding @console_sem. 
*/ -static int console_locked, console_suspended; +static bool consoles_paused; /* - * If exclusive_console is non-NULL then only this console is to be printed to. + * Pause or unpause all kthread printers. + * + * Requires the console_lock. */ -static struct console *exclusive_console; +static void __pause_all_consoles(bool do_pause) +{ + struct console *con; + + for_each_console(con) { + mutex_lock(&con->lock); + if (do_pause) + con->flags |= CON_PAUSED; + else + con->flags &= ~CON_PAUSED; + mutex_unlock(&con->lock); + } + + consoles_paused = do_pause; +} + +#define pause_all_consoles() __pause_all_consoles(true) +#define unpause_all_consoles() __pause_all_consoles(false) + +static int console_suspended; /* * Array of consoles built from command line options (console=) @@ -353,7 +391,47 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); +/* + * A flag to signify if printk_late_init() has already started the kthread + * printers. If true, any later registered consoles must start their own + * kthread directly. The flag is write protected by the console_lock. + */ +static bool kthreads_started; + #ifdef CONFIG_PRINTK +static atomic_t printk_direct = ATOMIC_INIT(0); + +/** + * printk_direct_enter - cause console printing to occur in the context of + * printk() callers + * + * This globally effects all printk() callers. + * + * Context: Any context. + */ +void printk_direct_enter(void) +{ + atomic_inc(&printk_direct); +} + +/** + * printk_direct_exit - restore console printing behavior from direct + * + * Context: Any context. + */ +void printk_direct_exit(void) +{ + atomic_dec(&printk_direct); +} + +static inline bool allow_direct_printing(void) +{ + return (!kthreads_started || + system_state != SYSTEM_RUNNING || + oops_in_progress || + atomic_read(&printk_direct)); +} + DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -361,12 +439,6 @@ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; -/* All 3 protected by @console_sem. */ -/* the next printk record to write to the console */ -static u64 console_seq; -static u64 exclusive_console_stop_seq; -static unsigned long console_dropped; - struct latched_seq { seqcount_latch_t latch; u64 val[2]; @@ -392,6 +464,9 @@ static struct latched_seq clear_seq = { /* the maximum size of a formatted record (i.e. with prefix added per line) */ #define CONSOLE_LOG_MAX 1024 +/* the maximum size for a dropped text message */ +#define DROPPED_TEXT_MAX 64 + /* the maximum size allowed to be reserved for a record */ #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) @@ -1888,47 +1963,26 @@ static int console_trylock_spinning(void) } /* - * Call the console drivers, asking them to write out - * log_buf[start] to log_buf[end - 1]. - * The console_lock must be held. + * Call the specified console driver, asking it to write out the specified + * text and length. If @dropped_text is non-NULL and any records have been + * dropped, a dropped message will be written out first. 
*/ -static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) +static void call_console_driver(struct console *con, const char *text, size_t len, + char *dropped_text) { - static char dropped_text[64]; - size_t dropped_len = 0; - struct console *con; + size_t dropped_len; trace_console_rcuidle(text, len); - if (!console_drivers) - return; - - if (console_dropped) { - dropped_len = snprintf(dropped_text, sizeof(dropped_text), + if (con->dropped && dropped_text) { + dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX, "** %lu printk messages dropped **\n", - console_dropped); - console_dropped = 0; + con->dropped); + con->dropped = 0; + con->write(con, dropped_text, dropped_len); } - for_each_console(con) { - if (exclusive_console && con != exclusive_console) - continue; - if (!(con->flags & CON_ENABLED)) - continue; - if (!con->write) - continue; - if (!cpu_online(smp_processor_id()) && - !(con->flags & CON_ANYTIME)) - continue; - if (con->flags & CON_EXTENDED) - con->write(con, ext_text, ext_len); - else { - if (dropped_len) - con->write(con, dropped_text, dropped_len); - con->write(con, text, len); - } - } + con->write(con, text, len); } /* @@ -2003,8 +2057,10 @@ static u8 *__printk_recursion_counter(void) int printk_delay_msec __read_mostly; -static inline void printk_delay(void) +static inline void printk_delay(int level) { + boot_delay_msec(level); + if (unlikely(printk_delay_msec)) { int m = printk_delay_msec; @@ -2018,7 +2074,7 @@ static inline void printk_delay(void) static inline u32 printk_caller_id(void) { return in_task() ? task_pid_nr(current) : - 0x80000000 + raw_smp_processor_id(); + 0x80000000 + smp_processor_id(); } /** @@ -2100,7 +2156,6 @@ int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { - const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; enum printk_info_flags flags = 0; struct printk_record r; @@ -2110,10 +2165,14 @@ int vprintk_store(int facility, int level, u8 *recursion_ptr; u16 reserve_size; va_list args2; + u32 caller_id; u16 text_len; int ret = 0; u64 ts_nsec; + if (!printk_enter_irqsave(recursion_ptr, irqflags)) + return 0; + /* * Since the duration of printk() can vary depending on the message * and state of the ringbuffer, grab the timestamp now so that it is @@ -2122,8 +2181,7 @@ int vprintk_store(int facility, int level, */ ts_nsec = local_clock(); - if (!printk_enter_irqsave(recursion_ptr, irqflags)) - return 0; + caller_id = printk_caller_id(); /* * The sprintf needs to come first since the syslog prefix might be @@ -2223,13 +2281,10 @@ asmlinkage int vprintk_emit(int facility, int level, in_sched = true; } - boot_delay_msec(level); - printk_delay(); - printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (!in_sched && allow_direct_printing()) { /* * Disable preemption to avoid being preempted while holding * console_sem which would prevent anyone from printing to @@ -2270,18 +2325,19 @@ asmlinkage __visible int _printk(const char *fmt, ...) 
} EXPORT_SYMBOL(_printk); +static void start_printk_kthread(struct console *con); + #else /* CONFIG_PRINTK */ #define CONSOLE_LOG_MAX 0 +#define DROPPED_TEXT_MAX 0 #define printk_time false #define prb_read_valid(rb, seq, r) false #define prb_first_valid_seq(rb) 0 +#define prb_next_seq(rb) 0 static u64 syslog_seq; -static u64 console_seq; -static u64 exclusive_console_stop_seq; -static unsigned long console_dropped; static size_t record_print_text(const struct printk_record *r, bool syslog, bool time) @@ -2298,9 +2354,14 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, struct dev_printk_info *dev_info) { return 0; } static void console_lock_spinning_enable(void) { } static int console_lock_spinning_disable_and_check(void) { return 0; } -static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) {} +static void call_console_driver(struct console *con, const char *text, size_t len, + char *dropped_text) +{ +} static bool suppress_message_printing(int level) { return false; } +static void printk_delay(int level) {} +static void start_printk_kthread(struct console *con) {} +static bool allow_direct_printing(void) { return true; } #endif /* CONFIG_PRINTK */ @@ -2476,6 +2537,7 @@ void suspend_console(void) if (!console_suspend_enabled) return; pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); + pr_flush(1000, true); console_lock(); console_suspended = 1; up_console_sem(); @@ -2488,6 +2550,7 @@ void resume_console(void) down_console_sem(); console_suspended = 0; console_unlock(); + pr_flush(1000, true); } /** @@ -2524,7 +2587,7 @@ void console_lock(void) down_console_sem(); if (console_suspended) return; - console_locked = 1; + pause_all_consoles(); console_may_schedule = 1; } EXPORT_SYMBOL(console_lock); @@ -2545,44 +2608,266 @@ int console_trylock(void) up_console_sem(); return 0; } - console_locked = 1; + if (!console_excl_trylock()) { + up_console_sem(); + return 0; + } console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); +/* + * A variant of console_trylock() that allows specifying if the context may + * sleep. If yes, a trylock on @console_sem is attempted and if successful, + * the threaded printers are paused. This is important to ensure that + * sleepable contexts do not become involved in console_lock handovers and + * will call cond_resched() during the printing loop. + */ +static int console_trylock_sched(bool may_schedule) +{ + if (!may_schedule) + return console_trylock(); + + might_sleep(); + + if (down_trylock_console_sem()) + return 0; + if (console_suspended) { + up_console_sem(); + return 0; + } + pause_all_consoles(); + console_may_schedule = 1; + return 1; +} + +/* + * This is used to help to make sure that certain paths within the VT code are + * running with the console lock held. It is definitely not the perfect debug + * tool (it is not known if the VT code is the task holding the console lock), + * but it helps tracking those weird code paths in the console code such as + * when the console is suspended: where the console is not locked but no + * console printing may occur. + * + * Note: This returns true when the console is suspended but is not locked. + * This is intentional because the VT code must consider that situation + * the same as if the console was locked. 
+ */ int is_console_locked(void) { - return console_locked; + return (consoles_paused || atomic_read(&console_lock_count)); } EXPORT_SYMBOL(is_console_locked); /* - * Check if we have any console that is capable of printing while cpu is - * booting or shutting down. Requires console_sem. + * Check if the given console is currently capable and allowed to print + * records. + * + * Requires the console_lock. */ -static int have_callable_console(void) +static inline bool console_is_usable(struct console *con) { - struct console *con; + if (!(con->flags & CON_ENABLED)) + return false; - for_each_console(con) - if ((con->flags & CON_ENABLED) && - (con->flags & CON_ANYTIME)) - return 1; + if (!con->write) + return false; - return 0; + /* + * Console drivers may assume that per-cpu resources have been + * allocated. So unless they're explicitly marked as being able to + * cope (CON_ANYTIME) don't call them until per-cpu resources have + * been allocated. + */ + if (!printk_percpu_data_ready() && + !(con->flags & CON_ANYTIME)) + return false; + + return true; +} + +static void __console_unlock(void) +{ + /* + * Depending on whether console_lock() or console_trylock() was used, + * appropriately allow the kthread printers to continue. + */ + if (consoles_paused) + unpause_all_consoles(); + else + console_excl_unlock(); + + /* Wake the kthread printers. */ + wake_up_klogd(); + + up_console_sem(); } /* - * Can we actually use the console at this time on this cpu? + * Print one record for the given console. The record printed is whatever + * record is the next available record for the given console. * - * Console drivers may assume that per-cpu resources have been allocated. So - * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't - * call them until this CPU is officially up. + * @text is a buffer of size CONSOLE_LOG_MAX. + * + * If extended messages should be printed, @ext_text is a buffer of size + * CONSOLE_EXT_LOG_MAX. Otherwise @ext_text must be NULL. + * + * If dropped messages should be printed, @dropped_text is a buffer of size + * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL. + * + * Requires the console_lock. + * + * Returns false if the given console has no next record to print, otherwise + * true. + * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the + * console_lock. Otherwise it is set to false. A NULL pointer may be provided + * to disable allowing the console_lock to be taken over by a printk waiter. */ -static inline int can_use_console(void) +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool *handover) { - return cpu_online(raw_smp_processor_id()) || have_callable_console(); + struct printk_info info; + struct printk_record r; + unsigned long flags; + bool allow_handover; + char *write_text; + size_t len; + + prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + + if (handover) + *handover = false; + + if (!prb_read_valid(prb, con->seq, &r)) + return false; + + if (con->seq != r.info->seq) { + con->dropped += r.info->seq - con->seq; + con->seq = r.info->seq; + } + + /* Skip record that has level above the console loglevel. 
*/ + if (suppress_message_printing(r.info->level)) { + con->seq++; + goto skip; + } + + if (ext_text) { + write_text = ext_text; + len = info_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, r.info); + len += msg_print_ext_body(ext_text + len, CONSOLE_EXT_LOG_MAX - len, + &r.text_buf[0], r.info->text_len, &r.info->dev_info); + } else { + write_text = text; + len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); + } + + /* Handovers may only happen between trylock contexts. */ + allow_handover = (handover && atomic_read(&console_lock_count) == -1); + + if (allow_handover) { + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); + } + + stop_critical_timings(); /* don't trace print latency */ + call_console_driver(con, write_text, len, dropped_text); + start_critical_timings(); + + con->seq++; + + if (allow_handover) { + *handover = console_lock_spinning_disable_and_check(); + printk_safe_exit_irqrestore(flags); + } + + printk_delay(r.info->level); +skip: + return true; +} + +/* + * Print out all remaining records to all consoles. + * + * Requires the console_lock. + * + * Returns true if a console was available for flushing, otherwise false. + * + * @next_seq is set to the highest sequence number of all of the consoles that + * were flushed. + * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the + * console_lock. Otherwise it is set to false. + */ +static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) +{ + static char dropped_text[DROPPED_TEXT_MAX]; + static char ext_text[CONSOLE_EXT_LOG_MAX]; + static char text[CONSOLE_LOG_MAX]; + bool any_usable = false; + struct console *con; + bool any_progress; + + *next_seq = 0; + *handover = false; + + do { + /* Let the kthread printers do the work if they can. */ + if (!allow_direct_printing()) + break; + + any_progress = false; + + for_each_console(con) { + bool progress; + + if (!console_is_usable(con)) + continue; + if ((con->flags & CON_MIGHT_SLEEP) && !do_cond_resched) + continue; + any_usable = true; + + if (con->flags & CON_EXTENDED) { + /* Extended consoles do not print "dropped messages". */ + progress = console_emit_next_record(con, &text[0], + &ext_text[0], NULL, + handover); + } else { + progress = console_emit_next_record(con, &text[0], + NULL, &dropped_text[0], + handover); + } + if (*handover) + return true; + + /* Track the highest seq flushed. 
*/ + if (con->seq > *next_seq) + *next_seq = con->seq; + + if (!progress) + continue; + any_progress = true; + + if (do_cond_resched) + cond_resched(); + } + } while (any_progress); + + return any_usable; } /** @@ -2601,21 +2886,16 @@ static inline int can_use_console(void) */ void console_unlock(void) { - static char ext_text[CONSOLE_EXT_LOG_MAX]; - static char text[CONSOLE_LOG_MAX]; - unsigned long flags; - bool do_cond_resched, retry; - struct printk_info info; - struct printk_record r; - u64 __maybe_unused next_seq; + bool do_cond_resched; + bool handover; + bool flushed; + u64 next_seq; if (console_suspended) { up_console_sem(); return; } - prb_rec_init_rd(&r, &info, text, sizeof(text)); - /* * Console drivers are called with interrupts disabled, so * @console_may_schedule should be cleared before; however, we may @@ -2624,117 +2904,31 @@ void console_unlock(void) * between lines if allowable. Not doing so can cause a very long * scheduling stall on a slow console leading to RCU stall and * softlockup warnings which exacerbate the issue with more - * messages practically incapacitating the system. - * - * console_trylock() is not able to detect the preemptive - * context reliably. Therefore the value must be stored before - * and cleared after the "again" goto label. + * messages practically incapacitating the system. Therefore, create + * a local to use for the printing loop. */ do_cond_resched = console_may_schedule; -again: - console_may_schedule = 0; - /* - * We released the console_sem lock, so we need to recheck if - * cpu is online and (if not) is there at least one CON_ANYTIME - * console. - */ - if (!can_use_console()) { - console_locked = 0; - up_console_sem(); - return; - } + do { + console_may_schedule = 0; - for (;;) { - size_t ext_len = 0; - int handover; - size_t len; - -skip: - if (!prb_read_valid(prb, console_seq, &r)) + flushed = console_flush_all(do_cond_resched, &next_seq, &handover); + if (handover) break; - if (console_seq != r.info->seq) { - console_dropped += r.info->seq - console_seq; - console_seq = r.info->seq; - } + __console_unlock(); - if (suppress_message_printing(r.info->level)) { - /* - * Skip record we have buffered and already printed - * directly to the console when we received it, and - * record that has level above the console loglevel. - */ - console_seq++; - goto skip; - } - - /* Output to all consoles once old messages replayed. */ - if (unlikely(exclusive_console && - console_seq >= exclusive_console_stop_seq)) { - exclusive_console = NULL; - } + /* Were there any consoles available for flushing? */ + if (!flushed) + break; /* - * Handle extended console text first because later - * record_print_text() will modify the record buffer in-place. + * Some context may have added new records after + * console_flush_all() but before unlocking the console. + * Re-check if there is a new record to flush. If the trylock + * fails, another context is already handling the printing. */ - if (nr_ext_console_drivers) { - ext_len = info_print_ext_header(ext_text, - sizeof(ext_text), - r.info); - ext_len += msg_print_ext_body(ext_text + ext_len, - sizeof(ext_text) - ext_len, - &r.text_buf[0], - r.info->text_len, - &r.info->dev_info); - } - len = record_print_text(&r, - console_msg_format & MSG_FORMAT_SYSLOG, - printk_time); - console_seq++; - - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. 
- * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); - - stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(ext_text, ext_len, text, len); - start_critical_timings(); - - handover = console_lock_spinning_disable_and_check(); - printk_safe_exit_irqrestore(flags); - if (handover) - return; - - if (do_cond_resched) - cond_resched(); - } - - /* Get consistent value of the next-to-be-used sequence number. */ - next_seq = console_seq; - - console_locked = 0; - up_console_sem(); - - /* - * Someone could have filled up the buffer again, so re-check if there's - * something to flush. In case we cannot trylock the console_sem again, - * there's a new owner and the console_unlock() from them will do the - * flush, no worries. - */ - retry = prb_read_valid(prb, next_seq, NULL); - if (retry && console_trylock()) - goto again; + } while (prb_read_valid(prb, next_seq, NULL) && console_trylock_sched(do_cond_resched)); } EXPORT_SYMBOL(console_unlock); @@ -2765,10 +2959,15 @@ void console_unblank(void) if (oops_in_progress) { if (down_trylock_console_sem() != 0) return; - } else + if (!console_excl_trylock()) { + up_console_sem(); + return; + } + } else { + pr_flush(1000, true); console_lock(); + } - console_locked = 1; console_may_schedule = 0; for_each_console(c) if ((c->flags & CON_ENABLED) && c->unblank) @@ -2794,8 +2993,14 @@ void console_flush_on_panic(enum con_flush_mode mode) console_trylock(); console_may_schedule = 0; - if (mode == CONSOLE_REPLAY_ALL) - console_seq = prb_first_valid_seq(prb); + if (mode == CONSOLE_REPLAY_ALL) { + struct console *c; + u64 seq; + + seq = prb_first_valid_seq(prb); + for_each_console(c) + c->seq = seq; + } console_unlock(); } @@ -2826,6 +3031,7 @@ struct tty_driver *console_device(int *index) */ void console_stop(struct console *console) { + pr_flush(1000, true); console_lock(); console->flags &= ~CON_ENABLED; console_unlock(); @@ -2837,6 +3043,7 @@ void console_start(struct console *console) console_lock(); console->flags |= CON_ENABLED; console_unlock(); + pr_flush(1000, true); } EXPORT_SYMBOL(console_start); @@ -3017,27 +3224,22 @@ void register_console(struct console *newcon) if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; - if (newcon->flags & CON_PRINTBUFFER) { - /* - * console_unlock(); will print out the buffered messages - * for us. - * - * We're about to replay the log buffer. Only do this to the - * just-registered console to avoid excessive message spam to - * the already-registered consoles. - * - * Set exclusive_console with disabled interrupts to reduce - * race window with eventual console_flush_on_panic() that - * ignores console_lock. - */ - exclusive_console = newcon; - exclusive_console_stop_seq = console_seq; + if (consoles_paused) + newcon->flags |= CON_PAUSED; + newcon->dropped = 0; + mutex_init(&newcon->lock); + if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); - console_seq = syslog_seq; + newcon->seq = syslog_seq; mutex_unlock(&syslog_lock); + } else { + /* Begin with next message. 
*/ + newcon->seq = prb_next_seq(prb); } + if (kthreads_started) + start_printk_kthread(newcon); console_unlock(); console_sysfs_notify(); @@ -3094,6 +3296,11 @@ int unregister_console(struct console *console) } } + if (console->thread) { + kthread_stop(console->thread); + console->thread = NULL; + } + if (res) goto out_disable_unlock; @@ -3199,16 +3406,206 @@ static int __init printk_late_init(void) ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", console_cpu_notify, NULL); WARN_ON(ret < 0); + + console_lock(); + for_each_console(con) + start_printk_kthread(con); + kthreads_started = true; + console_unlock(); + return 0; } late_initcall(printk_late_init); #if defined CONFIG_PRINTK +/** + * pr_flush() - Wait for printing threads to catch up. + * + * @timeout_ms: The maximum time (in ms) to wait. + * @reset_on_progress: Reset the timeout if forward progress is seen. + * + * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 + * represents infinite waiting. + * + * If @reset_on_progress is true, the timeout will be reset whenever any + * printer has been seen to make some forward progress. + * + * Context: Process context. May sleep while acquiring console lock. + * Return: true if all enabled printers are caught up. + */ +bool pr_flush(int timeout_ms, bool reset_on_progress) +{ + int remaining = timeout_ms; + struct console *con; + u64 last_diff = 0; + u64 printk_seq; + u64 diff; + u64 seq; + + might_sleep(); + + seq = prb_next_seq(prb); + + for (;;) { + diff = 0; + + console_lock(); + for_each_console(con) { + if (!console_is_usable(con)) + continue; + printk_seq = con->seq; + if (printk_seq < seq) + diff += seq - printk_seq; + } + console_unlock(); + + if (diff != last_diff && reset_on_progress) + remaining = timeout_ms; + + if (diff == 0 || remaining == 0) + break; + + if (remaining < 0) { + /* no timeout limit */ + msleep(100); + } else if (remaining < 100) { + msleep(remaining); + remaining = 0; + } else { + msleep(100); + remaining -= 100; + } + + last_diff = diff; + } + + return (diff == 0); +} +EXPORT_SYMBOL(pr_flush); + +static bool printer_should_wake(struct console *con, u64 seq) +{ + short flags; + + if (kthread_should_stop()) + return true; + + /* + * This is an unsafe read to con->flags, but false positives + * are not an issue as long as they are rare. + */ + flags = data_race(READ_ONCE(con->flags)); + + if (!(flags & CON_ENABLED) || + (flags & CON_PAUSED) || + atomic_read(&console_lock_count) == -1) { + return false; + } + + return prb_read_valid(prb, seq, NULL); +} + +static int printk_kthread_func(void *data) +{ + struct console *con = data; + char *dropped_text = NULL; + char *ext_text = NULL; + bool progress; + u64 seq = 0; + char *text; + int error; + + pr_info("%sconsole [%s%d]: printing thread started\n", + (con->flags & CON_BOOT) ? 
"boot" : "", + con->name, con->index); + + text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!text) + goto out; + + if (con->flags & CON_EXTENDED) { + ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); + if (!ext_text) + goto out; + } else { + dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); + if (!dropped_text) + goto out; + } + + for (;;) { + error = wait_event_interruptible(log_wait, printer_should_wake(con, seq)); + + if (kthread_should_stop()) + break; + + if (error) + continue; + + do { + error = mutex_lock_interruptible(&con->lock); + if (error) + break; + + if (!console_is_usable(con)) { + mutex_unlock(&con->lock); + break; + } + + if ((con->flags & CON_PAUSED) || !console_printer_tryenter()) { + mutex_unlock(&con->lock); + break; + } + + /* + * Even though the printk kthread is always preemptible, it is + * still not allowed to call cond_resched() from within + * console drivers. The task may become non-preemptible in the + * console driver call chain. For example, vt_console_print() + * takes a spinlock and then can call into fbcon_redraw(), + * which can conditionally invoke cond_resched(). + */ + console_may_schedule = 0; + progress = console_emit_next_record(con, text, ext_text, + dropped_text, NULL); + + seq = con->seq; + + console_printer_exit(); + + mutex_unlock(&con->lock); + } while (progress); + } +out: + kfree(dropped_text); + kfree(ext_text); + kfree(text); + pr_info("%sconsole [%s%d]: printing thread stopped\n", + (con->flags & CON_BOOT) ? "boot" : "", + con->name, con->index); + return 0; +} + +/* Must be called within console_lock(). */ +static void start_printk_kthread(struct console *con) +{ + con->thread = kthread_run(printk_kthread_func, con, + "pr/%s%d", con->name, con->index); + if (IS_ERR(con->thread)) { + con->thread = NULL; + pr_err("%sconsole [%s%d]: unable to start printing thread\n", + (con->flags & CON_BOOT) ? 
"boot" : "", + con->name, con->index); + return; + } +} + /* * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_PENDING_WAKEUP 0x01 #define PRINTK_PENDING_OUTPUT 0x02 +#define PRINTK_DIRECT_OUTPUT 0x04 static DEFINE_PER_CPU(int, printk_pending); @@ -3217,13 +3614,19 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) int pending = __this_cpu_xchg(printk_pending, 0); if (pending & PRINTK_PENDING_OUTPUT) { + if (pending & PRINTK_DIRECT_OUTPUT) + printk_direct_enter(); + /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); + + if (pending & PRINTK_DIRECT_OUTPUT) + printk_direct_exit(); } if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); + wake_up_interruptible_all(&log_wait); } static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = @@ -3244,11 +3647,16 @@ void wake_up_klogd(void) void defer_console_output(void) { + int val = PRINTK_PENDING_OUTPUT; + if (!printk_percpu_data_ready()) return; + if (atomic_read(&printk_direct)) + val |= PRINTK_DIRECT_OUTPUT; + preempt_disable(); - __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); + __this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); preempt_enable(); } @@ -3586,26 +3994,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind); #endif #ifdef CONFIG_SMP -static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1); -static atomic_t printk_cpulock_nested = ATOMIC_INIT(0); +static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); +static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); /** - * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant - * spinning lock is not owned by any CPU. + * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant + * spinning lock is not owned by any CPU. * * Context: Any context. */ -void __printk_wait_on_cpu_lock(void) +void __printk_cpu_sync_wait(void) { do { cpu_relax(); - } while (atomic_read(&printk_cpulock_owner) != -1); + } while (atomic_read(&printk_cpu_sync_owner) != -1); } -EXPORT_SYMBOL(__printk_wait_on_cpu_lock); +EXPORT_SYMBOL(__printk_cpu_sync_wait); /** - * __printk_cpu_trylock() - Try to acquire the printk cpu-reentrant - * spinning lock. + * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant + * spinning lock. * * If no processor has the lock, the calling processor takes the lock and * becomes the owner. If the calling processor is already the owner of the @@ -3614,7 +4022,7 @@ EXPORT_SYMBOL(__printk_wait_on_cpu_lock); * Context: Any context. Expects interrupts to be disabled. * Return: 1 on success, otherwise 0. */ -int __printk_cpu_trylock(void) +int __printk_cpu_sync_try_get(void) { int cpu; int old; @@ -3624,79 +4032,80 @@ int __printk_cpu_trylock(void) /* * Guarantee loads and stores from this CPU when it is the lock owner * are _not_ visible to the previous lock owner. This pairs with - * __printk_cpu_unlock:B. + * __printk_cpu_sync_put:B. * * Memory barrier involvement: * - * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B, then - * __printk_cpu_unlock:A can never read from __printk_cpu_trylock:B. + * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, + * then __printk_cpu_sync_put:A can never read from + * __printk_cpu_sync_try_get:B. 
* * Relies on: * - * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B + * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B * of the previous CPU * matching - * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B - * of this CPU + * ACQUIRE from __printk_cpu_sync_try_get:A to + * __printk_cpu_sync_try_get:B of this CPU */ - old = atomic_cmpxchg_acquire(&printk_cpulock_owner, -1, - cpu); /* LMM(__printk_cpu_trylock:A) */ + old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1, + cpu); /* LMM(__printk_cpu_sync_try_get:A) */ if (old == -1) { /* * This CPU is now the owner and begins loading/storing - * data: LMM(__printk_cpu_trylock:B) + * data: LMM(__printk_cpu_sync_try_get:B) */ return 1; } else if (old == cpu) { /* This CPU is already the owner. */ - atomic_inc(&printk_cpulock_nested); + atomic_inc(&printk_cpu_sync_nested); return 1; } return 0; } -EXPORT_SYMBOL(__printk_cpu_trylock); +EXPORT_SYMBOL(__printk_cpu_sync_try_get); /** - * __printk_cpu_unlock() - Release the printk cpu-reentrant spinning lock. + * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock. * * The calling processor must be the owner of the lock. * * Context: Any context. Expects interrupts to be disabled. */ -void __printk_cpu_unlock(void) +void __printk_cpu_sync_put(void) { - if (atomic_read(&printk_cpulock_nested)) { - atomic_dec(&printk_cpulock_nested); + if (atomic_read(&printk_cpu_sync_nested)) { + atomic_dec(&printk_cpu_sync_nested); return; } /* * This CPU is finished loading/storing data: - * LMM(__printk_cpu_unlock:A) + * LMM(__printk_cpu_sync_put:A) */ /* * Guarantee loads and stores from this CPU when it was the * lock owner are visible to the next lock owner. This pairs - * with __printk_cpu_trylock:A. + * with __printk_cpu_sync_try_get:A. * * Memory barrier involvement: * - * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B, - * then __printk_cpu_trylock:B reads from __printk_cpu_unlock:A. + * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, + * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A. * * Relies on: * - * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B + * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B * of this CPU * matching - * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B - * of the next CPU + * ACQUIRE from __printk_cpu_sync_try_get:A to + * __printk_cpu_sync_try_get:B of the next CPU */ - atomic_set_release(&printk_cpulock_owner, - -1); /* LMM(__printk_cpu_unlock:B) */ + atomic_set_release(&printk_cpu_sync_owner, + -1); /* LMM(__printk_cpu_sync_put:B) */ } -EXPORT_SYMBOL(__printk_cpu_unlock); +EXPORT_SYMBOL(__printk_cpu_sync_put); #endif /* CONFIG_SMP */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 5e2fa6fd97f1..26953f9b2ab2 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -610,6 +610,7 @@ static void print_cpu_stall(unsigned long gps) * See Documentation/RCU/stallwarn.rst for info on how to debug * RCU CPU stall warnings. 
*/ + printk_direct_enter(); trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); @@ -644,6 +645,7 @@ static void print_cpu_stall(unsigned long gps) */ set_tsk_need_resched(current); set_preempt_need_resched(); + printk_direct_exit(); } static void check_cpu_stall(struct rcu_data *rdp) diff --git a/kernel/reboot.c b/kernel/reboot.c index 6bcc5d6a6572..0f219ce610fc 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -447,9 +447,11 @@ static int __orderly_reboot(void) ret = run_cmd(reboot_cmd); if (ret) { + printk_direct_enter(); pr_warn("Failed to start orderly reboot: forcing the issue\n"); emergency_sync(); kernel_restart(NULL); + printk_direct_exit(); } return ret; @@ -462,6 +464,7 @@ static int __orderly_poweroff(bool force) ret = run_cmd(poweroff_cmd); if (ret && force) { + printk_direct_enter(); pr_warn("Failed to start orderly shutdown: forcing the issue\n"); /* @@ -471,6 +474,7 @@ static int __orderly_poweroff(bool force) */ emergency_sync(); kernel_power_off(); + printk_direct_exit(); } return ret; @@ -528,6 +532,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot); */ static void hw_failure_emergency_poweroff_func(struct work_struct *work) { + printk_direct_enter(); + /* * We have reached here after the emergency shutdown waiting period has * expired. This means orderly_poweroff has not been able to shut off @@ -544,6 +550,8 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work) */ pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); emergency_restart(); + + printk_direct_exit(); } static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, @@ -582,11 +590,13 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) { static atomic_t allow_proceed = ATOMIC_INIT(1); + printk_direct_enter(); + pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); /* Shutdown should be initiated only once. */ if (!atomic_dec_and_test(&allow_proceed)) - return; + goto out; /* * Queue a backup emergency shutdown in the event of @@ -594,6 +604,8 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) */ hw_failure_emergency_poweroff(ms_until_forced); orderly_poweroff(true); +out: + printk_direct_exit(); } EXPORT_SYMBOL_GPL(hw_protection_shutdown); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index ad912511a0c0..930563f155ee 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -424,6 +424,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* Start period for the next softlockup warning. */ update_report_ts(); + printk_direct_enter(); + pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! 
[%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -442,6 +444,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); + + printk_direct_exit(); } return HRTIMER_RESTART; diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b1582c..56cfbae94c42 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,6 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; + printk_direct_enter(); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); print_modules(); @@ -155,6 +157,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + printk_direct_exit(); + __this_cpu_write(hard_watchdog_warn, true); return; } diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 6b7f1bf6715d..e3b4eeb1dcb4 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -102,9 +102,11 @@ asmlinkage __visible void dump_stack_lvl(const char *log_lvl) * Permit this cpu to perform nested stack dumps while serialising * against other CPUs */ - printk_cpu_lock_irqsave(flags); + printk_direct_enter(); + printk_cpu_sync_get_irqsave(flags); __dump_stack(log_lvl); - printk_cpu_unlock_irqrestore(flags); + printk_cpu_sync_put_irqrestore(flags); + printk_direct_exit(); } EXPORT_SYMBOL(dump_stack_lvl); diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 199ab201d501..dabeb35bc8dc 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -99,7 +99,8 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) * Allow nested NMI backtraces while serializing * against other CPUs. */ - printk_cpu_lock_irqsave(flags); + printk_direct_enter(); + printk_cpu_sync_get_irqsave(flags); if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) { pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", cpu, (void *)instruction_pointer(regs)); @@ -110,7 +111,8 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) else dump_stack(); } - printk_cpu_unlock_irqrestore(flags); + printk_cpu_sync_put_irqrestore(flags); + printk_direct_exit(); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } -- 2.35.1