printk: Add non-BKL console basic infrastructure
The current console/printk subsystem is protected by a Big Kernel Lock, (aka console_lock) which has ill defined semantics and is more or less stateless. This puts severe limitations on the console subsystem and makes forced takeover and output in emergency and panic situations a fragile endavour which is based on try and pray. The goal of non-BKL consoles is to break out of the console lock jail and to provide a new infrastructure that avoids the pitfalls and allows console drivers to be gradually converted over. The proposed infrastructure aims for the following properties: - Per console locking instead of global locking - Per console state which allows to make informed decisions - Stateful handover and takeover As a first step state is added to struct console. The per console state is an atomic_long_t with a 32bit bit field and on 64bit also a 32bit sequence for tracking the last printed ringbuffer sequence number. On 32bit the sequence is separate from state for obvious reasons which requires handling a few extra race conditions. Reserve state bits, which will be populated later in the series. Wire it up into the console register/unregister functionality and exclude such consoles from being handled in the console BKL mechanisms. Since the non-BKL consoles will not depend on the console lock/unlock dance for printing, only perform said dance if a BKL console is registered. The decision to use a bitfield was made as using a plain u32 with mask/shift operations turned out to result in uncomprehensible code. Co-developed-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
This commit is contained in:
committed by
Sebastian Andrzej Siewior
parent
b9ca69dd0f
commit
fe2ba02c78
@@ -156,6 +156,8 @@ static inline int con_debug_leave(void)
|
||||
* /dev/kmesg which requires a larger output buffer.
|
||||
* @CON_SUSPENDED: Indicates if a console is suspended. If true, the
|
||||
* printing callbacks must not be called.
|
||||
* @CON_NO_BKL: Console can operate outside of the BKL style console_lock
|
||||
* constraints.
|
||||
*/
|
||||
enum cons_flags {
|
||||
CON_PRINTBUFFER = BIT(0),
|
||||
@@ -166,6 +168,32 @@ enum cons_flags {
|
||||
CON_BRL = BIT(5),
|
||||
CON_EXTENDED = BIT(6),
|
||||
CON_SUSPENDED = BIT(7),
|
||||
CON_NO_BKL = BIT(8),
|
||||
};
|
||||
|
||||
/**
|
||||
* struct cons_state - console state for NOBKL consoles
|
||||
* @atom: Compound of the state fields for atomic operations
|
||||
* @seq: Sequence for record tracking (64bit only)
|
||||
* @bits: Compound of the state bits below
|
||||
*
|
||||
* To be used for state read and preparation of atomic_long_cmpxchg()
|
||||
* operations.
|
||||
*/
|
||||
struct cons_state {
|
||||
union {
|
||||
unsigned long atom;
|
||||
struct {
|
||||
#ifdef CONFIG_64BIT
|
||||
u32 seq;
|
||||
#endif
|
||||
union {
|
||||
u32 bits;
|
||||
struct {
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -187,6 +215,8 @@ enum cons_flags {
|
||||
* @dropped: Number of unreported dropped ringbuffer records
|
||||
* @data: Driver private data
|
||||
* @node: hlist node for the console list
|
||||
*
|
||||
* @atomic_state: State array for NOBKL consoles; real and handover
|
||||
*/
|
||||
struct console {
|
||||
char name[16];
|
||||
@@ -206,6 +236,9 @@ struct console {
|
||||
unsigned long dropped;
|
||||
void *data;
|
||||
struct hlist_node node;
|
||||
|
||||
/* NOBKL console specific members */
|
||||
atomic_long_t __private atomic_state[2];
|
||||
};
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
obj-y = printk.o
|
||||
obj-$(CONFIG_PRINTK) += printk_safe.o
|
||||
obj-$(CONFIG_PRINTK) += printk_safe.o printk_nobkl.o
|
||||
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
|
||||
obj-$(CONFIG_PRINTK_INDEX) += index.o
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* internal.h - printk internal definitions
|
||||
*/
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/console.h>
|
||||
|
||||
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
|
||||
void __init printk_sysctl_init(void);
|
||||
@@ -61,6 +62,10 @@ void defer_console_output(void);
|
||||
|
||||
u16 printk_parse_prefix(const char *text, int *level,
|
||||
enum printk_info_flags *flags);
|
||||
|
||||
void cons_nobkl_cleanup(struct console *con);
|
||||
void cons_nobkl_init(struct console *con);
|
||||
|
||||
#else
|
||||
|
||||
#define PRINTK_PREFIX_MAX 0
|
||||
@@ -76,8 +81,13 @@ u16 printk_parse_prefix(const char *text, int *level,
|
||||
#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
|
||||
|
||||
static inline bool printk_percpu_data_ready(void) { return false; }
|
||||
static inline void cons_nobkl_init(struct console *con) { }
|
||||
static inline void cons_nobkl_cleanup(struct console *con) { }
|
||||
|
||||
#endif /* CONFIG_PRINTK */
|
||||
|
||||
extern bool have_boot_console;
|
||||
|
||||
/**
|
||||
* struct printk_buffers - Buffers to read/format/output printk messages.
|
||||
* @outbuf: After formatting, contains text to output.
|
||||
|
||||
+44
-6
@@ -444,6 +444,19 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
|
||||
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
|
||||
static DEFINE_MUTEX(syslog_lock);
|
||||
|
||||
/*
|
||||
* Specifies if a BKL console was ever registered. Used to determine if the
|
||||
* console lock/unlock dance is needed for console printing.
|
||||
*/
|
||||
static bool have_bkl_console;
|
||||
|
||||
/*
|
||||
* Specifies if a boot console is registered. Used to determine if NOBKL
|
||||
* consoles may be used since NOBKL consoles cannot synchronize with boot
|
||||
* consoles.
|
||||
*/
|
||||
bool have_boot_console;
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
/* All 3 protected by @syslog_lock. */
|
||||
@@ -2288,7 +2301,7 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
|
||||
|
||||
/* If called from the scheduler, we can not call up(). */
|
||||
if (!in_sched) {
|
||||
if (!in_sched && have_bkl_console) {
|
||||
/*
|
||||
* The caller may be holding system-critical or
|
||||
* timing-sensitive locks. Disable preemption during
|
||||
@@ -2605,7 +2618,7 @@ void resume_console(void)
|
||||
*/
|
||||
static int console_cpu_notify(unsigned int cpu)
|
||||
{
|
||||
if (!cpuhp_tasks_frozen) {
|
||||
if (!cpuhp_tasks_frozen && have_bkl_console) {
|
||||
/* If trylock fails, someone else is doing the printing */
|
||||
if (console_trylock())
|
||||
console_unlock();
|
||||
@@ -3077,6 +3090,9 @@ void console_unblank(void)
|
||||
struct console *c;
|
||||
int cookie;
|
||||
|
||||
if (!have_bkl_console)
|
||||
return;
|
||||
|
||||
/*
|
||||
* First check if there are any consoles implementing the unblank()
|
||||
* callback. If not, there is no reason to continue and take the
|
||||
@@ -3144,6 +3160,9 @@ void console_flush_on_panic(enum con_flush_mode mode)
|
||||
bool handover;
|
||||
u64 next_seq;
|
||||
|
||||
if (!have_bkl_console)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Ignore the console lock and flush out the messages. Attempting a
|
||||
* trylock would not be useful because:
|
||||
@@ -3326,9 +3345,10 @@ static void try_enable_default_console(struct console *newcon)
|
||||
newcon->flags |= CON_CONSDEV;
|
||||
}
|
||||
|
||||
#define con_printk(lvl, con, fmt, ...) \
|
||||
printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \
|
||||
(con->flags & CON_BOOT) ? "boot" : "", \
|
||||
#define con_printk(lvl, con, fmt, ...) \
|
||||
printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \
|
||||
(con->flags & CON_NO_BKL) ? "" : "legacy ", \
|
||||
(con->flags & CON_BOOT) ? "boot" : "", \
|
||||
con->name, con->index, ##__VA_ARGS__)
|
||||
|
||||
static void console_init_seq(struct console *newcon, bool bootcon_registered)
|
||||
@@ -3488,6 +3508,14 @@ void register_console(struct console *newcon)
|
||||
newcon->dropped = 0;
|
||||
console_init_seq(newcon, bootcon_registered);
|
||||
|
||||
if (!(newcon->flags & CON_NO_BKL))
|
||||
have_bkl_console = true;
|
||||
else
|
||||
cons_nobkl_init(newcon);
|
||||
|
||||
if (newcon->flags & CON_BOOT)
|
||||
have_boot_console = true;
|
||||
|
||||
/*
|
||||
* Put this console in the list - keep the
|
||||
* preferred driver at the head of the list.
|
||||
@@ -3531,6 +3559,9 @@ void register_console(struct console *newcon)
|
||||
if (con->flags & CON_BOOT)
|
||||
unregister_console_locked(con);
|
||||
}
|
||||
|
||||
/* All boot consoles have been unregistered. */
|
||||
have_boot_console = false;
|
||||
}
|
||||
unlock:
|
||||
console_list_unlock();
|
||||
@@ -3579,6 +3610,9 @@ static int unregister_console_locked(struct console *console)
|
||||
*/
|
||||
synchronize_srcu(&console_srcu);
|
||||
|
||||
if (console->flags & CON_NO_BKL)
|
||||
cons_nobkl_cleanup(console);
|
||||
|
||||
console_sysfs_notify();
|
||||
|
||||
if (console->exit)
|
||||
@@ -3891,11 +3925,15 @@ void wake_up_klogd(void)
|
||||
*/
|
||||
void defer_console_output(void)
|
||||
{
|
||||
int val = PRINTK_PENDING_WAKEUP;
|
||||
|
||||
/*
|
||||
* New messages may have been added directly to the ringbuffer
|
||||
* using vprintk_store(), so wake any waiters as well.
|
||||
*/
|
||||
__wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
|
||||
if (have_bkl_console)
|
||||
val |= PRINTK_PENDING_OUTPUT;
|
||||
__wake_up_klogd(val);
|
||||
}
|
||||
|
||||
void printk_trigger_flush(void)
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
// Copyright (C) 2022 Linutronix GmbH, John Ogness
|
||||
// Copyright (C) 2022 Intel, Thomas Gleixner
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/console.h>
|
||||
#include "internal.h"
|
||||
/*
|
||||
* Printk implementation for consoles that do not depend on the BKL style
|
||||
* console_lock mechanism.
|
||||
*
|
||||
* Console is locked on a CPU when state::locked is set and state:cpu ==
|
||||
* current CPU. This is valid for the current execution context.
|
||||
*
|
||||
* Nesting execution contexts on the same CPU can carefully take over
|
||||
* if the driver allows reentrancy via state::unsafe = false. When the
|
||||
* interrupted context resumes it checks the state before entering
|
||||
* an unsafe region and aborts the operation if it detects a takeover.
|
||||
*
|
||||
* In case of panic or emergency the nesting context can take over the
|
||||
* console forcefully. The write callback is then invoked with the unsafe
|
||||
* flag set in the write context data, which allows the driver side to avoid
|
||||
* locks and to evaluate the driver state so it can use an emergency path
|
||||
* or repair the state instead of blindly assuming that it works.
|
||||
*
|
||||
* If the interrupted context touches the assigned record buffer after
|
||||
* takeover, it does not cause harm because at the same execution level
|
||||
* there is no concurrency on the same CPU. A threaded printer always has
|
||||
* its own record buffer so it can never interfere with any of the per CPU
|
||||
* record buffers.
|
||||
*
|
||||
* A concurrent writer on a different CPU can request to take over the
|
||||
* console by:
|
||||
*
|
||||
* 1) Carefully writing the desired state into state[REQ]
|
||||
* if there is no same or higher priority request pending.
|
||||
* This locks state[REQ] except for higher priority
|
||||
* waiters.
|
||||
*
|
||||
* 2) Setting state[CUR].req_prio unless a same or higher
|
||||
* priority waiter won the race.
|
||||
*
|
||||
* 3) Carefully spin on state[CUR] until that is locked with the
|
||||
* expected state. When the state is not the expected one then it
|
||||
* has to verify that state[REQ] is still the same and that
|
||||
* state[CUR] has not been taken over or unlocked.
|
||||
*
|
||||
* The unlocker hands over to state[REQ], but only if state[CUR]
|
||||
* matches.
|
||||
*
|
||||
* In case that the owner does not react on the request and does not make
|
||||
* observable progress, the waiter will timeout and can then decide to do
|
||||
* a hostile takeover.
|
||||
*/
|
||||
|
||||
#define copy_full_state(_dst, _src) do { _dst = _src; } while (0)
|
||||
#define copy_bit_state(_dst, _src) do { _dst.bits = _src.bits; } while (0)
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define copy_seq_state64(_dst, _src) do { _dst.seq = _src.seq; } while (0)
|
||||
#else
|
||||
#define copy_seq_state64(_dst, _src) do { } while (0)
|
||||
#endif
|
||||
|
||||
enum state_selector {
|
||||
CON_STATE_CUR,
|
||||
CON_STATE_REQ,
|
||||
};
|
||||
|
||||
/**
|
||||
* cons_state_set - Helper function to set the console state
|
||||
* @con: Console to update
|
||||
* @which: Selects real state or handover state
|
||||
* @new: The new state to write
|
||||
*
|
||||
* Only to be used when the console is not yet or no longer visible in the
|
||||
* system. Otherwise use cons_state_try_cmpxchg().
|
||||
*/
|
||||
static inline void cons_state_set(struct console *con, enum state_selector which,
|
||||
struct cons_state *new)
|
||||
{
|
||||
atomic_long_set(&ACCESS_PRIVATE(con, atomic_state[which]), new->atom);
|
||||
}
|
||||
|
||||
/**
|
||||
* cons_state_read - Helper function to read the console state
|
||||
* @con: Console to update
|
||||
* @which: Selects real state or handover state
|
||||
* @state: The state to store the result
|
||||
*/
|
||||
static inline void cons_state_read(struct console *con, enum state_selector which,
|
||||
struct cons_state *state)
|
||||
{
|
||||
state->atom = atomic_long_read(&ACCESS_PRIVATE(con, atomic_state[which]));
|
||||
}
|
||||
|
||||
/**
|
||||
* cons_state_try_cmpxchg() - Helper function for atomic_long_try_cmpxchg() on console state
|
||||
* @con: Console to update
|
||||
* @which: Selects real state or handover state
|
||||
* @old: Old/expected state
|
||||
* @new: New state
|
||||
*
|
||||
* Returns: True on success, false on fail
|
||||
*/
|
||||
static inline bool cons_state_try_cmpxchg(struct console *con,
|
||||
enum state_selector which,
|
||||
struct cons_state *old,
|
||||
struct cons_state *new)
|
||||
{
|
||||
return atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_state[which]),
|
||||
&old->atom, new->atom);
|
||||
}
|
||||
|
||||
/**
|
||||
* cons_nobkl_init - Initialize the NOBKL console specific data
|
||||
* @con: Console to initialize
|
||||
*/
|
||||
void cons_nobkl_init(struct console *con)
|
||||
{
|
||||
struct cons_state state = { };
|
||||
|
||||
cons_state_set(con, CON_STATE_CUR, &state);
|
||||
cons_state_set(con, CON_STATE_REQ, &state);
|
||||
}
|
||||
|
||||
/**
|
||||
* cons_nobkl_cleanup - Cleanup the NOBKL console specific data
|
||||
* @con: Console to cleanup
|
||||
*/
|
||||
void cons_nobkl_cleanup(struct console *con)
|
||||
{
|
||||
struct cons_state state = { };
|
||||
|
||||
cons_state_set(con, CON_STATE_CUR, &state);
|
||||
cons_state_set(con, CON_STATE_REQ, &state);
|
||||
}
|
||||
Reference in New Issue
Block a user