Merge tag 'sched_urgent_for_v6.12_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduling fixes from Borislav Petkov:

 - Add PREEMPT_RT maintainers

 - Fix another aspect of delayed dequeued tasks wrt determining their state, i.e., whether they're runnable or blocked

 - Handle delayed dequeued tasks and their migration wrt PSI properly

 - Fix the situation where a delayed dequeue task gets enqueued into a new class, which should not happen

 - Fix a case where memory allocation would happen while the runqueue lock is held, which is a no-no

 - Do not over-schedule when tasks with shorter slices preempt the currently running task

 - Make sure delayed-dequeue entities are properly handled before unthrottling

 - Other smaller cleanups and improvements

* tag 'sched_urgent_for_v6.12_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  MAINTAINERS: Add an entry for PREEMPT_RT.
  sched/fair: Fix external p->on_rq users
  sched/psi: Fix mistaken CPU pressure indication after corrupted task state bug
  sched/core: Dequeue PSI signals for blocked tasks that are delayed
  sched: Fix delayed_dequeue vs switched_from_fair()
  sched/core: Disable page allocation in task_tick_mm_cid()
  sched/deadline: Use hrtick_enabled_dl() before start_hrtick_dl()
  sched/eevdf: Fix wakeup-preempt by checking cfs_rq->nr_running
  sched: Fix sched_delayed vs cfs_bandwidth
commit 2b4d25010d
@@ -19527,6 +19527,14 @@ S: Maintained
 F: Documentation/tools/rtla/
 F: tools/tracing/rtla/
 
+Real-time Linux (PREEMPT_RT)
+M: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+M: Clark Williams <clrkwllms@kernel.org>
+M: Steven Rostedt <rostedt@goodmis.org>
+L: linux-rt-devel@lists.linux.dev
+S: Supported
+K: PREEMPT_RT
+
 REALTEK AUDIO CODECS
 M: Oder Chiou <oder_chiou@realtek.com>
 S: Maintained
@@ -2133,6 +2133,11 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
+static inline bool task_is_runnable(struct task_struct *p)
+{
+        return p->on_rq && !p->se.sched_delayed;
+}
+
 extern bool sched_task_on_rq(struct task_struct *p);
 extern unsigned long get_wchan(struct task_struct *p);
 extern struct task_struct *cpu_curr_snapshot(int cpu);
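The helper above is what the remaining hunks convert out-of-scheduler ->on_rq users to. As a rough sketch of the conversion (foo_task_is_busy() is a hypothetical caller, not code from the tree):

    /*
     * The old p->on_rq test is also true for sched_delayed tasks, which sit
     * on the runqueue only for bookkeeping and will not run until ttwu()
     * revives them; task_is_runnable() excludes them.
     */
    static bool foo_task_is_busy(struct task_struct *p)
    {
            return task_is_runnable(p);     /* was: return p->on_rq != 0; */
    }

The perf, freezer, tracing-selftest and KVM hunks below apply exactly this conversion to in-tree callers.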
@@ -14,11 +14,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 }
 
 enum task_work_notify_mode {
-        TWA_NONE,
+        TWA_NONE = 0,
         TWA_RESUME,
         TWA_SIGNAL,
         TWA_SIGNAL_NO_IPI,
         TWA_NMI_CURRENT,
+
+        TWA_FLAGS = 0xff00,
+        TWAF_NO_ALLOC = 0x0100,
 };
 
 static inline bool task_work_pending(struct task_struct *task)
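With TWA_FLAGS split off from the notification modes, callers can OR modifier bits into the notify argument; task_work_add() masks them out again before acting on the mode (see its hunk near the end). A minimal usage sketch, where foo_twork_func() and foo_queue_work() are hypothetical:

    static void foo_twork_func(struct callback_head *head)
    {
            /* Runs when the task returns to user space (TWA_RESUME below). */
    }

    static struct callback_head foo_twork;

    static void foo_queue_work(struct task_struct *task)
    {
            init_task_work(&foo_twork, foo_twork_func);

            /*
             * TWAF_NO_ALLOC: skip the allocating KASAN aux-stack record, so
             * the call is safe in contexts that must not allocate, e.g. with
             * the runqueue lock held (see the task_tick_mm_cid() hunk).
             */
            if (task_work_add(task, &foo_twork, TWA_RESUME | TWAF_NO_ALLOC))
                    pr_debug("target task is exiting, work not queued\n");
    }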
@@ -9251,7 +9251,7 @@ static void perf_event_switch(struct task_struct *task,
                 },
         };
 
-        if (!sched_in && task->on_rq) {
+        if (!sched_in && task_is_runnable(task)) {
                 switch_event.event_id.header.misc |=
                                 PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
         }
@@ -109,7 +109,12 @@ static int __set_task_frozen(struct task_struct *p, void *arg)
 {
         unsigned int state = READ_ONCE(p->__state);
 
-        if (p->on_rq)
+        /*
+         * Allow freezing the sched_delayed tasks; they will not execute until
+         * ttwu() fixes them up, so it is safe to swap their state now, instead
+         * of waiting for them to get fully dequeued.
+         */
+        if (task_is_runnable(p))
                 return 0;
 
         if (p != current && task_curr(p))
@@ -985,6 +985,15 @@ static bool rcu_tasks_is_holdout(struct task_struct *t)
         if (!READ_ONCE(t->on_rq))
                 return false;
 
+        /*
+         * t->on_rq && !t->se.sched_delayed *could* be considered sleeping but
+         * since it is a spurious state (it will transition into the
+         * traditional blocked state or get woken up without outside
+         * dependencies), not considering it such should only affect timing.
+         *
+         * Be conservative for now and not include it.
+         */
+
         /*
          * Idle tasks (or idle injection) within the idle loop are RCU-tasks
          * quiescent states. But CPU boot code performed by the idle task
@@ -548,6 +548,11 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
  * ON_RQ_MIGRATING state is used for migration without holding both
  * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock().
  *
+ * Additionally it is possible to be ->on_rq but still be considered not
+ * runnable when p->se.sched_delayed is true. These tasks are on the runqueue
+ * but will be dequeued as soon as they get picked again. See the
+ * task_is_runnable() helper.
+ *
  * p->on_cpu <- { 0, 1 }:
  *
  * is set by prepare_task() and cleared by finish_task() such that it will be
@@ -2012,11 +2017,6 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
         if (!(flags & ENQUEUE_NOCLOCK))
                 update_rq_clock(rq);
 
-        if (!(flags & ENQUEUE_RESTORE)) {
-                sched_info_enqueue(rq, p);
-                psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
-        }
-
         p->sched_class->enqueue_task(rq, p, flags);
         /*
          * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
@@ -2024,6 +2024,11 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
          */
         uclamp_rq_inc(rq, p);
 
+        if (!(flags & ENQUEUE_RESTORE)) {
+                sched_info_enqueue(rq, p);
+                psi_enqueue(p, flags & ENQUEUE_MIGRATED);
+        }
+
         if (sched_core_enabled(rq))
                 sched_core_enqueue(rq, p);
 }
@@ -2041,7 +2046,7 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 
         if (!(flags & DEQUEUE_SAVE)) {
                 sched_info_dequeue(rq, p);
-                psi_dequeue(p, flags & DEQUEUE_SLEEP);
+                psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
         }
 
         /*
@@ -4323,9 +4328,10 @@ static bool __task_needs_rq_lock(struct task_struct *p)
  * @arg: Argument to function.
  *
  * Fix the task in it's current state by avoiding wakeups and or rq operations
- * and call @func(@arg) on it. This function can use ->on_rq and task_curr()
- * to work out what the state is, if required. Given that @func can be invoked
- * with a runqueue lock held, it had better be quite lightweight.
+ * and call @func(@arg) on it. This function can use task_is_runnable() and
+ * task_curr() to work out what the state is, if required. Given that @func
+ * can be invoked with a runqueue lock held, it had better be quite
+ * lightweight.
  *
  * Returns:
  *   Whatever @func returns
@@ -6544,6 +6550,7 @@ static void __sched notrace __schedule(int sched_mode)
          * as a preemption by schedule_debug() and RCU.
          */
         bool preempt = sched_mode > SM_NONE;
+        bool block = false;
         unsigned long *switch_count;
         unsigned long prev_state;
         struct rq_flags rf;
@@ -6629,6 +6636,7 @@ static void __sched notrace __schedule(int sched_mode)
                          * After this, schedule() must not care about p->state any more.
                          */
                         block_task(rq, prev, flags);
+                        block = true;
                 }
                 switch_count = &prev->nvcsw;
         }
@@ -6674,7 +6682,7 @@ static void __sched notrace __schedule(int sched_mode)
 
                 migrate_disable_switch(rq, prev);
                 psi_account_irqtime(rq, prev, next);
-                psi_sched_switch(prev, next, !task_on_rq_queued(prev));
+                psi_sched_switch(prev, next, block);
 
                 trace_sched_switch(preempt, prev, next, prev_state);
 
@@ -7017,20 +7025,20 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
 }
 EXPORT_SYMBOL(default_wake_function);
 
-void __setscheduler_prio(struct task_struct *p, int prio)
+const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
 {
         if (dl_prio(prio))
-                p->sched_class = &dl_sched_class;
-        else if (rt_prio(prio))
-                p->sched_class = &rt_sched_class;
-#ifdef CONFIG_SCHED_CLASS_EXT
-        else if (task_should_scx(p))
-                p->sched_class = &ext_sched_class;
-#endif
-        else
-                p->sched_class = &fair_sched_class;
+                return &dl_sched_class;
 
-        p->prio = prio;
+        if (rt_prio(prio))
+                return &rt_sched_class;
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+        if (task_should_scx(p))
+                return &ext_sched_class;
+#endif
+
+        return &fair_sched_class;
 }
 
 #ifdef CONFIG_RT_MUTEXES
@@ -7076,7 +7084,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 {
         int prio, oldprio, queued, running, queue_flag =
                 DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
-        const struct sched_class *prev_class;
+        const struct sched_class *prev_class, *next_class;
         struct rq_flags rf;
         struct rq *rq;
 
@@ -7134,6 +7142,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
                 queue_flag &= ~DEQUEUE_MOVE;
 
         prev_class = p->sched_class;
+        next_class = __setscheduler_class(p, prio);
+
+        if (prev_class != next_class && p->se.sched_delayed)
+                dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
+
         queued = task_on_rq_queued(p);
         running = task_current(rq, p);
         if (queued)
@@ -7171,7 +7184,9 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
                         p->rt.timeout = 0;
         }
 
-        __setscheduler_prio(p, prio);
+        p->sched_class = next_class;
+        p->prio = prio;
+
         check_class_changing(rq, p, prev_class);
 
         if (queued)
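The priority-inheritance path above and __sched_setscheduler() further down now share one shape: resolve the target class first, finish any pending delayed dequeue if the class is about to change, and only then run the usual dequeue/put/set/enqueue sequence. A condensed sketch of that pattern (not a verbatim excerpt):

    const struct sched_class *prev_class = p->sched_class;
    const struct sched_class *next_class = __setscheduler_class(p, prio);

    /* A sched_delayed task must never be enqueued into a new class. */
    if (prev_class != next_class && p->se.sched_delayed)
            dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);

    /* ... dequeue_task()/put_prev_task() as before ... */

    p->sched_class = next_class;
    p->prio = prio;
    check_class_changing(rq, p, prev_class);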
@@ -10465,7 +10480,9 @@ void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
                 return;
         if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
                 return;
-        task_work_add(curr, work, TWA_RESUME);
+
+        /* No page allocation under rq lock */
+        task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
 }
 
 void sched_mm_cid_exit_signals(struct task_struct *t)
@@ -2385,7 +2385,7 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
 
         deadline_queue_push_tasks(rq);
 
-        if (hrtick_enabled(rq))
+        if (hrtick_enabled_dl(rq))
                 start_hrtick_dl(rq, &p->dl);
 }
 
@@ -4493,7 +4493,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
 
                 sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
 
-                __setscheduler_prio(p, p->prio);
+                p->sched_class = __setscheduler_class(p, p->prio);
                 check_class_changing(task_rq(p), p, old_class);
 
                 sched_enq_and_set_task(&ctx);
@@ -5204,7 +5204,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
                 sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
 
                 p->scx.slice = SCX_SLICE_DFL;
-                __setscheduler_prio(p, p->prio);
+                p->sched_class = __setscheduler_class(p, p->prio);
                 check_class_changing(task_rq(p), p, old_class);
 
                 sched_enq_and_set_task(&ctx);
@@ -1247,7 +1247,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 
         account_cfs_rq_runtime(cfs_rq, delta_exec);
 
-        if (rq->nr_running == 1)
+        if (cfs_rq->nr_running == 1)
                 return;
 
         if (resched || did_preempt_short(cfs_rq, curr)) {
@@ -6058,10 +6058,13 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
         for_each_sched_entity(se) {
                 struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 
-                if (se->on_rq) {
-                        SCHED_WARN_ON(se->sched_delayed);
+                /* Handle any unfinished DELAY_DEQUEUE business first. */
+                if (se->sched_delayed) {
+                        int flags = DEQUEUE_SLEEP | DEQUEUE_DELAYED;
+
+                        dequeue_entity(qcfs_rq, se, flags);
+                } else if (se->on_rq)
                         break;
-                }
                 enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);
 
                 if (cfs_rq_is_idle(group_cfs_rq(se)))
@@ -13174,22 +13177,6 @@ static void attach_task_cfs_rq(struct task_struct *p)
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
 {
         detach_task_cfs_rq(p);
-        /*
-         * Since this is called after changing class, this is a little weird
-         * and we cannot use DEQUEUE_DELAYED.
-         */
-        if (p->se.sched_delayed) {
-                /* First, dequeue it from its new class' structures */
-                dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
-                /*
-                 * Now, clean up the fair_sched_class side of things
-                 * related to sched_delayed being true and that wasn't done
-                 * due to the generic dequeue not using DEQUEUE_DELAYED.
-                 */
-                finish_delayed_dequeue_entity(&p->se);
-                p->se.rel_deadline = 0;
-                __block_task(rq, p);
-        }
 }
 
 static void switched_to_fair(struct rq *rq, struct task_struct *p)
@@ -3800,7 +3800,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio)
 
 extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
 extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
-extern void __setscheduler_prio(struct task_struct *p, int prio);
+extern const struct sched_class *__setscheduler_class(struct task_struct *p, int prio);
 extern void set_load_weight(struct task_struct *p, bool update_load);
 extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
 extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
@@ -119,45 +119,63 @@ static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
 /*
  * PSI tracks state that persists across sleeps, such as iowaits and
  * memory stalls. As a result, it has to distinguish between sleeps,
- * where a task's runnable state changes, and requeues, where a task
- * and its state are being moved between CPUs and runqueues.
+ * where a task's runnable state changes, and migrations, where a task
+ * and its runnable state are being moved between CPUs and runqueues.
+ *
+ * A notable case is a task whose dequeue is delayed. PSI considers
+ * those sleeping, but because they are still on the runqueue they can
+ * go through migration requeues. In this case, *sleeping* states need
+ * to be transferred.
  */
-static inline void psi_enqueue(struct task_struct *p, bool wakeup)
+static inline void psi_enqueue(struct task_struct *p, bool migrate)
 {
-        int clear = 0, set = TSK_RUNNING;
+        int clear = 0, set = 0;
 
         if (static_branch_likely(&psi_disabled))
                 return;
 
-        if (p->in_memstall)
-                set |= TSK_MEMSTALL_RUNNING;
-
-        if (!wakeup) {
+        if (p->se.sched_delayed) {
+                /* CPU migration of "sleeping" task */
+                SCHED_WARN_ON(!migrate);
                 if (p->in_memstall)
                         set |= TSK_MEMSTALL;
+                if (p->in_iowait)
+                        set |= TSK_IOWAIT;
+        } else if (migrate) {
+                /* CPU migration of runnable task */
+                set = TSK_RUNNING;
+                if (p->in_memstall)
+                        set |= TSK_MEMSTALL | TSK_MEMSTALL_RUNNING;
         } else {
+                /* Wakeup of new or sleeping task */
                 if (p->in_iowait)
                         clear |= TSK_IOWAIT;
+                set = TSK_RUNNING;
+                if (p->in_memstall)
+                        set |= TSK_MEMSTALL_RUNNING;
         }
 
         psi_task_change(p, clear, set);
 }
 
-static inline void psi_dequeue(struct task_struct *p, bool sleep)
+static inline void psi_dequeue(struct task_struct *p, bool migrate)
 {
         if (static_branch_likely(&psi_disabled))
                 return;
 
+        /*
+         * When migrating a task to another CPU, clear all psi
+         * state. The enqueue callback above will work it out.
+         */
+        if (migrate)
+                psi_task_change(p, p->psi_flags, 0);
+
         /*
          * A voluntary sleep is a dequeue followed by a task switch. To
          * avoid walking all ancestors twice, psi_task_switch() handles
          * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
          * Do nothing here.
          */
-        if (sleep)
-                return;
-
-        psi_task_change(p, p->psi_flags, 0);
 }
 
 static inline void psi_ttwu_dequeue(struct task_struct *p)
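The boolean these helpers receive now means "this enqueue/dequeue is a migration" rather than "wakeup"/"sleep", which is what lets a delayed-dequeue task carry its sleeping state (TSK_MEMSTALL, TSK_IOWAIT) across CPUs instead of being misreported as running. Condensed from the enqueue_task()/dequeue_task() hunks above, the call sites become:

    /* enqueue_task(), in the !(flags & ENQUEUE_RESTORE) block: */
    psi_enqueue(p, flags & ENQUEUE_MIGRATED);

    /* dequeue_task(): anything that is not a sleep is a migration. */
    psi_dequeue(p, !(flags & DEQUEUE_SLEEP));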
@@ -190,8 +208,8 @@ static inline void psi_sched_switch(struct task_struct *prev,
 }
 
 #else /* CONFIG_PSI */
-static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
-static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
+static inline void psi_enqueue(struct task_struct *p, bool migrate) {}
+static inline void psi_dequeue(struct task_struct *p, bool migrate) {}
 static inline void psi_ttwu_dequeue(struct task_struct *p) {}
 static inline void psi_sched_switch(struct task_struct *prev,
                                     struct task_struct *next,
@@ -529,7 +529,7 @@ int __sched_setscheduler(struct task_struct *p,
 {
         int oldpolicy = -1, policy = attr->sched_policy;
         int retval, oldprio, newprio, queued, running;
-        const struct sched_class *prev_class;
+        const struct sched_class *prev_class, *next_class;
         struct balance_callback *head;
         struct rq_flags rf;
         int reset_on_fork;
@@ -706,6 +706,12 @@ int __sched_setscheduler(struct task_struct *p,
                 queue_flags &= ~DEQUEUE_MOVE;
         }
 
+        prev_class = p->sched_class;
+        next_class = __setscheduler_class(p, newprio);
+
+        if (prev_class != next_class && p->se.sched_delayed)
+                dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
+
         queued = task_on_rq_queued(p);
         running = task_current(rq, p);
         if (queued)
@@ -713,11 +719,10 @@ int __sched_setscheduler(struct task_struct *p,
         if (running)
                 put_prev_task(rq, p);
 
-        prev_class = p->sched_class;
-
         if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
                 __setscheduler_params(p, attr);
-                __setscheduler_prio(p, newprio);
+                p->sched_class = next_class;
+                p->prio = newprio;
         }
         __setscheduler_uclamp(p, attr);
         check_class_changing(rq, p, prev_class);
@@ -55,15 +55,26 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
                   enum task_work_notify_mode notify)
 {
         struct callback_head *head;
+        int flags = notify & TWA_FLAGS;
 
+        notify &= ~TWA_FLAGS;
         if (notify == TWA_NMI_CURRENT) {
                 if (WARN_ON_ONCE(task != current))
                         return -EINVAL;
                 if (!IS_ENABLED(CONFIG_IRQ_WORK))
                         return -EINVAL;
         } else {
-                /* record the work call stack in order to print it in KASAN reports */
-                kasan_record_aux_stack(work);
+                /*
+                 * Record the work call stack in order to print it in KASAN
+                 * reports.
+                 *
+                 * Note that stack allocation can fail if TWAF_NO_ALLOC flag
+                 * is set and new page is needed to expand the stack buffer.
+                 */
+                if (flags & TWAF_NO_ALLOC)
+                        kasan_record_aux_stack_noalloc(work);
+                else
+                        kasan_record_aux_stack(work);
         }
 
         head = READ_ONCE(task->task_works);
@@ -434,6 +434,12 @@ static void tick_nohz_kick_task(struct task_struct *tsk)
          *   smp_mb__after_spin_lock()
          *   tick_nohz_task_switch()
          *     LOAD p->tick_dep_mask
+         *
+         * XXX given a task picks up the dependency on schedule(), should we
+         * only care about tasks that are currently on the CPU instead of all
+         * that are on the runqueue?
+         *
+         * That is, does this want to be: task_on_cpu() / task_curr()?
          */
         if (!sched_task_on_rq(tsk))
                 return;
@@ -1485,7 +1485,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
         /* reset the max latency */
         tr->max_latency = 0;
 
-        while (p->on_rq) {
+        while (task_is_runnable(p)) {
                 /*
                  * Sleep to make sure the -deadline thread is asleep too.
                  * On virtual machines we can't rely on timings,
@@ -6387,7 +6387,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 
         WRITE_ONCE(vcpu->scheduled_out, true);
 
-        if (current->on_rq && vcpu->wants_to_run) {
+        if (task_is_runnable(current) && vcpu->wants_to_run) {
                 WRITE_ONCE(vcpu->preempted, true);
                 WRITE_ONCE(vcpu->ready, true);
         }