Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Mostly simple overlapping changes.

For example, David Ahern's adjacency list revamp in 'net-next'
conflicted with an adjacency list traversal bug fix in 'net'.

Signed-off-by: David S. Miller <davem@davemloft.net>
+49 -28
@@ -64,6 +64,9 @@
#include <linux/file.h>
#include <net/sock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cgroup.h>

/*
* pidlists linger the following amount before being destroyed. The goal
* is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
struct cgroup *cgrp = &root->cgrp;
struct cgrp_cset_link *link, *tmp_link;

trace_cgroup_destroy_root(root);

cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
strcpy(root->release_agent_path, opts.release_agent);
spin_unlock(&release_agent_path_lock);
}

trace_cgroup_remount(root);

out_unlock:
kfree(opts.release_agent);
kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
if (ret)
goto destroy_root;

trace_cgroup_setup_root(root);

/*
* There must be no failure case after here, since rebinding takes
* care of subsystems' refcounts, which are explicitly dropped in
@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};

static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
{
struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
int ret;

ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
if (ret < 0 || ret >= buflen)
return NULL;
return buf;
return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}

char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
{
char *ret;
int ret;

mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
*
* Return value is the same as kernfs_path().
*/
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
struct cgroup_root *root;
struct cgroup *cgrp;
int hierarchy_id = 1;
char *path = NULL;
int ret;

mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)

if (root) {
cgrp = task_cgroup_from_root(task, root);
path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
} else {
/* if no hierarchy exists, everyone is in "/" */
if (strlcpy(buf, "/", buflen) < buflen)
path = buf;
ret = strlcpy(buf, "/", buflen);
}

spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex);
return path;
return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);

@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);

cgroup_migrate_finish(&preloaded_csets);

if (!ret)
trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);

return ret;
}

@@ -3611,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
mutex_lock(&cgroup_mutex);

ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret)
trace_cgroup_rename(cgrp);

mutex_unlock(&cgroup_mutex);

@@ -4381,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)

if (task) {
ret = cgroup_migrate(task, false, to->root);
if (!ret)
trace_cgroup_transfer_tasks(to, task, false);
put_task_struct(task);
}
} while (task && !ret);
@@ -5046,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
ss->css_released(css);
} else {
/* cgroup release path */
trace_cgroup_release(cgrp);

cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;

@@ -5332,6 +5347,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
if (ret)
goto out_destroy;

trace_cgroup_mkdir(cgrp);

/* let's create and online css's */
kernfs_activate(kn);

@@ -5507,6 +5524,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)

ret = cgroup_destroy_locked(cgrp);

if (!ret)
trace_cgroup_rmdir(cgrp);

cgroup_kn_unlock(kn);
return ret;
}
@@ -5743,7 +5763,7 @@ core_initcall(cgroup_wq_init);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk)
{
char *buf, *path;
char *buf;
int retval;
struct cgroup_root *root;

@@ -5786,17 +5806,17 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
* " (deleted)" is appended to the cgroup path.
*/
if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
current->nsproxy->cgroup_ns);
if (!path) {
if (retval >= PATH_MAX)
retval = -ENAMETOOLONG;
if (retval < 0)
goto out_unlock;
}
} else {
path = "/";
}

seq_puts(m, path);
seq_puts(m, buf);
} else {
seq_puts(m, "/");
}

if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
seq_puts(m, " (deleted)\n");
@@ -6062,8 +6082,9 @@ static void cgroup_release_agent(struct work_struct *work)
{
struct cgroup *cgrp =
container_of(work, struct cgroup, release_agent_work);
char *pathbuf = NULL, *agentbuf = NULL, *path;
char *pathbuf = NULL, *agentbuf = NULL;
char *argv[3], *envp[3];
int ret;

mutex_lock(&cgroup_mutex);

@@ -6073,13 +6094,13 @@ static void cgroup_release_agent(struct work_struct *work)
goto out;

spin_lock_irq(&css_set_lock);
path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
spin_unlock_irq(&css_set_lock);
if (!path)
if (ret < 0 || ret >= PATH_MAX)
goto out;

argv[0] = agentbuf;
argv[1] = path;
argv[1] = pathbuf;
argv[2] = NULL;

/* minimal command environment */

+1 -1
@@ -228,7 +228,7 @@ static struct {
.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
.dep_map = {.name = "cpu_hotplug.lock" },
.dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
#endif
};

+7 -6
@@ -2715,7 +2715,7 @@ void __cpuset_memory_pressure_bump(void)
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk)
{
char *buf, *p;
char *buf;
struct cgroup_subsys_state *css;
int retval;

@@ -2724,14 +2724,15 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf)
goto out;

retval = -ENAMETOOLONG;
css = task_get_css(tsk, cpuset_cgrp_id);
p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
current->nsproxy->cgroup_ns);
retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
current->nsproxy->cgroup_ns);
css_put(css);
if (!p)
if (retval >= PATH_MAX)
retval = -ENAMETOOLONG;
if (retval < 0)
goto out_free;
seq_puts(m, p);
seq_puts(m, buf);
seq_putc(m, '\n');
retval = 0;
out_free:

+17 -6
@@ -1960,6 +1960,12 @@ void perf_event_disable(struct perf_event *event)
}
EXPORT_SYMBOL_GPL(perf_event_disable);

void perf_event_disable_inatomic(struct perf_event *event)
{
event->pending_disable = 1;
irq_work_queue(&event->pending);
}

static void perf_set_shadow_time(struct perf_event *event,
struct perf_event_context *ctx,
u64 tstamp)
@@ -7075,8 +7081,8 @@ static int __perf_event_overflow(struct perf_event *event,
if (events && atomic_dec_and_test(&event->event_limit)) {
ret = 1;
event->pending_kill = POLL_HUP;
event->pending_disable = 1;
irq_work_queue(&event->pending);

perf_event_disable_inatomic(event);
}

READ_ONCE(event->overflow_handler)(event, data, regs);
@@ -8855,7 +8861,10 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);

void perf_pmu_unregister(struct pmu *pmu)
{
int remove_device;

mutex_lock(&pmus_lock);
remove_device = pmu_bus_running;
list_del_rcu(&pmu->entry);
mutex_unlock(&pmus_lock);

@@ -8869,10 +8878,12 @@ void perf_pmu_unregister(struct pmu *pmu)
free_percpu(pmu->pmu_disable_count);
if (pmu->type >= PERF_TYPE_MAX)
idr_remove(&pmu_idr, pmu->type);
if (pmu->nr_addr_filters)
device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
device_del(pmu->dev);
put_device(pmu->dev);
if (remove_device) {
if (pmu->nr_addr_filters)
device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
device_del(pmu->dev);
put_device(pmu->dev);
}
free_pmu_context(pmu);
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);

@@ -300,7 +300,8 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,

retry:
/* Read the page with vaddr into memory */
ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
&vma);
if (ret <= 0)
return ret;

@@ -1710,7 +1711,8 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
* but we treat this as a 'remote' access since it is
* essentially a kernel access to the memory.
*/
result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
NULL);
if (result < 0)
return result;

+5 -2
@@ -547,7 +547,8 @@ free_tsk:
}

#ifdef CONFIG_MMU
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
static __latent_entropy int dup_mmap(struct mm_struct *mm,
struct mm_struct *oldmm)
{
struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
@@ -1441,7 +1442,8 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
static struct task_struct *copy_process(unsigned long clone_flags,
static __latent_entropy struct task_struct *copy_process(
unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
@@ -1926,6 +1928,7 @@ long _do_fork(unsigned long clone_flags,

p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
add_latent_entropy();
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.

@@ -721,6 +721,7 @@ int irq_set_parent(int irq, int parent_irq)
irq_put_desc_unlock(desc, flags);
return 0;
}
EXPORT_SYMBOL_GPL(irq_set_parent);
#endif

/*

+8 -1
@@ -53,8 +53,15 @@ void notrace __sanitizer_cov_trace_pc(void)
/*
* We are interested in code coverage as a function of a syscall inputs,
* so we ignore code executed in interrupts.
* The checks for whether we are in an interrupt are open-coded, because
* 1. We can't use in_interrupt() here, since it also returns true
* when we are inside local_bh_disable() section.
* 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()),
* since that leads to slower generated code (three separate tests,
* one for each of the flags).
*/
if (!t || in_interrupt())
if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET
| NMI_MASK)))
return;
mode = READ_ONCE(t->kcov_mode);
if (mode == KCOV_MODE_TRACE) {

@@ -498,9 +498,9 @@ static int enter_state(suspend_state_t state)

#ifndef CONFIG_SUSPEND_SKIP_SYNC
trace_suspend_resume(TPS("sync_filesystems"), 0, true);
printk(KERN_INFO "PM: Syncing filesystems ... ");
pr_info("PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
pr_cont("done.\n");
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
#endif

@@ -1769,6 +1769,10 @@ static size_t log_output(int facility, int level, enum log_flags lflags, const c
cont_flush();
}

/* Skip empty continuation lines that couldn't be added - they just flush */
if (!text_len && (lflags & LOG_CONT))
return 0;

/* If it doesn't end in a newline, try to buffer the current line */
if (!(lflags & LOG_NEWLINE)) {
if (cont_add(facility, level, lflags, text, text_len))

+10 -6
@@ -537,7 +537,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
int this_len, retval;

this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
retval = access_process_vm(tsk, src, buf, this_len, 0);
retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
if (!retval) {
if (copied)
break;
@@ -564,7 +564,8 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
if (copy_from_user(buf, src, this_len))
return -EFAULT;
retval = access_process_vm(tsk, dst, buf, this_len, 1);
retval = access_process_vm(tsk, dst, buf, this_len,
FOLL_FORCE | FOLL_WRITE);
if (!retval) {
if (copied)
break;
@@ -1127,7 +1128,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
unsigned long tmp;
int copied;

copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0);
copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
return -EIO;
return put_user(tmp, (unsigned long __user *)data);
@@ -1138,7 +1139,8 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
{
int copied;

copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
copied = access_process_vm(tsk, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
return (copied == sizeof(data)) ? 0 : -EIO;
}

@@ -1155,7 +1157,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
switch (request) {
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
ret = access_process_vm(child, addr, &word, sizeof(word), 0);
ret = access_process_vm(child, addr, &word, sizeof(word),
FOLL_FORCE);
if (ret != sizeof(word))
ret = -EIO;
else
@@ -1164,7 +1167,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,

case PTRACE_POKETEXT:
case PTRACE_POKEDATA:
ret = access_process_vm(child, addr, &data, sizeof(data), 1);
ret = access_process_vm(child, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
ret = (ret != sizeof(data) ? -EIO : 0);
break;

+1 -1
@@ -170,7 +170,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
false));
}

static void rcu_process_callbacks(struct softirq_action *unused)
static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);

+1 -1
@@ -3013,7 +3013,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/*
* Do RCU core processing for the current CPU.
*/
static void rcu_process_callbacks(struct softirq_action *unused)
static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
{
struct rcu_state *rsp;

@@ -7515,11 +7515,27 @@ static struct kmem_cache *task_group_cache __read_mostly;
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);

#define WAIT_TABLE_BITS 8
#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;

wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
const int shift = BITS_PER_LONG == 32 ? 5 : 6;
unsigned long val = (unsigned long)word << shift | bit;

return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
}
EXPORT_SYMBOL(bit_waitqueue);

void __init sched_init(void)
{
int i, j;
unsigned long alloc_size = 0, ptr;

for (i = 0; i < WAIT_TABLE_SIZE; i++)
init_waitqueue_head(bit_wait_table + i);

#ifdef CONFIG_FAIR_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif

@@ -415,7 +415,8 @@ static char *task_group_path(struct task_group *tg)
if (autogroup_path(tg, group_path, PATH_MAX))
return group_path;

return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
return group_path;
}
#endif

+17 -8
@@ -690,7 +690,14 @@ void init_entity_runnable_average(struct sched_entity *se)
* will definitely be update (after enqueue).
*/
sa->period_contrib = 1023;
sa->load_avg = scale_load_down(se->load.weight);
/*
* Tasks are intialized with full load to be seen as heavy tasks until
* they get a chance to stabilize to their real load level.
* Group entities are intialized with zero load to reflect the fact that
* nothing has been attached to the task group yet.
*/
if (entity_is_task(se))
sa->load_avg = scale_load_down(se->load.weight);
sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
/*
* At this point, util_avg won't be used in select_task_rq_fair anyway
@@ -5471,13 +5478,18 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
*/
static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
{
struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
u64 avg_idle = this_rq()->avg_idle;
u64 avg_cost = this_sd->avg_scan_cost;
struct sched_domain *this_sd;
u64 avg_cost, avg_idle = this_rq()->avg_idle;
u64 time, cost;
s64 delta;
int cpu, wrap;

this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
return -1;

avg_cost = this_sd->avg_scan_cost;

/*
* Due to large variance we need a large fuzz factor; hackbench in
* particularly is sensitive here.
@@ -8522,7 +8534,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
* run_rebalance_domains is triggered when needed from the scheduler tick.
* Also triggered for nohz idle balancing (with nohz_balancing_kick set).
*/
static void run_rebalance_domains(struct softirq_action *h)
static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
{
struct rq *this_rq = this_rq();
enum cpu_idle_type idle = this_rq->idle_balance ?
@@ -8827,7 +8839,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
struct sched_entity *se;
struct cfs_rq *cfs_rq;
struct rq *rq;
int i;

tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8842,8 +8853,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
init_cfs_bandwidth(tg_cfs_bandwidth(tg));

for_each_possible_cpu(i) {
rq = cpu_rq(i);

cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
GFP_KERNEL, cpu_to_node(i));
if (!cfs_rq)

@@ -480,16 +480,6 @@ void wake_up_bit(void *word, int bit)
}
EXPORT_SYMBOL(wake_up_bit);

wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
const int shift = BITS_PER_LONG == 32 ? 5 : 6;
const struct zone *zone = page_zone(virt_to_page(word));
unsigned long val = (unsigned long)word << shift | bit;

return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
}
EXPORT_SYMBOL(bit_waitqueue);

/*
* Manipulate the atomic_t address to produce a better bit waitqueue table hash
* index (we're keying off bit -1, but that would produce a horrible hash

+3 -3
@@ -58,7 +58,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
"TASKLET", "SCHED", "HRTIMER", "RCU"
};

@@ -496,7 +496,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
}
EXPORT_SYMBOL(__tasklet_hi_schedule_first);

static void tasklet_action(struct softirq_action *a)
static __latent_entropy void tasklet_action(struct softirq_action *a)
{
struct tasklet_struct *list;

@@ -532,7 +532,7 @@ static void tasklet_action(struct softirq_action *a)
}
}

static void tasklet_hi_action(struct softirq_action *a)
static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
struct tasklet_struct *list;

@@ -542,7 +542,6 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
static int alarm_timer_create(struct k_itimer *new_timer)
{
enum alarmtimer_type type;
struct alarm_base *base;

if (!alarmtimer_get_rtcdev())
return -ENOTSUPP;
@@ -551,7 +550,6 @@ static int alarm_timer_create(struct k_itimer *new_timer)
return -EPERM;

type = clock2alarm(new_timer->it_clock);
base = &alarm_bases[type];
alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer);
return 0;
}

+47 -33
@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)

#ifdef CONFIG_NO_HZ_COMMON
static inline struct timer_base *
__get_target_base(struct timer_base *base, unsigned tflags)
get_target_base(struct timer_base *base, unsigned tflags)
{
#ifdef CONFIG_SMP
if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,25 +891,27 @@ __get_target_base(struct timer_base *base, unsigned tflags)

static inline void forward_timer_base(struct timer_base *base)
{
unsigned long jnow = READ_ONCE(jiffies);

/*
* We only forward the base when it's idle and we have a delta between
* base clock and jiffies.
*/
if (!base->is_idle || (long) (jiffies - base->clk) < 2)
if (!base->is_idle || (long) (jnow - base->clk) < 2)
return;

/*
* If the next expiry value is > jiffies, then we fast forward to
* jiffies otherwise we forward to the next expiry value.
*/
if (time_after(base->next_expiry, jiffies))
base->clk = jiffies;
if (time_after(base->next_expiry, jnow))
base->clk = jnow;
else
base->clk = base->next_expiry;
}
#else
static inline struct timer_base *
__get_target_base(struct timer_base *base, unsigned tflags)
get_target_base(struct timer_base *base, unsigned tflags)
{
return get_timer_this_cpu_base(tflags);
}
@@ -917,14 +919,6 @@ __get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base) { }
#endif

static inline struct timer_base *
get_target_base(struct timer_base *base, unsigned tflags)
{
struct timer_base *target = __get_target_base(base, tflags);

forward_timer_base(target);
return target;
}

/*
* We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
{
for (;;) {
struct timer_base *base;
u32 tf = timer->flags;
u32 tf;

/*
* We need to use READ_ONCE() here, otherwise the compiler
* might re-read @tf between the check for TIMER_MIGRATING
* and spin_lock().
*/
tf = READ_ONCE(timer->flags);

if (!(tf & TIMER_MIGRATING)) {
base = get_timer_base(tf);
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
unsigned long clk = 0, flags;
int ret = 0;

BUG_ON(!timer->function);

/*
* This is a common optimization triggered by the networking code - if
* the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
if (timer_pending(timer)) {
if (timer->expires == expires)
return 1;
/*
* Take the current timer_jiffies of base, but without holding
* the lock!
*/
base = get_timer_base(timer->flags);
clk = base->clk;

/*
* We lock timer base and calculate the bucket index right
* here. If the timer ends up in the same bucket, then we
* just update the expiry time and avoid the whole
* dequeue/enqueue dance.
*/
base = lock_timer_base(timer, &flags);

clk = base->clk;
idx = calc_wheel_index(expires, clk);

/*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
*/
if (idx == timer_get_idx(timer)) {
timer->expires = expires;
return 1;
ret = 1;
goto out_unlock;
}
} else {
base = lock_timer_base(timer, &flags);
}

timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);

base = lock_timer_base(timer, &flags);

ret = detach_if_pending(timer, base, false);
if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
}

/* Try to forward a stale timer base clock */
forward_timer_base(base);

timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
* between calculating 'idx' and taking the lock, only enqueue_timer()
* and trigger_dyntick_cpu() is required. Otherwise we need to
* (re)calculate the wheel index via internal_add_timer().
* between calculating 'idx' and possibly switching the base, only
* enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
* we need to (re)calculate the wheel index via
* internal_add_timer().
*/
if (idx != UINT_MAX && clk == base->clk) {
enqueue_timer(base, timer, idx);
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
base->next_expiry = nextevt;
/*
* We have a fresh next event. Check whether we can forward the base:
* We have a fresh next event. Check whether we can forward the
* base. We can only do that when @basej is past base->clk
* otherwise we might rewind base->clk.
*/
if (time_after(nextevt, jiffies))
base->clk = jiffies;
else if (time_after(nextevt, base->clk))
base->clk = nextevt;
if (time_after(basej, base->clk)) {
if (time_after(nextevt, basej))
base->clk = basej;
else if (time_after(nextevt, base->clk))
base->clk = nextevt;
}

if (time_before_eq(nextevt, basej)) {
expires = basem;
@@ -1633,7 +1647,7 @@ static inline void __run_timers(struct timer_base *base)
/*
* This function runs timers and the timer-tq in bottom half context.
*/
static void run_timer_softirq(struct softirq_action *h)
static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
