Merge branch 'bpf: Handle MEM_RCU type properly'
Yonghong Song says: ==================== Patch set [1] added RCU support for bpf programs. In [1], an RCU pointer is considered to be trusted and not null. This is actually not true in some cases. The RCU pointer could be null, and a non-null RCU pointer may have a reference count of 0. This small patch set fixes this problem. Patch 1 is the kernel fix. Patch 2 adjusts the selftests accordingly. Patch 3 adds documentation for the newly-introduced KF_RCU flag. [1] https://lore.kernel.org/all/20221124053201.2372298-1-yhs@fb.com/ Changelogs: v1 -> v2: - rcu ptr could be NULL. - non_null_rcu_ptr->rcu_field can be marked as MEM_RCU as well. - Adjust the code to avoid existing error message change. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
@@ -191,6 +191,15 @@ rebooting or panicking. Due to this additional restrictions apply to these
|
||||
calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
|
||||
added later.
|
||||
|
||||
2.4.8 KF_RCU flag
|
||||
-----------------
|
||||
|
||||
The KF_RCU flag is used for kfuncs which take an RCU pointer as their argument.
|
||||
When used together with KF_ACQUIRE, it indicates the kfunc should have a
|
||||
single argument which must be a trusted argument or a MEM_RCU pointer.
|
||||
The argument may have a reference count of 0 and the kfunc must take this
|
||||
into consideration.
|
||||
|
||||
2.5 Registering the kfuncs
|
||||
--------------------------
|
||||
|
||||
|
||||
@@ -682,7 +682,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
|
||||
}
|
||||
}
|
||||
|
||||
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
|
||||
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
|
||||
|
||||
static inline bool bpf_type_has_unsafe_modifiers(u32 type)
|
||||
{
|
||||
|
||||
@@ -70,6 +70,7 @@
|
||||
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
|
||||
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
|
||||
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
|
||||
#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
|
||||
|
||||
/*
|
||||
* Return the name of the passed struct, if exists, or halt the build if for
|
||||
|
||||
@@ -1837,6 +1837,19 @@ struct task_struct *bpf_task_acquire(struct task_struct *p)
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
 * bpf_task_acquire_not_zero - Acquire a reference to an RCU task object. A task
 * acquired by this kfunc which is not stored in a map as a kptr must be
 * released by calling bpf_task_release().
 * @p: The task on which a reference is being acquired.
 *
 * Return: @p if refcount_inc_not_zero() succeeds on @p->rcu_users, otherwise
 * NULL.
 */
|
||||
struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
|
||||
{
|
||||
if (!refcount_inc_not_zero(&p->rcu_users))
|
||||
return NULL;
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task
|
||||
* kptr acquired by this kfunc which is not subsequently stored in a map, must
|
||||
@@ -2013,6 +2026,7 @@ BTF_ID_FLAGS(func, bpf_list_push_back)
|
||||
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
|
||||
#ifdef CONFIG_CGROUPS
|
||||
|
||||
+32
-13
@@ -4275,7 +4275,7 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
|
||||
return true;
|
||||
|
||||
/* If a register is not referenced, it is trusted if it has the
|
||||
* MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
|
||||
* MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
|
||||
* other type modifiers may be safe, but we elect to take an opt-in
|
||||
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
|
||||
* not.
|
||||
@@ -4287,6 +4287,11 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
|
||||
!bpf_type_has_unsafe_modifiers(reg->type);
|
||||
}
|
||||
|
||||
/* Report whether the register's type has the MEM_RCU modifier bit set. */
static bool is_rcu_reg(const struct bpf_reg_state *reg)
|
||||
{
|
||||
return reg->type & MEM_RCU;
|
||||
}
|
||||
|
||||
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
|
||||
const struct bpf_reg_state *reg,
|
||||
int off, int size, bool strict)
|
||||
@@ -4785,14 +4790,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
|
||||
|
||||
if (flag & MEM_RCU) {
|
||||
/* Mark value register as MEM_RCU only if it is protected by
|
||||
* bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
|
||||
* bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
|
||||
* itself can already indicate trustedness inside the rcu
|
||||
* read lock region. Also mark it as PTR_TRUSTED.
|
||||
* read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
|
||||
* it could be null in some cases.
|
||||
*/
|
||||
if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
|
||||
if (!env->cur_state->active_rcu_lock ||
|
||||
!(is_trusted_reg(reg) || is_rcu_reg(reg)))
|
||||
flag &= ~MEM_RCU;
|
||||
else
|
||||
flag |= PTR_TRUSTED;
|
||||
flag |= PTR_MAYBE_NULL;
|
||||
} else if (reg->type & MEM_RCU) {
|
||||
/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
|
||||
* with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
|
||||
@@ -5957,7 +5964,7 @@ static const struct bpf_reg_types btf_ptr_types = {
|
||||
.types = {
|
||||
PTR_TO_BTF_ID,
|
||||
PTR_TO_BTF_ID | PTR_TRUSTED,
|
||||
PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
|
||||
PTR_TO_BTF_ID | MEM_RCU,
|
||||
},
|
||||
};
|
||||
static const struct bpf_reg_types percpu_btf_ptr_types = {
|
||||
@@ -6136,7 +6143,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
|
||||
case PTR_TO_BTF_ID:
|
||||
case PTR_TO_BTF_ID | MEM_ALLOC:
|
||||
case PTR_TO_BTF_ID | PTR_TRUSTED:
|
||||
case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
|
||||
case PTR_TO_BTF_ID | MEM_RCU:
|
||||
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
|
||||
/* When referenced PTR_TO_BTF_ID is passed to release function,
|
||||
* it's fixed offset must be 0. In the other cases, fixed offset
|
||||
@@ -8038,6 +8045,11 @@ static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
|
||||
return meta->kfunc_flags & KF_DESTRUCTIVE;
|
||||
}
|
||||
|
||||
/* Report whether the kfunc described by meta has the KF_RCU flag set. */
static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
|
||||
{
|
||||
return meta->kfunc_flags & KF_RCU;
|
||||
}
|
||||
|
||||
static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
|
||||
{
|
||||
return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
|
||||
@@ -8722,13 +8734,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
|
||||
switch (kf_arg_type) {
|
||||
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
|
||||
case KF_ARG_PTR_TO_BTF_ID:
|
||||
if (!is_kfunc_trusted_args(meta))
|
||||
if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
|
||||
break;
|
||||
|
||||
if (!is_trusted_reg(reg)) {
|
||||
verbose(env, "R%d must be referenced or trusted\n", regno);
|
||||
return -EINVAL;
|
||||
if (!is_kfunc_rcu(meta)) {
|
||||
verbose(env, "R%d must be referenced or trusted\n", regno);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!is_rcu_reg(reg)) {
|
||||
verbose(env, "R%d must be a rcu pointer\n", regno);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
fallthrough;
|
||||
case KF_ARG_PTR_TO_CTX:
|
||||
/* Trusted arguments have the same offset checks as release arguments */
|
||||
@@ -8839,7 +8858,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
|
||||
case KF_ARG_PTR_TO_BTF_ID:
|
||||
/* Only base_type is checked, further checks are done here */
|
||||
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
|
||||
bpf_type_has_unsafe_modifiers(reg->type)) &&
|
||||
(bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
|
||||
!reg2btf_ids[base_type(reg->type)]) {
|
||||
verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
|
||||
verbose(env, "expected %s or socket\n",
|
||||
@@ -8954,7 +8973,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
} else if (rcu_unlock) {
|
||||
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
|
||||
if (reg->type & MEM_RCU) {
|
||||
reg->type &= ~(MEM_RCU | PTR_TRUSTED);
|
||||
reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
|
||||
reg->type |= PTR_UNTRUSTED;
|
||||
}
|
||||
}));
|
||||
@@ -11294,7 +11313,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
|
||||
bool is_null)
|
||||
{
|
||||
if (type_may_be_null(reg->type) && reg->id == id &&
|
||||
!WARN_ON_ONCE(!reg->id)) {
|
||||
(is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
|
||||
/* Old offset (both fixed and variable parts) should have been
|
||||
* known-zero, because we don't allow pointer arithmetic on
|
||||
* pointers that might be NULL. If we see this happening, don't
|
||||
|
||||
@@ -23,13 +23,14 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
|
||||
void bpf_key_put(struct bpf_key *key) __ksym;
|
||||
void bpf_rcu_read_lock(void) __ksym;
|
||||
void bpf_rcu_read_unlock(void) __ksym;
|
||||
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
|
||||
struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym;
|
||||
void bpf_task_release(struct task_struct *p) __ksym;
|
||||
|
||||
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
|
||||
int get_cgroup_id(void *ctx)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct css_set *cgroups;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
if (task->pid != target_pid)
|
||||
@@ -37,7 +38,11 @@ int get_cgroup_id(void *ctx)
|
||||
|
||||
/* simulate bpf_get_current_cgroup_id() helper */
|
||||
bpf_rcu_read_lock();
|
||||
cgroup_id = task->cgroups->dfl_cgrp->kn->id;
|
||||
cgroups = task->cgroups;
|
||||
if (!cgroups)
|
||||
goto unlock;
|
||||
cgroup_id = cgroups->dfl_cgrp->kn->id;
|
||||
unlock:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@@ -56,6 +61,8 @@ int task_succ(void *ctx)
|
||||
bpf_rcu_read_lock();
|
||||
/* region including helper using rcu ptr real_parent */
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
ptr = bpf_task_storage_get(&map_a, real_parent, &init_val,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (!ptr)
|
||||
@@ -92,7 +99,10 @@ int two_regions(void *ctx)
|
||||
bpf_rcu_read_unlock();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@@ -105,7 +115,10 @@ int non_sleepable_1(void *ctx)
|
||||
task = bpf_get_current_task_btf();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@@ -121,7 +134,10 @@ int non_sleepable_2(void *ctx)
|
||||
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@@ -129,16 +145,28 @@ int non_sleepable_2(void *ctx)
|
||||
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
|
||||
int task_acquire(void *ctx)
|
||||
{
|
||||
struct task_struct *task, *real_parent;
|
||||
struct task_struct *task, *real_parent, *gparent;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
|
||||
/* rcu_ptr->rcu_field */
|
||||
gparent = real_parent->real_parent;
|
||||
if (!gparent)
|
||||
goto out;
|
||||
|
||||
/* acquire a reference which can be used outside rcu read lock region */
|
||||
real_parent = bpf_task_acquire(real_parent);
|
||||
gparent = bpf_task_acquire_not_zero(gparent);
|
||||
if (!gparent)
|
||||
goto out;
|
||||
|
||||
(void)bpf_task_storage_get(&map_a, gparent, 0, 0);
|
||||
bpf_task_release(gparent);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
bpf_task_release(real_parent);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -181,9 +209,12 @@ int non_sleepable_rcu_mismatch(void *ctx)
|
||||
/* non-sleepable: missing bpf_rcu_read_unlock() in one path */
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
if (real_parent)
|
||||
bpf_rcu_read_unlock();
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -199,16 +230,17 @@ int inproper_sleepable_helper(void *ctx)
|
||||
/* sleepable helper in rcu read lock region */
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
|
||||
if (!regs) {
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
if (!regs)
|
||||
goto out;
|
||||
|
||||
ptr = (void *)PT_REGS_IP(regs);
|
||||
(void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
|
||||
user_data = value;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@@ -239,7 +271,10 @@ int nested_rcu_region(void *ctx)
|
||||
bpf_rcu_read_lock();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
|
||||
Reference in New Issue
Block a user