Merge branch 'net-memcg-gather-memcg-code-under-config_memcg'
Kuniyuki Iwashima says:

====================
net-memcg: Gather memcg code under CONFIG_MEMCG.

This series converts most sk->sk_memcg accesses to helper functions under CONFIG_MEMCG and finally defines sk_memcg under CONFIG_MEMCG.

This is v5 of the series linked below, but without the core changes that decoupled memcg and global socket memory accounting. I will defer those changes to a follow-up series that will use BPF to store a flag in sk->sk_memcg.

Overview of the series:

  patch 1 is a trivial fix for MPTCP
  patches 2 ~ 9 move sk->sk_memcg accesses to a single place
  patch 10 moves sk_memcg under CONFIG_MEMCG

v4: https://lore.kernel.org/20250814200912.1040628-1-kuniyu@google.com
v3: https://lore.kernel.org/20250812175848.512446-1-kuniyu@google.com
v2: https://lore.kernel.org/20250811173116.2829786-1-kuniyu@google.com
v1: https://lore.kernel.org/20250721203624.3807041-1-kuniyu@google.com
====================

Link: https://patch.msgid.link/20250815201712.1745332-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
+26
-19
@@ -1596,14 +1596,16 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
|
||||
#endif /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
||||
struct sock;
|
||||
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
|
||||
gfp_t gfp_mask);
|
||||
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
|
||||
#ifdef CONFIG_MEMCG
|
||||
extern struct static_key_false memcg_sockets_enabled_key;
|
||||
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
|
||||
|
||||
void mem_cgroup_sk_alloc(struct sock *sk);
|
||||
void mem_cgroup_sk_free(struct sock *sk);
|
||||
void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk);
|
||||
bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages,
|
||||
gfp_t gfp_mask);
|
||||
void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages);
|
||||
|
||||
#if BITS_PER_LONG < 64
|
||||
static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
|
||||
@@ -1640,32 +1642,37 @@ static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Report whether @memcg is under socket memory pressure.
 *
 * With CONFIG_MEMCG_V1, when cgroup_subsys_on_dfl(memory_cgrp_subsys) is
 * false, the answer is @memcg's tcpmem_pressure field normalized to a bool.
 * Otherwise @memcg and each of its ancestors are checked in turn.
 *
 * @memcg is dereferenced unconditionally, so it must not be NULL.
 */
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
|
||||
{
|
||||
#ifdef CONFIG_MEMCG_V1
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
|
||||
return !!memcg->tcpmem_pressure;
|
||||
#endif /* CONFIG_MEMCG_V1 */
|
||||
/*
 * Walk from @memcg up through parent_mem_cgroup() until it returns NULL;
 * any level whose socket-pressure value is still ahead of the current
 * 64-bit jiffies count means "under pressure".
 */
do {
|
||||
if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
|
||||
return true;
|
||||
} while ((memcg = parent_mem_cgroup(memcg)));
|
||||
return false;
|
||||
}
|
||||
|
||||
int alloc_shrinker_info(struct mem_cgroup *memcg);
|
||||
void free_shrinker_info(struct mem_cgroup *memcg);
|
||||
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
|
||||
void reparent_shrinker_deferred(struct mem_cgroup *memcg);
|
||||
#else
|
||||
#define mem_cgroup_sockets_enabled 0
|
||||
static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
|
||||
static inline void mem_cgroup_sk_free(struct sock *sk) { };
|
||||
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
|
||||
|
||||
/*
 * No-op variant of mem_cgroup_sk_alloc() from the #else branch
 * (presumably builds without CONFIG_MEMCG): @sk is left untouched.
 */
static inline void mem_cgroup_sk_alloc(struct sock *sk)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * No-op variant of mem_cgroup_sk_free() from the #else branch
 * (presumably builds without CONFIG_MEMCG): there is nothing to release.
 */
static inline void mem_cgroup_sk_free(struct sock *sk)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * No-op variant of mem_cgroup_sk_inherit() from the #else branch
 * (presumably builds without CONFIG_MEMCG): neither @sk nor @newsk is
 * touched.
 */
static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Stub variant of mem_cgroup_sk_charge() from the #else branch
 * (presumably builds without CONFIG_MEMCG): nothing is charged and the
 * function always returns false, regardless of @nr_pages or @gfp_mask.
 */
static inline bool mem_cgroup_sk_charge(const struct sock *sk,
|
||||
unsigned int nr_pages,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * No-op variant of mem_cgroup_sk_uncharge() from the #else branch
 * (presumably builds without CONFIG_MEMCG): nothing is uncharged.
 */
static inline void mem_cgroup_sk_uncharge(const struct sock *sk,
|
||||
unsigned int nr_pages)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void set_shrinker_bit(struct mem_cgroup *memcg,
|
||||
int nid, int shrinker_id)
|
||||
{
|
||||
|
||||
@@ -31,8 +31,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
|
||||
if (!sk->sk_prot->memory_pressure)
|
||||
return false;
|
||||
|
||||
if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
|
||||
mem_cgroup_under_socket_pressure(sk->sk_memcg))
|
||||
if (mem_cgroup_sk_enabled(sk) &&
|
||||
mem_cgroup_sk_under_memory_pressure(sk))
|
||||
return true;
|
||||
|
||||
return !!READ_ONCE(*sk->sk_prot->memory_pressure);
|
||||
|
||||
@@ -443,7 +443,9 @@ struct sock {
|
||||
__cacheline_group_begin(sock_read_rxtx);
|
||||
int sk_err;
|
||||
struct socket *sk_socket;
|
||||
#ifdef CONFIG_MEMCG
|
||||
struct mem_cgroup *sk_memcg;
|
||||
#endif
|
||||
#ifdef CONFIG_XFRM
|
||||
struct xfrm_policy __rcu *sk_policy[2];
|
||||
#endif
|
||||
@@ -2594,6 +2596,50 @@ static inline gfp_t gfp_memcg_charge(void)
|
||||
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
/*
 * Accessor for the memory cgroup pointer stored in @sk (CONFIG_MEMCG
 * variant).  Returns sk->sk_memcg as-is, which may be NULL.
 */
static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
|
||||
{
|
||||
return sk->sk_memcg;
|
||||
}
|
||||
|
||||
/*
 * Return true only when mem_cgroup_sockets_enabled evaluates true and @sk
 * has a non-NULL memcg (CONFIG_MEMCG variant).  The memcg lookup is
 * skipped when mem_cgroup_sockets_enabled is false.
 */
static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
|
||||
{
|
||||
return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
|
||||
}
|
||||
|
||||
/*
 * Report whether the memcg attached to @sk is under socket memory pressure
 * (CONFIG_MEMCG variant).
 *
 * With CONFIG_MEMCG_V1, when cgroup_subsys_on_dfl(memory_cgrp_subsys) is
 * false, the answer is the memcg's tcpmem_pressure field normalized to a
 * bool.  Otherwise the memcg and each of its ancestors are checked in turn.
 *
 * The memcg is dereferenced without a NULL check; the callers visible
 * alongside this helper test mem_cgroup_sk_enabled(sk) first.
 */
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
|
||||
|
||||
#ifdef CONFIG_MEMCG_V1
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
|
||||
return !!memcg->tcpmem_pressure;
|
||||
#endif /* CONFIG_MEMCG_V1 */
|
||||
|
||||
/*
 * Walk from the socket's memcg up through parent_mem_cgroup() until it
 * returns NULL; any level whose socket-pressure value is still ahead of
 * the current 64-bit jiffies count means "under pressure".
 */
do {
|
||||
if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
|
||||
return true;
|
||||
} while ((memcg = parent_mem_cgroup(memcg)));
|
||||
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
/*
 * Variant of mem_cgroup_from_sk() for builds without CONFIG_MEMCG: a
 * socket never has a memcg here, so this always returns NULL.
 */
static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Variant of mem_cgroup_sk_enabled() for builds without CONFIG_MEMCG:
 * always returns false.
 */
static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Variant of mem_cgroup_sk_under_memory_pressure() for builds without
 * CONFIG_MEMCG: always returns false.
 */
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
|
||||
{
|
||||
return noblock ? 0 : READ_ONCE(sk->sk_rcvtimeo);
|
||||
|
||||
+2
-2
@@ -275,8 +275,8 @@ extern unsigned long tcp_memory_pressure;
|
||||
/* optimized version of sk_under_memory_pressure() for TCP sockets */
|
||||
static inline bool tcp_under_memory_pressure(const struct sock *sk)
|
||||
{
|
||||
if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
|
||||
mem_cgroup_under_socket_pressure(sk->sk_memcg))
|
||||
if (mem_cgroup_sk_enabled(sk) &&
|
||||
mem_cgroup_sk_under_memory_pressure(sk))
|
||||
return true;
|
||||
|
||||
return READ_ONCE(tcp_memory_pressure);
|
||||
|
||||
+31
-9
@@ -5020,22 +5020,42 @@ out:
|
||||
|
||||
void mem_cgroup_sk_free(struct sock *sk)
|
||||
{
|
||||
if (sk->sk_memcg)
|
||||
css_put(&sk->sk_memcg->css);
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
|
||||
|
||||
if (memcg)
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
|
||||
/*
 * Make @newsk point at the same memcg as @sk.
 *
 * Does nothing when both sockets already hold the same sk_memcg pointer
 * (including both NULL).  Otherwise @newsk's current memcg is released via
 * mem_cgroup_sk_free(), a css reference is taken on @sk's memcg when it is
 * non-NULL, and the pointer is copied into newsk->sk_memcg.
 */
void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
if (sk->sk_memcg == newsk->sk_memcg)
|
||||
return;
|
||||
|
||||
/* Release whatever memcg @newsk held before it is overwritten below. */
mem_cgroup_sk_free(newsk);
|
||||
|
||||
memcg = mem_cgroup_from_sk(sk);
|
||||
/* @newsk gets its own reference; @sk keeps the one it already has. */
if (memcg)
|
||||
css_get(&memcg->css);
|
||||
|
||||
newsk->sk_memcg = sk->sk_memcg;
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_charge_skmem - charge socket memory
|
||||
* @memcg: memcg to charge
|
||||
* mem_cgroup_sk_charge - charge socket memory
|
||||
* @sk: socket in memcg to charge
|
||||
* @nr_pages: number of pages to charge
|
||||
* @gfp_mask: reclaim mode
|
||||
*
|
||||
* Charges @nr_pages to @memcg. Returns %true if the charge fit within
|
||||
* @memcg's configured limit, %false if it doesn't.
|
||||
*/
|
||||
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
|
||||
gfp_t gfp_mask)
|
||||
bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
|
||||
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
|
||||
return memcg1_charge_skmem(memcg, nr_pages, gfp_mask);
|
||||
|
||||
@@ -5048,12 +5068,14 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_uncharge_skmem - uncharge socket memory
|
||||
* @memcg: memcg to uncharge
|
||||
* mem_cgroup_sk_uncharge - uncharge socket memory
|
||||
* @sk: socket in memcg to uncharge
|
||||
* @nr_pages: number of pages to uncharge
|
||||
*/
|
||||
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
|
||||
void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
|
||||
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
|
||||
memcg1_uncharge_skmem(memcg, nr_pages);
|
||||
return;
|
||||
|
||||
+20
-18
@@ -1032,7 +1032,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
|
||||
bool charged;
|
||||
int pages;
|
||||
|
||||
if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
|
||||
if (!mem_cgroup_sk_enabled(sk) || !sk_has_account(sk))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!bytes)
|
||||
@@ -1041,8 +1041,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
|
||||
pages = sk_mem_pages(bytes);
|
||||
|
||||
/* pre-charge to memcg */
|
||||
charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
|
||||
GFP_KERNEL | __GFP_RETRY_MAYFAIL);
|
||||
charged = mem_cgroup_sk_charge(sk, pages,
|
||||
GFP_KERNEL | __GFP_RETRY_MAYFAIL);
|
||||
if (!charged)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -1054,7 +1054,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
|
||||
*/
|
||||
if (allocated > sk_prot_mem_limits(sk, 1)) {
|
||||
sk_memory_allocated_sub(sk, pages);
|
||||
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
|
||||
mem_cgroup_sk_uncharge(sk, pages);
|
||||
return -ENOMEM;
|
||||
}
|
||||
sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
|
||||
@@ -2512,8 +2512,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
|
||||
|
||||
sock_reset_flag(newsk, SOCK_DONE);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
/* sk->sk_memcg will be populated at accept() time */
|
||||
newsk->sk_memcg = NULL;
|
||||
#endif
|
||||
|
||||
cgroup_sk_clone(&newsk->sk_cgrp_data);
|
||||
|
||||
@@ -3263,16 +3265,16 @@ EXPORT_SYMBOL(sk_wait_data);
|
||||
*/
|
||||
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
|
||||
bool memcg_enabled = false, charged = false;
|
||||
struct proto *prot = sk->sk_prot;
|
||||
bool charged = true;
|
||||
long allocated;
|
||||
|
||||
sk_memory_allocated_add(sk, amt);
|
||||
allocated = sk_memory_allocated(sk);
|
||||
|
||||
if (memcg) {
|
||||
charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge());
|
||||
if (mem_cgroup_sk_enabled(sk)) {
|
||||
memcg_enabled = true;
|
||||
charged = mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge());
|
||||
if (!charged)
|
||||
goto suppress_allocation;
|
||||
}
|
||||
@@ -3346,21 +3348,19 @@ suppress_allocation:
|
||||
*/
|
||||
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
|
||||
/* Force charge with __GFP_NOFAIL */
|
||||
if (memcg && !charged) {
|
||||
mem_cgroup_charge_skmem(memcg, amt,
|
||||
gfp_memcg_charge() | __GFP_NOFAIL);
|
||||
}
|
||||
if (memcg_enabled && !charged)
|
||||
mem_cgroup_sk_charge(sk, amt,
|
||||
gfp_memcg_charge() | __GFP_NOFAIL);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
|
||||
trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
|
||||
trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
|
||||
|
||||
sk_memory_allocated_sub(sk, amt);
|
||||
|
||||
if (memcg && charged)
|
||||
mem_cgroup_uncharge_skmem(memcg, amt);
|
||||
if (charged)
|
||||
mem_cgroup_sk_uncharge(sk, amt);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -3398,8 +3398,8 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
|
||||
{
|
||||
sk_memory_allocated_sub(sk, amount);
|
||||
|
||||
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
|
||||
mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
|
||||
if (mem_cgroup_sk_enabled(sk))
|
||||
mem_cgroup_sk_uncharge(sk, amount);
|
||||
|
||||
if (sk_under_global_memory_pressure(sk) &&
|
||||
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
|
||||
@@ -4454,7 +4454,9 @@ static int __init sock_struct_check(void)
|
||||
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
|
||||
#ifdef CONFIG_MEMCG
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
|
||||
#endif
|
||||
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
|
||||
|
||||
@@ -706,9 +706,9 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
|
||||
spin_unlock_bh(&queue->fastopenq.lock);
|
||||
}
|
||||
|
||||
out:
|
||||
release_sock(sk);
|
||||
if (newsk && mem_cgroup_sockets_enabled) {
|
||||
|
||||
if (mem_cgroup_sockets_enabled) {
|
||||
gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
|
||||
int amt = 0;
|
||||
|
||||
@@ -718,7 +718,7 @@ out:
|
||||
lock_sock(newsk);
|
||||
|
||||
mem_cgroup_sk_alloc(newsk);
|
||||
if (newsk->sk_memcg) {
|
||||
if (mem_cgroup_from_sk(newsk)) {
|
||||
/* The socket has not been accepted yet, no need
|
||||
* to look at newsk->sk_wmem_queued.
|
||||
*/
|
||||
@@ -727,23 +727,22 @@ out:
|
||||
}
|
||||
|
||||
if (amt)
|
||||
mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
|
||||
mem_cgroup_sk_charge(newsk, amt, gfp);
|
||||
kmem_cache_charge(newsk, gfp);
|
||||
|
||||
release_sock(newsk);
|
||||
}
|
||||
|
||||
if (req)
|
||||
reqsk_put(req);
|
||||
|
||||
if (newsk)
|
||||
inet_init_csk_locks(newsk);
|
||||
|
||||
inet_init_csk_locks(newsk);
|
||||
return newsk;
|
||||
|
||||
out_err:
|
||||
newsk = NULL;
|
||||
req = NULL;
|
||||
release_sock(sk);
|
||||
arg->err = error;
|
||||
goto out;
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(inet_csk_accept);
|
||||
|
||||
|
||||
@@ -3578,9 +3578,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
|
||||
sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
|
||||
sk_memory_allocated_add(sk, amt);
|
||||
|
||||
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
|
||||
mem_cgroup_charge_skmem(sk->sk_memcg, amt,
|
||||
gfp_memcg_charge() | __GFP_NOFAIL);
|
||||
if (mem_cgroup_sk_enabled(sk))
|
||||
mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL);
|
||||
}
|
||||
|
||||
/* Send a FIN. The caller locks the socket for us.
|
||||
|
||||
@@ -788,9 +788,7 @@ static inline bool mptcp_epollin_ready(const struct sock *sk)
|
||||
* as it can always coalesce them
|
||||
*/
|
||||
return (data_avail >= sk->sk_rcvlowat) ||
|
||||
(mem_cgroup_sockets_enabled && sk->sk_memcg &&
|
||||
mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
|
||||
READ_ONCE(tcp_memory_pressure);
|
||||
tcp_under_memory_pressure(sk);
|
||||
}
|
||||
|
||||
int mptcp_set_rcvlowat(struct sock *sk, int val);
|
||||
|
||||
+3
-8
@@ -1717,19 +1717,14 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
|
||||
/* only the additional subflows created by kworkers have to be modified */
|
||||
if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
|
||||
cgroup_id(sock_cgroup_ptr(child_skcd))) {
|
||||
#ifdef CONFIG_MEMCG
|
||||
struct mem_cgroup *memcg = parent->sk_memcg;
|
||||
|
||||
mem_cgroup_sk_free(child);
|
||||
if (memcg && css_tryget(&memcg->css))
|
||||
child->sk_memcg = memcg;
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
cgroup_sk_free(child_skcd);
|
||||
*child_skcd = *parent_skcd;
|
||||
cgroup_sk_clone(child_skcd);
|
||||
}
|
||||
#endif /* CONFIG_SOCK_CGROUP_DATA */
|
||||
|
||||
if (mem_cgroup_sockets_enabled)
|
||||
mem_cgroup_sk_inherit(parent, child);
|
||||
}
|
||||
|
||||
static void mptcp_subflow_ops_override(struct sock *ssk)
|
||||
|
||||
Reference in New Issue
Block a user