Merge branch 'af_unix-gc-cleanup-and-optimisation'
Kuniyuki Iwashima says:

====================
af_unix: GC cleanup and optimisation.

Currently, AF_UNIX GC is triggered from close() and sendmsg() based on the number of inflight AF_UNIX sockets. This is because the old GC implementation had no idea of the shape of the graph formed by SCM_RIGHTS references. The new GC knows whether cyclic references (could) exist. This series refines such conditions not to trigger GC unless really needed.
====================

Link: https://patch.msgid.link/20251115020935.2643121-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
+1
-6
@@ -733,8 +733,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
|
||||
|
||||
/* ---- Socket is dead now and most probably destroyed ---- */
|
||||
|
||||
if (READ_ONCE(unix_tot_inflight))
|
||||
unix_gc(); /* Garbage collect fds */
|
||||
unix_schedule_gc(NULL);
|
||||
}
|
||||
|
||||
struct unix_peercred {
|
||||
@@ -2099,8 +2098,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
wait_for_unix_gc(scm.fp);
|
||||
|
||||
if (msg->msg_flags & MSG_OOB) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto out;
|
||||
@@ -2394,8 +2391,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
wait_for_unix_gc(scm.fp);
|
||||
|
||||
if (msg->msg_flags & MSG_OOB) {
|
||||
err = -EOPNOTSUPP;
|
||||
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
|
||||
|
||||
+1
-3
@@ -24,14 +24,12 @@ struct unix_skb_parms {
|
||||
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
|
||||
|
||||
/* GC for SCM_RIGHTS */
|
||||
extern unsigned int unix_tot_inflight;
|
||||
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
|
||||
void unix_del_edges(struct scm_fp_list *fpl);
|
||||
void unix_update_edges(struct unix_sock *receiver);
|
||||
int unix_prepare_fpl(struct scm_fp_list *fpl);
|
||||
void unix_destroy_fpl(struct scm_fp_list *fpl);
|
||||
void unix_gc(void);
|
||||
void wait_for_unix_gc(struct scm_fp_list *fpl);
|
||||
void unix_schedule_gc(struct user_struct *user);
|
||||
|
||||
/* SOCK_DIAG */
|
||||
long unix_inq_len(struct sock *sk);
|
||||
|
||||
+50
-44
@@ -121,8 +121,13 @@ static struct unix_vertex *unix_edge_successor(struct unix_edge *edge)
|
||||
return edge->successor->vertex;
|
||||
}
|
||||
|
||||
static bool unix_graph_maybe_cyclic;
|
||||
static bool unix_graph_grouped;
|
||||
enum {
|
||||
UNIX_GRAPH_NOT_CYCLIC,
|
||||
UNIX_GRAPH_MAYBE_CYCLIC,
|
||||
UNIX_GRAPH_CYCLIC,
|
||||
};
|
||||
|
||||
static unsigned char unix_graph_state;
|
||||
|
||||
static void unix_update_graph(struct unix_vertex *vertex)
|
||||
{
|
||||
@@ -132,8 +137,7 @@ static void unix_update_graph(struct unix_vertex *vertex)
|
||||
if (!vertex)
|
||||
return;
|
||||
|
||||
unix_graph_maybe_cyclic = true;
|
||||
unix_graph_grouped = false;
|
||||
WRITE_ONCE(unix_graph_state, UNIX_GRAPH_MAYBE_CYCLIC);
|
||||
}
|
||||
|
||||
static LIST_HEAD(unix_unvisited_vertices);
|
||||
@@ -196,7 +200,6 @@ static void unix_free_vertices(struct scm_fp_list *fpl)
|
||||
}
|
||||
|
||||
static DEFINE_SPINLOCK(unix_gc_lock);
|
||||
unsigned int unix_tot_inflight;
|
||||
|
||||
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
|
||||
{
|
||||
@@ -222,7 +225,6 @@ void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
|
||||
} while (i < fpl->count_unix);
|
||||
|
||||
receiver->scm_stat.nr_unix_fds += fpl->count_unix;
|
||||
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
|
||||
out:
|
||||
WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);
|
||||
|
||||
@@ -253,7 +255,6 @@ void unix_del_edges(struct scm_fp_list *fpl)
|
||||
receiver = fpl->edges[0].successor;
|
||||
receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
|
||||
}
|
||||
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
|
||||
out:
|
||||
WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);
|
||||
|
||||
@@ -299,6 +300,8 @@ int unix_prepare_fpl(struct scm_fp_list *fpl)
|
||||
if (!fpl->edges)
|
||||
goto err;
|
||||
|
||||
unix_schedule_gc(fpl->user);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
@@ -404,9 +407,11 @@ static bool unix_scc_cyclic(struct list_head *scc)
|
||||
static LIST_HEAD(unix_visited_vertices);
|
||||
static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
|
||||
|
||||
static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
|
||||
struct sk_buff_head *hitlist)
|
||||
static unsigned long __unix_walk_scc(struct unix_vertex *vertex,
|
||||
unsigned long *last_index,
|
||||
struct sk_buff_head *hitlist)
|
||||
{
|
||||
unsigned long cyclic_sccs = 0;
|
||||
LIST_HEAD(vertex_stack);
|
||||
struct unix_edge *edge;
|
||||
LIST_HEAD(edge_stack);
|
||||
@@ -497,8 +502,8 @@ prev_vertex:
|
||||
if (unix_vertex_max_scc_index < vertex->scc_index)
|
||||
unix_vertex_max_scc_index = vertex->scc_index;
|
||||
|
||||
if (!unix_graph_maybe_cyclic)
|
||||
unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
|
||||
if (unix_scc_cyclic(&scc))
|
||||
cyclic_sccs++;
|
||||
}
|
||||
|
||||
list_del(&scc);
|
||||
@@ -507,13 +512,17 @@ prev_vertex:
|
||||
/* Need backtracking ? */
|
||||
if (!list_empty(&edge_stack))
|
||||
goto prev_vertex;
|
||||
|
||||
return cyclic_sccs;
|
||||
}
|
||||
|
||||
static unsigned long unix_graph_cyclic_sccs;
|
||||
|
||||
static void unix_walk_scc(struct sk_buff_head *hitlist)
|
||||
{
|
||||
unsigned long last_index = UNIX_VERTEX_INDEX_START;
|
||||
unsigned long cyclic_sccs = 0;
|
||||
|
||||
unix_graph_maybe_cyclic = false;
|
||||
unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START;
|
||||
|
||||
/* Visit every vertex exactly once.
|
||||
@@ -523,18 +532,20 @@ static void unix_walk_scc(struct sk_buff_head *hitlist)
|
||||
struct unix_vertex *vertex;
|
||||
|
||||
vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
|
||||
__unix_walk_scc(vertex, &last_index, hitlist);
|
||||
cyclic_sccs += __unix_walk_scc(vertex, &last_index, hitlist);
|
||||
}
|
||||
|
||||
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
|
||||
swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
|
||||
|
||||
unix_graph_grouped = true;
|
||||
WRITE_ONCE(unix_graph_cyclic_sccs, cyclic_sccs);
|
||||
WRITE_ONCE(unix_graph_state,
|
||||
cyclic_sccs ? UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC);
|
||||
}
|
||||
|
||||
static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
|
||||
{
|
||||
unix_graph_maybe_cyclic = false;
|
||||
unsigned long cyclic_sccs = unix_graph_cyclic_sccs;
|
||||
|
||||
while (!list_empty(&unix_unvisited_vertices)) {
|
||||
struct unix_vertex *vertex;
|
||||
@@ -551,34 +562,38 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
|
||||
scc_dead = unix_vertex_dead(vertex);
|
||||
}
|
||||
|
||||
if (scc_dead)
|
||||
if (scc_dead) {
|
||||
cyclic_sccs--;
|
||||
unix_collect_skb(&scc, hitlist);
|
||||
else if (!unix_graph_maybe_cyclic)
|
||||
unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
|
||||
}
|
||||
|
||||
list_del(&scc);
|
||||
}
|
||||
|
||||
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
|
||||
|
||||
WRITE_ONCE(unix_graph_cyclic_sccs, cyclic_sccs);
|
||||
WRITE_ONCE(unix_graph_state,
|
||||
cyclic_sccs ? UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC);
|
||||
}
|
||||
|
||||
static bool gc_in_progress;
|
||||
|
||||
static void __unix_gc(struct work_struct *work)
|
||||
static void unix_gc(struct work_struct *work)
|
||||
{
|
||||
struct sk_buff_head hitlist;
|
||||
struct sk_buff *skb;
|
||||
|
||||
spin_lock(&unix_gc_lock);
|
||||
|
||||
if (!unix_graph_maybe_cyclic) {
|
||||
if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) {
|
||||
spin_unlock(&unix_gc_lock);
|
||||
goto skip_gc;
|
||||
}
|
||||
|
||||
__skb_queue_head_init(&hitlist);
|
||||
|
||||
if (unix_graph_grouped)
|
||||
if (unix_graph_state == UNIX_GRAPH_CYCLIC)
|
||||
unix_walk_scc_fast(&hitlist);
|
||||
else
|
||||
unix_walk_scc(&hitlist);
|
||||
@@ -595,36 +610,27 @@ skip_gc:
|
||||
WRITE_ONCE(gc_in_progress, false);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(unix_gc_work, __unix_gc);
|
||||
static DECLARE_WORK(unix_gc_work, unix_gc);
|
||||
|
||||
void unix_gc(void)
|
||||
#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
|
||||
|
||||
void unix_schedule_gc(struct user_struct *user)
|
||||
{
|
||||
WRITE_ONCE(gc_in_progress, true);
|
||||
queue_work(system_dfl_wq, &unix_gc_work);
|
||||
}
|
||||
|
||||
#define UNIX_INFLIGHT_TRIGGER_GC 16000
|
||||
#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
|
||||
|
||||
void wait_for_unix_gc(struct scm_fp_list *fpl)
|
||||
{
|
||||
/* If number of inflight sockets is insane,
|
||||
* force a garbage collect right now.
|
||||
*
|
||||
* Paired with the WRITE_ONCE() in unix_inflight(),
|
||||
* unix_notinflight(), and __unix_gc().
|
||||
*/
|
||||
if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
|
||||
!READ_ONCE(gc_in_progress))
|
||||
unix_gc();
|
||||
if (READ_ONCE(unix_graph_state) == UNIX_GRAPH_NOT_CYCLIC)
|
||||
return;
|
||||
|
||||
/* Penalise users who want to send AF_UNIX sockets
|
||||
* but whose sockets have not been received yet.
|
||||
*/
|
||||
if (!fpl || !fpl->count_unix ||
|
||||
READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
|
||||
if (user &&
|
||||
READ_ONCE(user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
|
||||
return;
|
||||
|
||||
if (READ_ONCE(gc_in_progress))
|
||||
if (!READ_ONCE(gc_in_progress)) {
|
||||
WRITE_ONCE(gc_in_progress, true);
|
||||
queue_work(system_dfl_wq, &unix_gc_work);
|
||||
}
|
||||
|
||||
if (user && READ_ONCE(unix_graph_cyclic_sccs))
|
||||
flush_work(&unix_gc_work);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user