From 90c36325c796cc7111329607a649c34f4979c78e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:06 +0800 Subject: [PATCH 1/7] tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_CLOSE for active reset Introduce a new tcp reset reason, TCP_ABORT_ON_CLOSE, to handle the case where there is still unread data when the socket is being closed. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ net/ipv4/tcp.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 2575c85d7f7a..fa6bfd0d7d69 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -17,6 +17,7 @@ FN(TCP_ABORT_ON_DATA) \ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ + FN(TCP_ABORT_ON_CLOSE) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -84,6 +85,11 @@ enum sk_rst_reason { * an error, send a reset" */ SK_RST_REASON_INVALID_SYN, + /** + * @SK_RST_REASON_TCP_ABORT_ON_CLOSE: abort on close + * corresponding to LINUX_MIB_TCPABORTONCLOSE + */ + SK_RST_REASON_TCP_ABORT_ON_CLOSE, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e03a342c9162..2e010add0317 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2833,7 +2833,7 @@ void __tcp_close(struct sock *sk, long timeout) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, sk->sk_allocation, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_CLOSE); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. 
*/ sk->sk_prot->disconnect(sk, 0); From edc92b48abc5b21c98eca5d05b98a560d7df2e4d Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:07 +0800 Subject: [PATCH 2/7] tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_LINGER for active reset Introduce a new tcp reset reason, TCP_ABORT_ON_LINGER, to handle the case where tp->linger2 is negative. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ net/ipv4/tcp.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index fa6bfd0d7d69..fbbaeb969e6a 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -18,6 +18,7 @@ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ FN(TCP_ABORT_ON_CLOSE) \ + FN(TCP_ABORT_ON_LINGER) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -90,6 +91,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONCLOSE */ SK_RST_REASON_TCP_ABORT_ON_CLOSE, + /** + * @SK_RST_REASON_TCP_ABORT_ON_LINGER: abort on linger + * corresponding to LINUX_MIB_TCPABORTONLINGER + */ + SK_RST_REASON_TCP_ABORT_ON_LINGER, /* Copy from include/uapi/linux/mptcp.h. 
* These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2e010add0317..5b0f1d1fc697 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2908,7 +2908,7 @@ adjudge_to_death: if (READ_ONCE(tp->linger2) < 0) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_LINGER); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { From 8407994f0c3594eb3854e3799af86224f4a8e6e6 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:08 +0800 Subject: [PATCH 3/7] tcp: rstreason: introduce SK_RST_REASON_TCP_ABORT_ON_MEMORY for active reset Introducing a new type TCP_ABORT_ON_MEMORY for tcp reset reason to handle out of memory case. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 6 ++++++ net/ipv4/tcp.c | 2 +- net/ipv4/tcp_timer.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index fbbaeb969e6a..eef658da8952 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -19,6 +19,7 @@ FN(INVALID_SYN) \ FN(TCP_ABORT_ON_CLOSE) \ FN(TCP_ABORT_ON_LINGER) \ + FN(TCP_ABORT_ON_MEMORY) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -96,6 +97,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONLINGER */ SK_RST_REASON_TCP_ABORT_ON_LINGER, + /** + * @SK_RST_REASON_TCP_ABORT_ON_MEMORY: abort on memory + * corresponding to LINUX_MIB_TCPABORTONMEMORY + */ + SK_RST_REASON_TCP_ABORT_ON_MEMORY, /* Copy from include/uapi/linux/mptcp.h. 
* These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5b0f1d1fc697..fd928c447ce8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2927,7 +2927,7 @@ adjudge_to_death: if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_MEMORY); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } else if (!check_net(sock_net(sk))) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4d40615dc8fc..0fba4a4fb988 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -125,7 +125,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) do_reset = true; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_MEMORY); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; From edefba66d929eb2d023df93a0a8175a4ffe82684 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:09 +0800 Subject: [PATCH 4/7] tcp: rstreason: introduce SK_RST_REASON_TCP_STATE for active reset Introducing a new type TCP_STATE to handle some reset conditions appearing in RFC 793 due to its socket state. Actually, we can look into RFC 9293 which has no discrepancy about this part. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/rstreason.h | 6 ++++++ net/ipv4/tcp.c | 10 ++++++---- net/ipv4/tcp_timer.c | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index eef658da8952..bbf20d0bbde7 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -20,6 +20,7 @@ FN(TCP_ABORT_ON_CLOSE) \ FN(TCP_ABORT_ON_LINGER) \ FN(TCP_ABORT_ON_MEMORY) \ + FN(TCP_STATE) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -102,6 +103,11 @@ enum sk_rst_reason { * corresponding to LINUX_MIB_TCPABORTONMEMORY */ SK_RST_REASON_TCP_ABORT_ON_MEMORY, + /** + * @SK_RST_REASON_TCP_STATE: abort on tcp state + * Please see RFC 9293 for all possible reset conditions + */ + SK_RST_REASON_TCP_STATE, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fd928c447ce8..24777e48bcc8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3025,9 +3025,11 @@ int tcp_disconnect(struct sock *sk, int flags) inet_csk_listen_stop(sk); } else if (unlikely(tp->repair)) { WRITE_ONCE(sk->sk_err, ECONNABORTED); - } else if (tcp_need_reset(old_state) || - (tp->snd_nxt != tp->write_seq && - (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { + } else if (tcp_need_reset(old_state)) { + tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_TCP_STATE); + WRITE_ONCE(sk->sk_err, ECONNRESET); + } else if (tp->snd_nxt != tp->write_seq && + (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { /* The last check adjusts for discrepancy of Linux wrt. 
RFC * states */ @@ -4649,7 +4651,7 @@ int tcp_abort(struct sock *sk, int err) if (!sock_flag(sk, SOCK_DEAD)) { if (tcp_need_reset(sk->sk_state)) tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_STATE); tcp_done_with_error(sk, err); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0fba4a4fb988..3910f6d8614e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -779,7 +779,7 @@ static void tcp_keepalive_timer (struct timer_list *t) goto out; } } - tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); + tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE); goto death; } From 0a399892a596055c8f069a17b4775fe5ab66d32a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:10 +0800 Subject: [PATCH 5/7] tcp: rstreason: introduce SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT for active reset Introducing this to show the users the reason of keepalive timeout. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 7 +++++++ net/ipv4/tcp_timer.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index bbf20d0bbde7..9c0c46df0e73 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -21,6 +21,7 @@ FN(TCP_ABORT_ON_LINGER) \ FN(TCP_ABORT_ON_MEMORY) \ FN(TCP_STATE) \ + FN(TCP_KEEPALIVE_TIMEOUT) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -108,6 +109,12 @@ enum sk_rst_reason { * Please see RFC 9293 for all possible reset conditions */ SK_RST_REASON_TCP_STATE, + /** + * @SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT: time to timeout + * When we have already run out of all the chances, which means + * keepalive timeout, we have to reset the connection + */ + SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, /* Copy from include/uapi/linux/mptcp.h. 
* These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3910f6d8614e..86169127e4d1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -807,7 +807,7 @@ static void tcp_keepalive_timer (struct timer_list *t) (user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT); tcp_write_err(sk); goto out; } From c026c6562f86b24dd2dfef501fb1e64cc3884a79 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:11 +0800 Subject: [PATCH 6/7] tcp: rstreason: introduce SK_RST_REASON_TCP_DISCONNECT_WITH_DATA for active reset When a user disconnects a socket while data is still pending in the tcp write queue, the resulting RST should report this specific reset reason to users. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/rstreason.h | 8 ++++++++ net/ipv4/tcp.c | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 9c0c46df0e73..69cb2e52b7da 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -22,6 +22,7 @@ FN(TCP_ABORT_ON_MEMORY) \ FN(TCP_STATE) \ FN(TCP_KEEPALIVE_TIMEOUT) \ + FN(TCP_DISCONNECT_WITH_DATA) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -115,6 +116,13 @@ enum sk_rst_reason { * keepalive timeout, we have to reset the connection */ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, + /** + * @SK_RST_REASON_TCP_DISCONNECT_WITH_DATA: disconnect when write + * queue is not empty + * It means user has written data into the write queue when doing + * disconnecting, so we have to send an RST. + */ + SK_RST_REASON_TCP_DISCONNECT_WITH_DATA, /* Copy from include/uapi/linux/mptcp.h. 
* These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 24777e48bcc8..8514257f4ecd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3033,7 +3033,8 @@ int tcp_disconnect(struct sock *sk, int flags) /* The last check adjusts for discrepancy of Linux wrt. RFC * states */ - tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED); + tcp_send_active_reset(sk, gfp_any(), + SK_RST_REASON_TCP_DISCONNECT_WITH_DATA); WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) WRITE_ONCE(sk->sk_err, ECONNRESET); From ba0ca286c919508ac32d036509b082b3968c0bb2 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 2 Aug 2024 18:21:12 +0800 Subject: [PATCH 7/7] tcp: rstreason: let it work finally in tcp_send_active_reset() Now it's time to let it work by using the 'reason' parameter in the trace world :) Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 16c48df8df4c..cdd0def14427 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3649,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority, /* skb of trace_tcp_send_reset() keeps the skb that caused RST, * skb here is different to the troublesome skb, so use NULL */ - trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED); + trace_tcp_send_reset(sk, NULL, reason); } /* Send a crossed SYN-ACK during socket establishment.