Merge branch 'tcp-move-few-fields-for-data-locality'

Eric Dumazet says:

====================
tcp: move few fields for data locality

After the recent additions (PSP and AccECN) I wanted to make another
pass over field locations to increase data locality.

This series shrinks TCP and TCPv6 objects by 128 bytes, but more
importantly it should reduce the number of cache lines touched in the
TCP fast paths.

There is more to come.

v2: removed the tcp CACHELINE_ASSERT_GROUP_SIZE() checks after the
kernel build bot reported an error.
====================

Link: https://patch.msgid.link/20250919204856.2977245-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 3afb106f3f
Jakub Kicinski, 2025-09-22 17:55:28 -07:00
7 changed files with 31 additions and 39 deletions
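
For readers unfamiliar with the mechanism: struct tcp_sock and struct sock bound their hot fields with zero-size group markers, and build-time asserts keep each field inside its intended group; the diffs below move fields between those groups. Here is a minimal user-space sketch of the idea — the real macros live in include/linux/cache.h, and the struct and field placement below are illustrative, not the kernel's actual layout:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Zero-size markers bounding a group of fields, as in include/linux/cache.h. */
#define cacheline_group_begin(GROUP) uint8_t __cacheline_group_begin__##GROUP[0]
#define cacheline_group_end(GROUP)   uint8_t __cacheline_group_end__##GROUP[0]

/* Build-time check that MEMBER sits between its group's markers,
 * in the spirit of CACHELINE_ASSERT_GROUP_MEMBER().
 */
#define ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER)                           \
        _Static_assert(offsetof(TYPE, MEMBER) >=                           \
                       offsetof(TYPE, __cacheline_group_begin__##GROUP) && \
                       offsetof(TYPE, MEMBER) <                            \
                       offsetof(TYPE, __cacheline_group_end__##GROUP),     \
                       #MEMBER " escaped group " #GROUP)

struct demo_sock {                          /* illustrative, not struct tcp_sock */
        cacheline_group_begin(read_tx);     /* fields only read on transmit */
        uint32_t notsent_lowat;
        uint16_t gso_segs;
        cacheline_group_end(read_tx);

        cacheline_group_begin(write_rx);    /* fields written on receive */
        uint64_t bytes_received;
        uint32_t rcv_tstamp;                /* grouped with other RX-written fields */
        cacheline_group_end(write_rx);
};

ASSERT_GROUP_MEMBER(struct demo_sock, read_tx, gso_segs);
ASSERT_GROUP_MEMBER(struct demo_sock, write_rx, rcv_tstamp);

int main(void)
{
        printf("rcv_tstamp sits at offset %zu\n",
               offsetof(struct demo_sock, rcv_tstamp));
        return 0;
}

Moving a field is then just a matter of relocating its declaration; the asserts fail the build if a later reshuffle silently pushes it out of its group.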

--- a/Documentation/networking/net_cachelines/tcp_sock.rst
+++ b/Documentation/networking/net_cachelines/tcp_sock.rst

@@ -26,8 +26,8 @@ u64 bytes_acked read_w
 u32 dsack_dups
 u32 snd_una read_mostly read_write tcp_wnd_end,tcp_urg_mode,tcp_minshall_check,tcp_cwnd_validate(tx);tcp_ack,tcp_may_update_window,tcp_clean_rtx_queue(write),tcp_ack_tstamp(rx)
 u32 snd_sml read_write tcp_minshall_check,tcp_minshall_update
-u32 rcv_tstamp read_mostly tcp_ack
-void * tcp_clean_acked read_mostly tcp_ack
+u32 rcv_tstamp read_write read_write tcp_ack
+void * tcp_clean_acked read_mostly tcp_ack
 u32 lsndtime read_write tcp_slow_start_after_idle_check,tcp_event_data_sent
 u32 last_oow_ack_time
 u32 compressed_ack_rcv_nxt
@@ -57,7 +57,7 @@ u8:1 is_sack_reneg read_m
 u8:2 fastopen_client_fail
 u8:4 nonagle read_write tcp_skb_entail,tcp_push_pending_frames
 u8:1 thin_lto
-u8:1 recvmsg_inq
+u8:1 recvmsg_inq read_mostly tcp_recvmsg
 u8:1 repair read_mostly tcp_write_xmit
 u8:1 frto
 u8 repair_queue

--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h

@@ -215,6 +215,9 @@ struct tcp_sock {
 u16 gso_segs; /* Max number of segs per GSO packet */
 /* from STCP, retrans queue hinting */
 struct sk_buff *retransmit_skb_hint;
+#if defined(CONFIG_TLS_DEVICE)
+void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
+#endif
 __cacheline_group_end(tcp_sock_read_tx);
 /* TXRX read-mostly hotpath cache lines */
@@ -232,13 +235,13 @@ struct tcp_sock {
 repair : 1,
 tcp_usec_ts : 1, /* TSval values in usec */
 is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
-is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
+is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
+recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
 __cacheline_group_end(tcp_sock_read_txrx);
 /* RX read-mostly hotpath cache lines */
 __cacheline_group_begin(tcp_sock_read_rx);
 u32 copied_seq; /* Head of yet unread data */
-u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
 u32 snd_wl1; /* Sequence for window update */
 u32 tlp_high_seq; /* snd_nxt at the time of TLP */
 u32 rttvar_us; /* smoothed mdev_max */
@@ -246,14 +249,10 @@ struct tcp_sock {
 u16 advmss; /* Advertised MSS */
 u16 urg_data; /* Saved octet of OOB data and control flags */
 u32 lost; /* Total data packets lost incl. rexmits */
+u32 snd_ssthresh; /* Slow start size threshold */
 struct minmax rtt_min;
 /* OOO segments go in this rbtree. Socket lock must be held. */
 struct rb_root out_of_order_queue;
-#if defined(CONFIG_TLS_DEVICE)
-void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
-#endif
-u32 snd_ssthresh; /* Slow start size threshold */
-u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
 __cacheline_group_end(tcp_sock_read_rx);
/* TX read-write hotpath cache lines */
@@ -319,6 +318,7 @@ struct tcp_sock {
 */
 u32 app_limited; /* limited until "delivered" reaches this val */
 u32 rcv_wnd; /* Current receiver window */
+u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
 /*
 * Options received (usually on last packet, some only on SYN packets).
 */
@@ -448,6 +448,9 @@ struct tcp_sock {
 * the first SYN. */
 u32 undo_marker; /* snd_una upon a new recovery episode. */
 int undo_retrans; /* number of undoable retransmissions. */
+u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
+ * while socket was owned by user.
+ */
 u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans
 * Total data bytes retransmitted
 */
@@ -494,9 +497,6 @@ struct tcp_sock {
 u32 probe_seq_end;
 } mtu_probe;
 u32 plb_rehash; /* PLB-triggered rehash attempts */
-u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
- * while socket was owned by user.
- */
 #if IS_ENABLED(CONFIG_MPTCP)
 bool is_mptcp;
 #endif

--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h

@@ -185,8 +185,8 @@ struct fastopen_queue {
 struct request_sock_queue {
 spinlock_t rskq_lock;
 u8 rskq_defer_accept;
-u32 synflood_warned;
+u8 synflood_warned;
 atomic_t qlen;
 atomic_t young;
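
The type change above is enabled by the tcp_input.c change at the bottom of this commit: once the xchg() is gone, a plain u8 flag suffices, and it packs against rskq_defer_accept instead of forcing 32-bit alignment. A stand-in demonstration of that padding effect (the types below are illustrative substitutes, not the kernel's):

#include <stdio.h>
#include <stdint.h>

struct queue_before {                  /* synflood_warned as u32 */
        uint32_t rskq_lock;            /* stand-in for spinlock_t */
        uint8_t  rskq_defer_accept;
        /* 3 bytes of padding here */
        uint32_t synflood_warned;      /* widened so that xchg() could be used */
        uint32_t qlen;                 /* stand-in for atomic_t */
        uint32_t young;
};

struct queue_after {                   /* synflood_warned back to u8 */
        uint32_t rskq_lock;
        uint8_t  rskq_defer_accept;
        uint8_t  synflood_warned;      /* plain flag: READ_ONCE()/WRITE_ONCE() suffice */
        /* 2 bytes of padding here */
        uint32_t qlen;
        uint32_t young;
};

int main(void)
{
        /* Prints 20 vs 16 on common ABIs: one less word of padding. */
        printf("before: %zu bytes, after: %zu bytes\n",
               sizeof(struct queue_before), sizeof(struct queue_after));
        return 0;
}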

--- a/include/net/sock.h
+++ b/include/net/sock.h

@@ -467,7 +467,7 @@ struct sock {
 __cacheline_group_begin(sock_write_tx);
 int sk_write_pending;
 atomic_t sk_omem_alloc;
-int sk_sndbuf;
+int sk_err_soft;
 int sk_wmem_queued;
 refcount_t sk_wmem_alloc;
@@ -492,6 +492,9 @@ struct sock {
 long sk_sndtimeo;
 u32 sk_priority;
 u32 sk_mark;
+kuid_t sk_uid;
+u16 sk_protocol;
+u16 sk_type;
 struct dst_entry __rcu *sk_dst_cache;
 netdev_features_t sk_route_caps;
 #ifdef CONFIG_SOCK_VALIDATE_XMIT
@@ -504,6 +507,7 @@ struct sock {
 unsigned int sk_gso_max_size;
 gfp_t sk_allocation;
 u32 sk_txhash;
+int sk_sndbuf;
 u8 sk_pacing_shift;
 bool sk_use_task_frag;
 __cacheline_group_end(sock_read_tx);
@@ -517,15 +521,11 @@ struct sock {
 sk_no_check_tx : 1,
 sk_no_check_rx : 1;
 u8 sk_shutdown;
-u16 sk_type;
-u16 sk_protocol;
 unsigned long sk_lingertime;
 struct proto *sk_prot_creator;
 rwlock_t sk_callback_lock;
-int sk_err_soft;
 u32 sk_ack_backlog;
 u32 sk_max_ack_backlog;
-kuid_t sk_uid;
 unsigned long sk_ino;
 spinlock_t sk_peer_lock;
 int sk_bind_phc;
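
A quick way to sanity-check moves like the sk_uid/sk_protocol/sk_type regrouping above is an offset audit: print each member's offset and the 64-byte cache line it lands on. A small sketch with stand-in fields (not the real struct sock; in a kernel tree, pahole -C sock vmlinux gives the authoritative layout):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Stand-ins for a few of the fields regrouped above. */
struct demo_sock {
        long     sk_sndtimeo;
        uint32_t sk_priority;
        uint32_t sk_mark;
        uint32_t sk_uid;               /* kuid_t stand-in, now beside other TX-read fields */
        uint16_t sk_protocol;
        uint16_t sk_type;
        void    *sk_dst_cache;
};

#define REPORT(member)                                                \
        printf("%-12s offset %3zu, cache line %zu\n", #member,        \
               offsetof(struct demo_sock, member),                    \
               offsetof(struct demo_sock, member) / 64)

int main(void)
{
        REPORT(sk_mark);
        REPORT(sk_uid);
        REPORT(sk_protocol);
        REPORT(sk_dst_cache);
        return 0;
}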

--- a/net/core/sock.c
+++ b/net/core/sock.c

@@ -4452,7 +4452,7 @@ static int __init sock_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
-CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
+CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_err_soft);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
@@ -4471,12 +4471,15 @@ static int __init sock_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
+CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_uid);
+CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_protocol);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
+CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndbuf);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);

--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c

@@ -5101,7 +5101,9 @@ static void __init tcp_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
-CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32);
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, tcp_clean_acked);
+#endif
 /* TXRX read-mostly hotpath cache lines */
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
@@ -5112,11 +5114,9 @@ static void __init tcp_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
-CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
 /* RX read-mostly hotpath cache lines */
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
-CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
@@ -5127,12 +5127,6 @@ static void __init tcp_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
-#if IS_ENABLED(CONFIG_TLS_DEVICE)
-CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tcp_clean_acked);
-CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 77);
-#else
-CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
-#endif
 /* TX read-write hotpath cache lines */
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
@@ -5151,7 +5145,6 @@ static void __init tcp_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
-CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 97);
 /* TXRX read-write hotpath cache lines */
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
@@ -5170,13 +5163,9 @@ static void __init tcp_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
+CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
-/* 32bit arches with 8byte alignment on u64 fields might need padding
- * before tcp_clock_cache.
- */
-CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 4);
 /* RX read-write hotpath cache lines */
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
@@ -5193,7 +5182,6 @@ static void __init tcp_struct_check(void)
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
-CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 112);
 }
 void __init tcp_init(void)
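
The CACHELINE_ASSERT_GROUP_SIZE() deletions above are the v2 change mentioned in the cover letter. The old values already needed a CONFIG_TLS_DEVICE split (77 vs 69 bytes): a group's byte size shifts with config options and per-arch padding, so hard-coded sizes are brittle. A sketch of the failure mode, with made-up sizes:

#include <stddef.h>
#include <stdint.h>

#define cacheline_group_begin(GROUP) uint8_t __cacheline_group_begin__##GROUP[0]
#define cacheline_group_end(GROUP)   uint8_t __cacheline_group_end__##GROUP[0]

/* Like CACHELINE_ASSERT_GROUP_SIZE(): the span between the markers
 * must not exceed SIZE bytes.
 */
#define ASSERT_GROUP_SIZE(TYPE, GROUP, SIZE)                                \
        _Static_assert(offsetof(TYPE, __cacheline_group_end__##GROUP) -     \
                       offsetof(TYPE, __cacheline_group_begin__##GROUP) <=  \
                       (SIZE), "group " #GROUP " grew past " #SIZE " bytes")

struct demo_sock {
        cacheline_group_begin(read_rx);
        uint32_t copied_seq;
        uint32_t snd_wl1;
#ifdef CONFIG_TLS_DEVICE               /* a config option changes the span... */
        void (*tcp_clean_acked)(void *sk, uint32_t acked_seq);
#endif
        cacheline_group_end(read_rx);
};

/* ...so this builds without CONFIG_TLS_DEVICE (span is 8 bytes) and
 * fails to compile with -DCONFIG_TLS_DEVICE (span becomes 16).
 */
ASSERT_GROUP_SIZE(struct demo_sock, read_rx, 8);

int main(void) { return 0; }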

--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c

@@ -4085,7 +4085,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 /* We passed data and got it acked, remove any soft error
 * log. Something worked...
 */
-WRITE_ONCE(sk->sk_err_soft, 0);
+if (READ_ONCE(sk->sk_err_soft))
+	WRITE_ONCE(sk->sk_err_soft, 0);
 WRITE_ONCE(icsk->icsk_probes_out, 0);
 tp->rcv_tstamp = tcp_jiffies32;
 if (!prior_packets)
@@ -7281,8 +7282,8 @@ static bool tcp_syn_flood_action(struct sock *sk, const char *proto)
 #endif
 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
-if (!READ_ONCE(queue->synflood_warned) && syncookies != 2 &&
-    xchg(&queue->synflood_warned, 1) == 0) {
+if (syncookies != 2 && !READ_ONCE(queue->synflood_warned)) {
+	WRITE_ONCE(queue->synflood_warned, 1);
 if (IS_ENABLED(CONFIG_IPV6) && sk->sk_family == AF_INET6) {
 net_info_ratelimited("%s: Possible SYN flooding on port [%pI6c]:%u. %s.\n",
 proto, inet6_rcv_saddr(sk),
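
Both tcp_input.c hunks follow the same theme: skip the write, or the atomic read-modify-write, when a plain read shows it is unnecessary. In tcp_ack(), sk_err_soft is now cleared only if it was set, so the common case no longer dirties that cache line; in tcp_syn_flood_action(), the xchg() becomes a racy test-then-set, acceptable because the worst case is an extra ratelimited log line, and dropping the atomic is what lets synflood_warned shrink to u8. A user-space sketch of the pattern, with READ_ONCE()/WRITE_ONCE() modeled as volatile accesses:

#include <stdio.h>
#include <stdint.h>

#define READ_ONCE(x)     (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

static uint8_t synflood_warned;        /* u8 is enough once xchg() is gone */

static void syn_flood_action(const char *proto)
{
        /* Racy test-then-set: two CPUs could both warn once, which is
         * fine for a ratelimited diagnostic.
         */
        if (!READ_ONCE(synflood_warned)) {
                WRITE_ONCE(synflood_warned, 1);
                printf("%s: possible SYN flooding\n", proto);
        }
}

int main(void)
{
        syn_flood_action("TCP");
        syn_flood_action("TCP");       /* silent: flag already set */
        return 0;
}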