mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 04:04:43 +01:00
ipv6: Move ipv6_fl_list from ipv6_pinfo to inet_sock.
In {tcp6,udp6,raw6}_sock, struct ipv6_pinfo is always placed at
the beginning of a new cache line because
1. __alignof__(struct tcp_sock) is 64 due to ____cacheline_aligned
of __cacheline_group_begin(tcp_sock_write_tx)
2. __alignof__(struct udp_sock) is 64 due to ____cacheline_aligned
of struct numa_drop_counters
3. in raw6_sock, struct numa_drop_counters is placed before
struct ipv6_pinfo
. struct ipv6_pinfo is 136 bytes, but the last cache line is
only used by ipv6_fl_list:
$ pahole -C ipv6_pinfo vmlinux
struct ipv6_pinfo {
...
/* --- cacheline 2 boundary (128 bytes) --- */
struct ipv6_fl_socklist * ipv6_fl_list; /* 128 8 */
/* size: 136, cachelines: 3, members: 23 */
Let's move ipv6_fl_list from struct ipv6_pinfo to struct inet_sock
to save a full cache line for {tcp6,udp6,raw6}_sock.
Now, struct ipv6_pinfo is 128 bytes, and {tcp6,udp6,raw6}_sock have
64 bytes less, while {tcp,udp,raw}_sock retain the same size.
Before:
# grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
RAWv6 1408
UDPv6 1472
TCPv6 2560
RAW 1152
UDP 1280
TCP 2368
After:
# grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
RAWv6 1344
UDPv6 1408
TCPv6 2496
RAW 1152
UDP 1280
TCP 2368
Also, ipv6_fl_list and inet_flags (SNDFLOW bit) are placed in the
same cache line.
$ pahole -C inet_sock vmlinux
...
/* --- cacheline 11 boundary (704 bytes) was 56 bytes ago --- */
struct ipv6_pinfo * pinet6; /* 760 8 */
/* --- cacheline 12 boundary (768 bytes) --- */
struct ipv6_fl_socklist * ipv6_fl_list; /* 768 8 */
unsigned long inet_flags; /* 776 8 */
Doc churn is due to the insufficient Type column (only 1 space short).
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251014224210.2964778-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
0746da0176
commit
1c17f4373d
7 changed files with 76 additions and 74 deletions
|
|
@ -5,42 +5,43 @@
|
|||
inet_sock struct fast path usage breakdown
|
||||
==========================================
|
||||
|
||||
======================= ===================== =================== =================== ======================================================================================================
|
||||
Type Name fastpath_tx_access fastpath_rx_access comment
|
||||
======================= ===================== =================== =================== ======================================================================================================
|
||||
struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data
|
||||
struct ipv6_pinfo* pinet6
|
||||
be16 inet_sport read_mostly __tcp_transmit_skb
|
||||
be32 inet_daddr read_mostly ip_select_ident_segs
|
||||
be32 inet_rcv_saddr
|
||||
be16 inet_dport read_mostly __tcp_transmit_skb
|
||||
u16 inet_num
|
||||
be32 inet_saddr
|
||||
s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl
|
||||
u16 cmsg_flags
|
||||
struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit
|
||||
u16 inet_id read_mostly ip_select_ident_segs
|
||||
u8 tos read_mostly ip_queue_xmit
|
||||
u8 min_ttl
|
||||
u8 mc_ttl
|
||||
u8 pmtudisc
|
||||
u8:1 recverr
|
||||
u8:1 is_icsk
|
||||
u8:1 freebind
|
||||
u8:1 hdrincl
|
||||
u8:1 mc_loop
|
||||
u8:1 transparent
|
||||
u8:1 mc_all
|
||||
u8:1 nodefrag
|
||||
u8:1 bind_address_no_port
|
||||
u8:1 recverr_rfc4884
|
||||
u8:1 defer_connect read_mostly tcp_sendmsg_fastopen
|
||||
u8 rcv_tos
|
||||
u8 convert_csum
|
||||
int uc_index
|
||||
int mc_index
|
||||
be32 mc_addr
|
||||
struct ip_mc_socklist* mc_list
|
||||
struct inet_cork_full cork read_mostly __tcp_transmit_skb
|
||||
struct local_port_range
|
||||
======================= ===================== =================== =================== ======================================================================================================
|
||||
======================== ===================== =================== =================== ======================================================================================================
|
||||
Type Name fastpath_tx_access fastpath_rx_access comment
|
||||
======================== ===================== =================== =================== ======================================================================================================
|
||||
struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data
|
||||
struct ipv6_pinfo* pinet6
|
||||
struct ipv6_fl_socklist* ipv6_fl_list read_mostly tcp_v6_connect,__ip6_datagram_connect,udpv6_sendmsg,rawv6_sendmsg
|
||||
be16 inet_sport read_mostly __tcp_transmit_skb
|
||||
be32 inet_daddr read_mostly ip_select_ident_segs
|
||||
be32 inet_rcv_saddr
|
||||
be16 inet_dport read_mostly __tcp_transmit_skb
|
||||
u16 inet_num
|
||||
be32 inet_saddr
|
||||
s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl
|
||||
u16 cmsg_flags
|
||||
struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit
|
||||
u16 inet_id read_mostly ip_select_ident_segs
|
||||
u8 tos read_mostly ip_queue_xmit
|
||||
u8 min_ttl
|
||||
u8 mc_ttl
|
||||
u8 pmtudisc
|
||||
u8:1 recverr
|
||||
u8:1 is_icsk
|
||||
u8:1 freebind
|
||||
u8:1 hdrincl
|
||||
u8:1 mc_loop
|
||||
u8:1 transparent
|
||||
u8:1 mc_all
|
||||
u8:1 nodefrag
|
||||
u8:1 bind_address_no_port
|
||||
u8:1 recverr_rfc4884
|
||||
u8:1 defer_connect read_mostly tcp_sendmsg_fastopen
|
||||
u8 rcv_tos
|
||||
u8 convert_csum
|
||||
int uc_index
|
||||
int mc_index
|
||||
be32 mc_addr
|
||||
struct ip_mc_socklist* mc_list
|
||||
struct inet_cork_full cork read_mostly __tcp_transmit_skb
|
||||
struct local_port_range
|
||||
======================== ===================== =================== =================== ======================================================================================================
|
||||
|
|
|
|||
|
|
@ -1199,12 +1199,12 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
|
|||
struct ipv6_pinfo *newnp = inet6_sk(newsk);
|
||||
struct ipv6_pinfo *np = inet6_sk(lsk);
|
||||
|
||||
inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
|
||||
newinet->pinet6 = &newtcp6sk->inet6;
|
||||
newinet->ipv6_fl_list = NULL;
|
||||
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
|
||||
newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
|
||||
newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
|
||||
inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr;
|
||||
newnp->ipv6_fl_list = NULL;
|
||||
newnp->pktoptions = NULL;
|
||||
newsk->sk_bound_dev_if = treq->ir_iif;
|
||||
newinet->inet_opt = NULL;
|
||||
|
|
|
|||
|
|
@ -271,7 +271,6 @@ struct ipv6_pinfo {
|
|||
|
||||
struct ipv6_mc_socklist __rcu *ipv6_mc_list;
|
||||
struct ipv6_ac_socklist *ipv6_ac_list;
|
||||
struct ipv6_fl_socklist __rcu *ipv6_fl_list;
|
||||
};
|
||||
|
||||
/* We currently use available bits from inet_sk(sk)->inet_flags,
|
||||
|
|
|
|||
|
|
@ -214,6 +214,7 @@ struct inet_sock {
|
|||
struct sock sk;
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
struct ipv6_pinfo *pinet6;
|
||||
struct ipv6_fl_socklist __rcu *ipv6_fl_list;
|
||||
#endif
|
||||
/* Socket demultiplex comparisons on incoming packets. */
|
||||
#define inet_daddr sk.__sk_common.skc_daddr
|
||||
|
|
|
|||
|
|
@ -66,8 +66,8 @@ EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
|
|||
fl != NULL; \
|
||||
fl = rcu_dereference(fl->next))
|
||||
|
||||
#define for_each_sk_fl_rcu(np, sfl) \
|
||||
for (sfl = rcu_dereference(np->ipv6_fl_list); \
|
||||
#define for_each_sk_fl_rcu(sk, sfl) \
|
||||
for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list); \
|
||||
sfl != NULL; \
|
||||
sfl = rcu_dereference(sfl->next))
|
||||
|
||||
|
|
@ -262,12 +262,11 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
|
|||
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
|
||||
{
|
||||
struct ipv6_fl_socklist *sfl;
|
||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||
|
||||
label &= IPV6_FLOWLABEL_MASK;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_sk_fl_rcu(np, sfl) {
|
||||
for_each_sk_fl_rcu(sk, sfl) {
|
||||
struct ip6_flowlabel *fl = sfl->fl;
|
||||
|
||||
if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
|
||||
|
|
@ -283,16 +282,16 @@ EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
|
|||
|
||||
void fl6_free_socklist(struct sock *sk)
|
||||
{
|
||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
struct ipv6_fl_socklist *sfl;
|
||||
|
||||
if (!rcu_access_pointer(np->ipv6_fl_list))
|
||||
if (!rcu_access_pointer(inet->ipv6_fl_list))
|
||||
return;
|
||||
|
||||
spin_lock_bh(&ip6_sk_fl_lock);
|
||||
while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
|
||||
while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list,
|
||||
lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
|
||||
np->ipv6_fl_list = sfl->next;
|
||||
inet->ipv6_fl_list = sfl->next;
|
||||
spin_unlock_bh(&ip6_sk_fl_lock);
|
||||
|
||||
fl_release(sfl->fl);
|
||||
|
|
@ -470,16 +469,15 @@ done:
|
|||
|
||||
static int mem_check(struct sock *sk)
|
||||
{
|
||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||
struct ipv6_fl_socklist *sfl;
|
||||
int room = FL_MAX_SIZE - atomic_read(&fl_size);
|
||||
struct ipv6_fl_socklist *sfl;
|
||||
int count = 0;
|
||||
|
||||
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_sk_fl_rcu(np, sfl)
|
||||
for_each_sk_fl_rcu(sk, sfl)
|
||||
count++;
|
||||
rcu_read_unlock();
|
||||
|
||||
|
|
@ -492,13 +490,15 @@ static int mem_check(struct sock *sk)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
|
||||
struct ip6_flowlabel *fl)
|
||||
static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl,
|
||||
struct ip6_flowlabel *fl)
|
||||
{
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
|
||||
spin_lock_bh(&ip6_sk_fl_lock);
|
||||
sfl->fl = fl;
|
||||
sfl->next = np->ipv6_fl_list;
|
||||
rcu_assign_pointer(np->ipv6_fl_list, sfl);
|
||||
sfl->next = inet->ipv6_fl_list;
|
||||
rcu_assign_pointer(inet->ipv6_fl_list, sfl);
|
||||
spin_unlock_bh(&ip6_sk_fl_lock);
|
||||
}
|
||||
|
||||
|
|
@ -520,7 +520,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
|
|||
|
||||
rcu_read_lock();
|
||||
|
||||
for_each_sk_fl_rcu(np, sfl) {
|
||||
for_each_sk_fl_rcu(sk, sfl) {
|
||||
if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
|
||||
spin_lock_bh(&ip6_fl_lock);
|
||||
freq->flr_label = sfl->fl->label;
|
||||
|
|
@ -559,7 +559,7 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
|
|||
}
|
||||
|
||||
spin_lock_bh(&ip6_sk_fl_lock);
|
||||
for (sflp = &np->ipv6_fl_list;
|
||||
for (sflp = &inet_sk(sk)->ipv6_fl_list;
|
||||
(sfl = socklist_dereference(*sflp)) != NULL;
|
||||
sflp = &sfl->next) {
|
||||
if (sfl->fl->label == freq->flr_label)
|
||||
|
|
@ -579,13 +579,12 @@ found:
|
|||
|
||||
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
|
||||
{
|
||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||
struct net *net = sock_net(sk);
|
||||
struct ipv6_fl_socklist *sfl;
|
||||
int err;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_sk_fl_rcu(np, sfl) {
|
||||
for_each_sk_fl_rcu(sk, sfl) {
|
||||
if (sfl->fl->label == freq->flr_label) {
|
||||
err = fl6_renew(sfl->fl, freq->flr_linger,
|
||||
freq->flr_expires);
|
||||
|
|
@ -614,7 +613,6 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
|
|||
{
|
||||
struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
|
||||
struct ip6_flowlabel *fl, *fl1 = NULL;
|
||||
struct ipv6_pinfo *np = inet6_sk(sk);
|
||||
struct net *net = sock_net(sk);
|
||||
int err;
|
||||
|
||||
|
|
@ -645,7 +643,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
|
|||
if (freq->flr_label) {
|
||||
err = -EEXIST;
|
||||
rcu_read_lock();
|
||||
for_each_sk_fl_rcu(np, sfl) {
|
||||
for_each_sk_fl_rcu(sk, sfl) {
|
||||
if (sfl->fl->label == freq->flr_label) {
|
||||
if (freq->flr_flags & IPV6_FL_F_EXCL) {
|
||||
rcu_read_unlock();
|
||||
|
|
@ -682,7 +680,7 @@ recheck:
|
|||
fl1->linger = fl->linger;
|
||||
if ((long)(fl->expires - fl1->expires) > 0)
|
||||
fl1->expires = fl->expires;
|
||||
fl_link(np, sfl1, fl1);
|
||||
fl_link(sk, sfl1, fl1);
|
||||
fl_free(fl);
|
||||
return 0;
|
||||
|
||||
|
|
@ -716,7 +714,7 @@ release:
|
|||
}
|
||||
}
|
||||
|
||||
fl_link(np, sfl1, fl);
|
||||
fl_link(sk, sfl1, fl);
|
||||
return 0;
|
||||
done:
|
||||
fl_free(fl);
|
||||
|
|
|
|||
|
|
@ -1386,7 +1386,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
|||
if (!newsk)
|
||||
return NULL;
|
||||
|
||||
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
|
||||
newinet = inet_sk(newsk);
|
||||
newinet->pinet6 = tcp_inet6_sk(newsk);
|
||||
newinet->ipv6_fl_list = NULL;
|
||||
|
||||
newnp = tcp_inet6_sk(newsk);
|
||||
newtp = tcp_sk(newsk);
|
||||
|
|
@ -1405,7 +1407,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
|||
|
||||
newnp->ipv6_mc_list = NULL;
|
||||
newnp->ipv6_ac_list = NULL;
|
||||
newnp->ipv6_fl_list = NULL;
|
||||
newnp->pktoptions = NULL;
|
||||
newnp->opt = NULL;
|
||||
newnp->mcast_oif = inet_iif(skb);
|
||||
|
|
@ -1453,10 +1454,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
|||
newsk->sk_gso_type = SKB_GSO_TCPV6;
|
||||
inet6_sk_rx_dst_set(newsk, skb);
|
||||
|
||||
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
|
||||
newinet = inet_sk(newsk);
|
||||
newinet->pinet6 = tcp_inet6_sk(newsk);
|
||||
newinet->ipv6_fl_list = NULL;
|
||||
newinet->inet_opt = NULL;
|
||||
|
||||
newtp = tcp_sk(newsk);
|
||||
newinet = inet_sk(newsk);
|
||||
newnp = tcp_inet6_sk(newsk);
|
||||
|
||||
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
|
||||
|
|
@ -1469,10 +1472,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
|||
|
||||
First: no IPv4 options.
|
||||
*/
|
||||
newinet->inet_opt = NULL;
|
||||
newnp->ipv6_mc_list = NULL;
|
||||
newnp->ipv6_ac_list = NULL;
|
||||
newnp->ipv6_fl_list = NULL;
|
||||
|
||||
/* Clone RX bits */
|
||||
newnp->rxopt.all = np->rxopt.all;
|
||||
|
|
|
|||
|
|
@ -782,9 +782,10 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
|
|||
struct sctp_association *asoc,
|
||||
bool kern)
|
||||
{
|
||||
struct sock *newsk;
|
||||
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
|
||||
struct sctp6_sock *newsctp6sk;
|
||||
struct inet_sock *newinet;
|
||||
struct sock *newsk;
|
||||
|
||||
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
|
||||
if (!newsk)
|
||||
|
|
@ -796,7 +797,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
|
|||
sock_reset_flag(sk, SOCK_ZAPPED);
|
||||
|
||||
newsctp6sk = (struct sctp6_sock *)newsk;
|
||||
inet_sk(newsk)->pinet6 = &newsctp6sk->inet6;
|
||||
newinet = inet_sk(newsk);
|
||||
newinet->pinet6 = &newsctp6sk->inet6;
|
||||
newinet->ipv6_fl_list = NULL;
|
||||
|
||||
sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped;
|
||||
|
||||
|
|
@ -805,7 +808,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
|
|||
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
|
||||
newnp->ipv6_mc_list = NULL;
|
||||
newnp->ipv6_ac_list = NULL;
|
||||
newnp->ipv6_fl_list = NULL;
|
||||
|
||||
sctp_v6_copy_ip_options(sk, newsk);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue