net: use NUMA drop counters for softnet_data.dropped

Hosts under DoS attack can suffer from false sharing
in enqueue_to_backlog(): atomic_inc(&sd->dropped).

This is because sd->dropped can be touched by many CPUs,
possibly residing on different NUMA nodes.

Generalize the sk_drop_counters infrastructure
added in commit c51613fa27 ("net: add sk->sk_drop_counters")
and use it to replace softnet_data.dropped
with the NUMA-friendly softnet_data.drop_counters.
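
As a minimal userspace C sketch of the pattern (not the kernel
code; it assumes 64-byte cache lines, and node_id() is a
hypothetical stand-in for the kernel's numa_node_id()): the
contended counter is split into two cacheline-aligned shares
selected by NUMA node parity, so writers on different nodes dirty
different cache lines, and readers simply sum both shares:

#include <stdalign.h>
#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical stand-in for the kernel's numa_node_id(). */
static int node_id(void)
{
	return 0;	/* a real version would query getcpu()/libnuma */
}

struct numa_drop_counters {
	alignas(64) atomic_int drops0;	/* even NUMA nodes */
	alignas(64) atomic_int drops1;	/* odd NUMA nodes */
};

static void drop_add(struct numa_drop_counters *ndc, int val)
{
	/* Pick the share matching this node's parity. */
	if (node_id() % 2)
		atomic_fetch_add(&ndc->drops1, val);
	else
		atomic_fetch_add(&ndc->drops0, val);
}

static int drop_read(struct numa_drop_counters *ndc)
{
	/* Drops only ever grow; summing both shares is enough. */
	return atomic_load(&ndc->drops0) + atomic_load(&ndc->drops1);
}

int main(void)
{
	static struct numa_drop_counters ndc;

	drop_add(&ndc, 1);
	printf("drops: %d\n", drop_read(&ndc));
	return 0;
}

With more than two nodes, nodes of equal parity still share a
line, but parity hashing already removes the worst cross-node
bouncing at a fixed two-counter cost.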

This adds 64 bytes per cpu, maybe more in the future
if we increase the number of counters (currently 2)
per 'struct numa_drop_counters'.
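
Where the 64 bytes come from, as a sketch assuming
SMP_CACHE_BYTES == 64: each ____cacheline_aligned_in_smp member
starts its own cache line, so the two-counter struct spans 128
bytes, while the atomic_t dropped it replaces (also
cacheline-aligned) already occupied one line, for a net +64 bytes
per cpu:

#include <assert.h>
#include <stdalign.h>
#include <stdatomic.h>

#define CACHE_LINE 64	/* assumed SMP_CACHE_BYTES */

/* Userspace mock of the kernel layout: one line per counter. */
struct numa_drop_counters {
	alignas(CACHE_LINE) atomic_int drops0;
	alignas(CACHE_LINE) atomic_int drops1;
};

/* Two aligned members -> two full lines (128 bytes total); the old
 * single aligned counter used one line, hence +64 bytes per cpu. */
static_assert(sizeof(struct numa_drop_counters) == 2 * CACHE_LINE,
	      "each counter gets its own cache line");

int main(void) { return 0; }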

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
 7 files changed, 45 insertions(+), 31 deletions(-)

@@ -295,7 +295,7 @@ struct raw6_sock {
 	__u32			offset;		/* checksum offset  */
 	struct icmp6_filter	filter;
 	__u32			ip6mr_table;
 
-	struct socket_drop_counters drop_counters;
+	struct numa_drop_counters drop_counters;
 	struct ipv6_pinfo	inet6;
 };

@@ -3459,6 +3459,32 @@ static inline bool dev_has_header(const struct net_device *dev)
 	return dev->header_ops && dev->header_ops->create;
 }
 
+struct numa_drop_counters {
+	atomic_t	drops0 ____cacheline_aligned_in_smp;
+	atomic_t	drops1 ____cacheline_aligned_in_smp;
+};
+
+static inline int numa_drop_read(const struct numa_drop_counters *ndc)
+{
+	return atomic_read(&ndc->drops0) + atomic_read(&ndc->drops1);
+}
+
+static inline void numa_drop_add(struct numa_drop_counters *ndc, int val)
+{
+	int n = numa_node_id() % 2;
+
+	if (n)
+		atomic_add(val, &ndc->drops1);
+	else
+		atomic_add(val, &ndc->drops0);
+}
+
+static inline void numa_drop_reset(struct numa_drop_counters *ndc)
+{
+	atomic_set(&ndc->drops0, 0);
+	atomic_set(&ndc->drops1, 0);
+}
+
 /*
  *	Incoming packets are placed on per-CPU queues
  */
@@ -3504,7 +3530,7 @@ struct softnet_data {
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 
-	atomic_t		dropped ____cacheline_aligned_in_smp;
+	struct numa_drop_counters drop_counters;
 
 	/* Another possibly contended cache line */
 	spinlock_t		defer_lock ____cacheline_aligned_in_smp;

@@ -108,7 +108,7 @@ struct udp_sock {
 	 * the last UDP socket cacheline.
 	 */
 	struct hlist_node	tunnel_list;
-	struct socket_drop_counters drop_counters;
+	struct numa_drop_counters drop_counters;
 };
 
 #define udp_test_bit(nr, sk)			\

@@ -81,7 +81,7 @@ struct raw_sock {
 	struct inet_sock   inet;
 	struct icmp_filter filter;
 	u32		   ipmr_table;
-	struct socket_drop_counters drop_counters;
+	struct numa_drop_counters drop_counters;
 };
 
 #define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)

@@ -102,11 +102,6 @@ struct net;
 typedef __u32 __bitwise __portpair;
 typedef __u64 __bitwise __addrpair;
 
-struct socket_drop_counters {
-	atomic_t drops0 ____cacheline_aligned_in_smp;
-	atomic_t drops1 ____cacheline_aligned_in_smp;
-};
-
 /**
  *	struct sock_common - minimal network layer representation of sockets
  *	@skc_daddr: Foreign IPv4 addr
@@ -287,7 +282,7 @@ struct sk_filter;
  *	@sk_err_soft: errors that don't cause failure but are the cause of a
  *			persistent failure not just 'timed out'
  *	@sk_drops: raw/udp drops counter
- *	@sk_drop_counters: optional pointer to socket_drop_counters
+ *	@sk_drop_counters: optional pointer to numa_drop_counters
  *	@sk_ack_backlog: current listen backlog
 *	@sk_max_ack_backlog: listen backlog set in listen()
 *	@sk_uid: user id of owner
@@ -456,7 +451,7 @@ struct sock {
 #ifdef CONFIG_XFRM
 	struct xfrm_policy __rcu *sk_policy[2];
 #endif
-	struct socket_drop_counters *sk_drop_counters;
+	struct numa_drop_counters *sk_drop_counters;
 	__cacheline_group_end(sock_read_rxtx);
 
 	__cacheline_group_begin(sock_write_rxtx);
@@ -2698,18 +2693,12 @@ struct sock_skb_cb {
 
 static inline void sk_drops_add(struct sock *sk, int segs)
 {
-	struct socket_drop_counters *sdc = sk->sk_drop_counters;
+	struct numa_drop_counters *ndc = sk->sk_drop_counters;
 
-	if (sdc) {
-		int n = numa_node_id() % 2;
-
-		if (n)
-			atomic_add(segs, &sdc->drops1);
-		else
-			atomic_add(segs, &sdc->drops0);
-	} else {
+	if (ndc)
+		numa_drop_add(ndc, segs);
+	else
 		atomic_add(segs, &sk->sk_drops);
-	}
 }
 
 static inline void sk_drops_inc(struct sock *sk)
@@ -2719,23 +2708,21 @@ static inline void sk_drops_inc(struct sock *sk)
 
 static inline int sk_drops_read(const struct sock *sk)
 {
-	const struct socket_drop_counters *sdc = sk->sk_drop_counters;
+	const struct numa_drop_counters *ndc = sk->sk_drop_counters;
 
-	if (sdc) {
+	if (ndc) {
 		DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
-		return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
+		return numa_drop_read(ndc);
 	}
 	return atomic_read(&sk->sk_drops);
 }
 
 static inline void sk_drops_reset(struct sock *sk)
 {
-	struct socket_drop_counters *sdc = sk->sk_drop_counters;
+	struct numa_drop_counters *ndc = sk->sk_drop_counters;
 
-	if (sdc) {
-		atomic_set(&sdc->drops0, 0);
-		atomic_set(&sdc->drops1, 0);
-	}
+	if (ndc)
+		numa_drop_reset(ndc);
 	atomic_set(&sk->sk_drops, 0);
 }

@@ -5248,7 +5248,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	backlog_unlock_irq_restore(sd, &flags);
 
 cpu_backlog_drop:
-	atomic_inc(&sd->dropped);
+	numa_drop_add(&sd->drop_counters, 1);
 bad_dev:
 	dev_core_stats_rx_dropped_inc(skb->dev);
 	kfree_skb_reason(skb, reason);

@@ -145,7 +145,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
 		   "%08x %08x\n",
-		   READ_ONCE(sd->processed), atomic_read(&sd->dropped),
+		   READ_ONCE(sd->processed),
+		   numa_drop_read(&sd->drop_counters),
 		   READ_ONCE(sd->time_squeeze), 0,
 		   0, 0, 0, 0, /* was fastroute */
 		   0, /* was cpu_collision */
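
The aggregated value still feeds the second hex column of
/proc/net/softnet_stat, so existing tooling keeps working. A small
reader sketch (one line per online CPU; the index printed below is
the line number, not necessarily the cpu id):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/softnet_stat", "r");
	unsigned int processed, dropped;
	char line[256];
	int idx = 0;

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		/* First two %08x fields: processed, dropped. */
		if (sscanf(line, "%x %x", &processed, &dropped) == 2)
			printf("line %d: processed %u dropped %u\n",
			       idx, processed, dropped);
		idx++;
	}
	fclose(f);
	return 0;
}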