From 7711f4bb4b360d9c0ff84db1c0ec91e385625047 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Dec 2025 12:20:35 +0100 Subject: [PATCH 01/70] netfilter: nft_set_pipapo: fix range overlap detection set->klen has to be used, not sizeof(). The latter only compares a single register but a full check of the entire key is needed. Example: table ip t { map s { typeof iifname . ip saddr : verdict flags interval } } nft add element t s '{ "lo" . 10.0.0.0/24 : drop }' # no error, expected nft add element t s '{ "lo" . 10.0.0.0/24 : drop }' # no error, expected nft add element t s '{ "lo" . 10.0.0.0/8 : drop }' # bug: no error The 3rd 'add element' should be rejected via -ENOTEMPTY, not -EEXIST, so userspace / nft can report an error to the user. The latter is only correct for the 2nd case (re-add of existing element). As-is, userspace is told that the command was successful, but no elements were added. After this patch, 3rd command gives: Error: Could not process rule: File exists add element t s { "lo" . 127.0.0.0/8 . "lo" : drop } ^^^^^^^^^^^^^^^^^^^^^^^^^ Fixes: 0eb4b5ee33f2 ("netfilter: nft_set_pipapo: Separate partial and complete overlap cases on insertion") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 112fe46788b6..6d77a5f0088a 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1317,8 +1317,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else dup_end = dup_key; - if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) && - !memcmp(end, dup_end->data, sizeof(*dup_end->data))) { + if (!memcmp(start, dup_key->data, set->klen) && + !memcmp(end, dup_end->data, set->klen)) { *elem_priv = &dup->priv; return -EEXIST; } From a675d1caa2041f05f6343fad67b04f8babf32217 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Dec 2025 12:20:36 +0100 Subject: [PATCH 02/70] selftests: netfilter: nft_concat_range.sh: add check for overlap detection bug without 'netfilter: nft_set_pipapo: fix range overlap detection': reject overlapping range on add 0s [FAIL] Returned success for add { 1.2.3.4 . 1.2.4.1-1.2.4.2 } given set: table inet filter { [..] elements = { 1.2.3.4 . 1.2.4.1 counter packets 0 bytes 0, 1.2.3.0-1.2.3.4 . 1.2.4.2 counter packets 0 bytes 0 } } The element collides with existing ones and was not added, but kernel returned success to userspace. Signed-off-by: Florian Westphal --- .../net/netfilter/nft_concat_range.sh | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index ad97c6227f35..394166f224a4 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -420,6 +420,18 @@ race_repeat 0 perf_duration 0 " +TYPE_insert_overlap=" +display reject overlapping range on add +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1954,6 +1966,37 @@ EOF return 0 } +add_fail() +{ + if nft add element inet filter test "$1" 2>/dev/null ; then + err "Returned success for add ${1} given set:" + err "$(nft -a list set inet filter test )" + return 1 + fi + + return 0 +} + +test_bug_insert_overlap() +{ + local elements="1.2.3.4 . 1.2.4.1" + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + + elements="1.2.3.0-1.2.3.4 . 1.2.4.1" + add_fail "{ $elements }" || return 1 + + elements="1.2.3.0-1.2.3.4 . 1.2.4.2" + add "{ $elements }" || return 1 + + elements="1.2.3.4 . 1.2.4.1-1.2.4.2" + add_fail "{ $elements }" || return 1 + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } From 36a3200575642846a96436d503d46544533bb943 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 17 Dec 2025 21:21:59 +0100 Subject: [PATCH 03/70] netfilter: nft_synproxy: avoid possible data-race on update operation During nft_synproxy eval we are reading nf_synproxy_info struct which can be modified on update operation concurrently. As nf_synproxy_info struct fits in 32 bits, use READ_ONCE/WRITE_ONCE annotations. Fixes: ee394f96ad75 ("netfilter: nft_synproxy: add synproxy stateful object support") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nft_synproxy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 5d3e51825985..4d3e5a31b412 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -48,7 +48,7 @@ static void nft_synproxy_eval_v4(const struct nft_synproxy *priv, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nf_synproxy_info info = priv->info; + struct nf_synproxy_info info = READ_ONCE(priv->info); struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *skb = pkt->skb; @@ -79,7 +79,7 @@ static void nft_synproxy_eval_v6(const struct nft_synproxy *priv, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nf_synproxy_info info = priv->info; + struct nf_synproxy_info info = READ_ONCE(priv->info); struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *skb = pkt->skb; @@ -340,7 +340,7 @@ static void nft_synproxy_obj_update(struct nft_object *obj, struct nft_synproxy *newpriv = nft_obj_data(newobj); struct nft_synproxy *priv = nft_obj_data(obj); - priv->info = newpriv->info; + WRITE_ONCE(priv->info, newpriv->info); } static struct nft_object_type nft_synproxy_obj_type; From 2bafeb8d2f380c3a81d98bd7b78b854b564f9cd4 Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Fri, 19 Dec 2025 06:13:20 +0100 Subject: [PATCH 04/70] netfilter: replace -EEXIST with -EBUSY The -EEXIST error code is reserved by the module loading infrastructure to indicate that a module is already loaded. When a module's init function returns -EEXIST, userspace tools like kmod interpret this as "module already loaded" and treat the operation as successful, returning 0 to the user even though the module initialization actually failed. Replace -EEXIST with -EBUSY to ensure correct error reporting in the module initialization path. Affected modules: * ebtable_broute ebtable_filter ebtable_nat arptable_filter * ip6table_filter ip6table_mangle ip6table_nat ip6table_raw * ip6table_security iptable_filter iptable_mangle iptable_nat * iptable_raw iptable_security Signed-off-by: Daniel Gomez Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 2 +- net/netfilter/nf_log.c | 4 ++-- net/netfilter/x_tables.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5697e3949a36..a04fc1757528 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1299,7 +1299,7 @@ int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct ne list_for_each_entry(tmpl, &template_tables, list) { if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) { mutex_unlock(&ebt_mutex); - return -EEXIST; + return -EBUSY; } } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 74cef8bf554c..62cf6a30875e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -89,7 +89,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) if (pf == NFPROTO_UNSPEC) { for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { if (rcu_access_pointer(loggers[i][logger->type])) { - ret = -EEXIST; + ret = -EBUSY; goto unlock; } } @@ -97,7 +97,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) rcu_assign_pointer(loggers[i][logger->type], logger); } else { if (rcu_access_pointer(loggers[pf][logger->type])) { - ret = -EEXIST; + ret = -EBUSY; goto unlock; } rcu_assign_pointer(loggers[pf][logger->type], logger); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 90b7630421c4..48105ea3df15 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1764,7 +1764,7 @@ EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); int xt_register_template(const struct xt_table *table, int (*table_init)(struct net *net)) { - int ret = -EEXIST, af = table->af; + int ret = -EBUSY, af = table->af; struct xt_template *t; mutex_lock(&xt[af].mutex); From d077e8119ddbb4fca67540f1a52453631a47f221 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 24 Dec 2025 12:48:26 +0000 Subject: [PATCH 05/70] netfilter: nf_tables: fix memory leak in nf_tables_newrule() In nf_tables_newrule(), if nft_use_inc() fails, the function jumps to the err_release_rule label without freeing the allocated flow, leading to a memory leak. Fix this by adding a new label err_destroy_flow and jumping to it when nft_use_inc() fails. This ensures that the flow is properly released in this error case. Fixes: 1689f25924ada ("netfilter: nf_tables: report use refcount overflow") Signed-off-by: Zilin Guan Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 618af6e90773..729a92781a1a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4439,7 +4439,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&chain->use)) { err = -EMFILE; - goto err_release_rule; + goto err_destroy_flow; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { @@ -4489,6 +4489,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, err_destroy_flow_rule: nft_use_dec_restore(&chain->use); +err_destroy_flow: if (flow) nft_flow_rule_destroy(flow); err_release_rule: From 7811ba452402d58628e68faedf38745b3d485e3c Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 17 Dec 2025 15:46:40 +0100 Subject: [PATCH 06/70] netfilter: nf_conncount: update last_gc only when GC has been performed Currently last_gc is being updated everytime a new connection is tracked, that means that it is updated even if a GC wasn't performed. With a sufficiently high packet rate, it is possible to always bypass the GC, causing the list to grow infinitely. Update the last_gc value only when a GC has been actually performed. Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nf_conncount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 3654f1e8976c..8487808c8761 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -229,6 +229,7 @@ static int __nf_conncount_add(struct net *net, nf_ct_put(found_ct); } + list->last_gc = (u32)jiffies; add_new_node: if (WARN_ON_ONCE(list->count > INT_MAX)) { @@ -248,7 +249,6 @@ add_new_node: conn->jiffies32 = (u32)jiffies; list_add_tail(&conn->node, &list->head); list->count++; - list->last_gc = (u32)jiffies; out_put: if (refcounted) From a428e0da1248c353557970848994f35fd3f005e2 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 29 Dec 2025 21:21:18 -0800 Subject: [PATCH 07/70] net: marvell: prestera: fix NULL dereference on devlink_alloc() failure devlink_alloc() may return NULL on allocation failure, but prestera_devlink_alloc() unconditionally calls devlink_priv() on the returned pointer. This leads to a NULL pointer dereference if devlink allocation fails. Add a check for a NULL devlink pointer and return NULL early to avoid the crash. Fixes: 34dd1710f5a3 ("net: marvell: prestera: Add basic devlink support") Signed-off-by: Alok Tiwari Acked-by: Elad Nachman Link: https://patch.msgid.link/20251230052124.897012-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index 2a4c9df4eb79..e63d95c1842f 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -387,6 +387,8 @@ struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev) dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch), dev->dev); + if (!dl) + return NULL; return devlink_priv(dl); } From d7065436e8a04520f60c18240b775861be7999d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20Bl=C3=B6chl?= Date: Sun, 28 Dec 2025 16:52:59 +0100 Subject: [PATCH 08/70] net: bnge: add AUXILIARY_BUS to Kconfig dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build can currently fail with ld: drivers/net/ethernet/broadcom/bnge/bnge_auxr.o: in function `bnge_rdma_aux_device_add': bnge_auxr.c:(.text+0x366): undefined reference to `__auxiliary_device_add' ld: drivers/net/ethernet/broadcom/bnge/bnge_auxr.o: in function `bnge_rdma_aux_device_init': bnge_auxr.c:(.text+0x43c): undefined reference to `auxiliary_device_init' if BNGE is enabled but no other driver pulls in AUXILIARY_BUS. Select AUXILIARY_BUS in BNGE like in all other drivers which create an auxiliary_device. Fixes: 8ac050ec3b1c ("bng_en: Add RoCE aux device support") Signed-off-by: Markus Blöchl Reviewed-by: Vikas Gupta Link: https://patch.msgid.link/20251228-bnge_aux_bus-v1-1-82e273ebfdac@blochl.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index ca565ace6e6a..cd7dddeb91dd 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -259,6 +259,7 @@ config BNGE depends on PCI select NET_DEVLINK select PAGE_POOL + select AUXILIARY_BUS help This driver supports Broadcom ThorUltra 50/100/200/400/800 gigabit Ethernet cards. The module will be called bng_en. To compile this From 3128df6be147768fe536986fbb85db1d37806a9f Mon Sep 17 00:00:00 2001 From: Alexandre Knecht Date: Sun, 28 Dec 2025 03:00:57 +0100 Subject: [PATCH 09/70] bridge: fix C-VLAN preservation in 802.1ad vlan_tunnel egress When using an 802.1ad bridge with vlan_tunnel, the C-VLAN tag is incorrectly stripped from frames during egress processing. br_handle_egress_vlan_tunnel() uses skb_vlan_pop() to remove the S-VLAN from hwaccel before VXLAN encapsulation. However, skb_vlan_pop() also moves any "next" VLAN from the payload into hwaccel: /* move next vlan tag to hw accel tag */ __skb_vlan_pop(skb, &vlan_tci); __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); For QinQ frames where the C-VLAN sits in the payload, this moves it to hwaccel where it gets lost during VXLAN encapsulation. Fix by calling __vlan_hwaccel_clear_tag() directly, which clears only the hwaccel S-VLAN and leaves the payload untouched. This path is only taken when vlan_tunnel is enabled and tunnel_info is configured, so 802.1Q bridges are unaffected. Tested with 802.1ad bridge + VXLAN vlan_tunnel, verified C-VLAN preserved in VXLAN payload via tcpdump. Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths") Signed-off-by: Alexandre Knecht Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20251228020057.2788865-1-knecht.alexandre@gmail.com Signed-off-by: Jakub Kicinski --- net/bridge/br_vlan_tunnel.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index a966a6ec8263..257cae9f1569 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -189,7 +189,6 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tunnel_dst; __be64 tunnel_id; - int err; if (!vlan) return 0; @@ -199,9 +198,13 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, return 0; skb_dst_drop(skb); - err = skb_vlan_pop(skb); - if (err) - return err; + /* For 802.1ad (QinQ), skb_vlan_pop() incorrectly moves the C-VLAN + * from payload to hwaccel after clearing S-VLAN. We only need to + * clear the hwaccel S-VLAN; the C-VLAN must stay in payload for + * correct VXLAN encapsulation. This is also correct for 802.1Q + * where no C-VLAN exists in payload. + */ + __vlan_hwaccel_clear_tag(skb); if (BR_INPUT_SKB_CB(skb)->backup_nhid) { __set_bit(IP_TUNNEL_KEY_BIT, flags); From 34f3ff52cb9fa7dbf04f5c734fcc4cb6ed5d1a95 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Thu, 25 Dec 2025 20:36:17 +0000 Subject: [PATCH 10/70] net: mscc: ocelot: Fix crash when adding interface under a lag Commit 15faa1f67ab4 ("lan966x: Fix crash when adding interface under a lag") fixed a similar issue in the lan966x driver caused by a NULL pointer dereference. The ocelot_set_aggr_pgids() function in the ocelot driver has similar logic and is susceptible to the same crash. This issue specifically affects the ocelot_vsc7514.c frontend, which leaves unused ports as NULL pointers. The felix_vsc9959.c frontend is unaffected as it uses the DSA framework which registers all ports. Fix this by checking if the port pointer is valid before accessing it. Fixes: 528d3f190c98 ("net: mscc: ocelot: drop the use of the "lags" array") Signed-off-by: Jerry Wu Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/tencent_75EF812B305E26B0869C673DD1160866C90A@qq.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 08bee56aea35..c345d9b17c89 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2307,14 +2307,16 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < ocelot->num_phys_ports; lag++) { - struct net_device *bond = ocelot->ports[lag]->bond; + struct ocelot_port *ocelot_port = ocelot->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!ocelot_port || !ocelot_port->bond || (visited & BIT(lag))) continue; + bond = ocelot_port->bond; bond_mask = ocelot_get_bond_mask(ocelot, bond); for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) { From 4c0856c225b39b1def6c9a6bc56faca79550da13 Mon Sep 17 00:00:00 2001 From: "yuan.gao" Date: Wed, 24 Dec 2025 14:31:45 +0800 Subject: [PATCH 11/70] inet: ping: Fix icmp out counting When the ping program uses an IPPROTO_ICMP socket to send ICMP_ECHO messages, ICMP_MIB_OUTMSGS is counted twice. ping_v4_sendmsg ping_v4_push_pending_frames ip_push_pending_frames ip_finish_skb __ip_make_skb icmp_out_count(net, icmp_type); // first count icmp_out_count(sock_net(sk), user_icmph.type); // second count However, when the ping program uses an IPPROTO_RAW socket, ICMP_MIB_OUTMSGS is counted correctly only once. Therefore, the first count should be removed. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: yuan.gao Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20251224063145.3615282-1-yuan.gao@ucloud.cn Signed-off-by: Jakub Kicinski --- net/ipv4/ping.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index ad56588107cc..cfbd563498e8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -828,10 +828,8 @@ out: out_free: if (free) kfree(ipc.opt); - if (!err) { - icmp_out_count(sock_net(sk), user_icmph.type); + if (!err) return len; - } return err; do_confirm: From 62f7edd59964eb588e96fce1ad35a2327ea54424 Mon Sep 17 00:00:00 2001 From: Stefano Radaelli Date: Tue, 23 Dec 2025 13:09:39 +0100 Subject: [PATCH 12/70] net: phy: mxl-86110: Add power management and soft reset support Implement soft_reset, suspend, and resume callbacks using genphy_soft_reset(), genphy_suspend(), and genphy_resume() to fix PHY initialization and power management issues. The soft_reset callback is needed to properly recover the PHY after an ifconfig down/up cycle. Without it, the PHY can remain in power-down state, causing MDIO register access failures during config_init(). The soft reset ensures the PHY is operational before configuration. The suspend/resume callbacks enable proper power management during system suspend/resume cycles. Fixes: b2908a989c59 ("net: phy: add driver for MaxLinear MxL86110 PHY") Signed-off-by: Stefano Radaelli Link: https://patch.msgid.link/20251223120940.407195-1-stefano.r@variscite.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mxl-86110.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/mxl-86110.c b/drivers/net/phy/mxl-86110.c index e5d137a37a1d..42a5fe3f115f 100644 --- a/drivers/net/phy/mxl-86110.c +++ b/drivers/net/phy/mxl-86110.c @@ -938,6 +938,9 @@ static struct phy_driver mxl_phy_drvs[] = { PHY_ID_MATCH_EXACT(PHY_ID_MXL86110), .name = "MXL86110 Gigabit Ethernet", .config_init = mxl86110_config_init, + .suspend = genphy_suspend, + .resume = genphy_resume, + .soft_reset = genphy_soft_reset, .get_wol = mxl86110_get_wol, .set_wol = mxl86110_set_wol, .led_brightness_set = mxl86110_led_brightness_set, From 2a71a1a8d0ed718b1c7a9ac61f07e5755c47ae20 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 24 Dec 2025 04:35:35 +0800 Subject: [PATCH 13/70] net: sock: fix hardened usercopy panic in sock_recv_errqueue skbuff_fclone_cache was created without defining a usercopy region, [1] unlike skbuff_head_cache which properly whitelists the cb[] field. [2] This causes a usercopy BUG() when CONFIG_HARDENED_USERCOPY is enabled and the kernel attempts to copy sk_buff.cb data to userspace via sock_recv_errqueue() -> put_cmsg(). The crash occurs when: 1. TCP allocates an skb using alloc_skb_fclone() (from skbuff_fclone_cache) [1] 2. The skb is cloned via skb_clone() using the pre-allocated fclone [3] 3. The cloned skb is queued to sk_error_queue for timestamp reporting 4. Userspace reads the error queue via recvmsg(MSG_ERRQUEUE) 5. sock_recv_errqueue() calls put_cmsg() to copy serr->ee from skb->cb [4] 6. __check_heap_object() fails because skbuff_fclone_cache has no usercopy whitelist [5] When cloned skbs allocated from skbuff_fclone_cache are used in the socket error queue, accessing the sock_exterr_skb structure in skb->cb via put_cmsg() triggers a usercopy hardening violation: [ 5.379589] usercopy: Kernel memory exposure attempt detected from SLUB object 'skbuff_fclone_cache' (offset 296, size 16)! [ 5.382796] kernel BUG at mm/usercopy.c:102! [ 5.383923] Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI [ 5.384903] CPU: 1 UID: 0 PID: 138 Comm: poc_put_cmsg Not tainted 6.12.57 #7 [ 5.384903] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 5.384903] RIP: 0010:usercopy_abort+0x6c/0x80 [ 5.384903] Code: 1a 86 51 48 c7 c2 40 15 1a 86 41 52 48 c7 c7 c0 15 1a 86 48 0f 45 d6 48 c7 c6 80 15 1a 86 48 89 c1 49 0f 45 f3 e8 84 27 88 ff <0f> 0b 490 [ 5.384903] RSP: 0018:ffffc900006f77a8 EFLAGS: 00010246 [ 5.384903] RAX: 000000000000006f RBX: ffff88800f0ad2a8 RCX: 1ffffffff0f72e74 [ 5.384903] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff87b973a0 [ 5.384903] RBP: 0000000000000010 R08: 0000000000000000 R09: fffffbfff0f72e74 [ 5.384903] R10: 0000000000000003 R11: 79706f6372657375 R12: 0000000000000001 [ 5.384903] R13: ffff88800f0ad2b8 R14: ffffea00003c2b40 R15: ffffea00003c2b00 [ 5.384903] FS: 0000000011bc4380(0000) GS:ffff8880bf100000(0000) knlGS:0000000000000000 [ 5.384903] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.384903] CR2: 000056aa3b8e5fe4 CR3: 000000000ea26004 CR4: 0000000000770ef0 [ 5.384903] PKRU: 55555554 [ 5.384903] Call Trace: [ 5.384903] [ 5.384903] __check_heap_object+0x9a/0xd0 [ 5.384903] __check_object_size+0x46c/0x690 [ 5.384903] put_cmsg+0x129/0x5e0 [ 5.384903] sock_recv_errqueue+0x22f/0x380 [ 5.384903] tls_sw_recvmsg+0x7ed/0x1960 [ 5.384903] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5.384903] ? schedule+0x6d/0x270 [ 5.384903] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5.384903] ? mutex_unlock+0x81/0xd0 [ 5.384903] ? __pfx_mutex_unlock+0x10/0x10 [ 5.384903] ? __pfx_tls_sw_recvmsg+0x10/0x10 [ 5.384903] ? _raw_spin_lock_irqsave+0x8f/0xf0 [ 5.384903] ? _raw_read_unlock_irqrestore+0x20/0x40 [ 5.384903] ? srso_alias_return_thunk+0x5/0xfbef5 The crash offset 296 corresponds to skb2->cb within skbuff_fclones: - sizeof(struct sk_buff) = 232 - offsetof(struct sk_buff, cb) = 40 - offset of skb2.cb in fclones = 232 + 40 = 272 - crash offset 296 = 272 + 24 (inside sock_exterr_skb.ee) This patch uses a local stack variable as a bounce buffer to avoid the hardened usercopy check failure. [1] https://elixir.bootlin.com/linux/v6.12.62/source/net/ipv4/tcp.c#L885 [2] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5104 [3] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5566 [4] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5491 [5] https://elixir.bootlin.com/linux/v6.12.62/source/mm/slub.c#L5719 Fixes: 6d07d1cd300f ("usercopy: Restrict non-usercopy caches to size 0") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251223203534.1392218-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 45c98bf524b2..a1c8b47b0d56 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3896,7 +3896,7 @@ void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type) { - struct sock_exterr_skb *serr; + struct sock_extended_err ee; struct sk_buff *skb; int copied, err; @@ -3916,8 +3916,9 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, sock_recv_timestamp(msg, sk, skb); - serr = SKB_EXT_ERR(skb); - put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee); + /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */ + ee = SKB_EXT_ERR(skb)->ee; + put_cmsg(msg, level, type, sizeof(ee), &ee); msg->msg_flags |= MSG_ERRQUEUE; err = copied; From 02d1e1a3f9239cdb3ecf2c6d365fb959d1bf39df Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Wed, 24 Dec 2025 09:22:24 +0800 Subject: [PATCH 14/70] netdev: preserve NETIF_F_ALL_FOR_ALL across TSO updates Directly increment the TSO features incurs a side effect: it will also directly clear the flags in NETIF_F_ALL_FOR_ALL on the master device, which can cause issues such as the inability to enable the nocache copy feature on the bonding driver. The fix is to include NETIF_F_ALL_FOR_ALL in the update mask, thereby preventing it from being cleared. Fixes: b0ce3508b25e ("bonding: allow TSO being set on bonding master") Signed-off-by: Di Zhu Link: https://patch.msgid.link/20251224012224.56185-1-zhud@hygon.cn Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5870a9e514a5..d99b0fbc1942 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5323,7 +5323,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all, static inline netdev_features_t netdev_add_tso_features(netdev_features_t features, netdev_features_t mask) { - return netdev_increment_features(features, NETIF_F_ALL_TSO, mask); + return netdev_increment_features(features, NETIF_F_ALL_TSO | + NETIF_F_ALL_FOR_ALL, mask); } int __netdev_update_features(struct net_device *dev); From 31057979cdadfee9f934746fd84046b43506ba61 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 25 Dec 2025 15:27:13 +0200 Subject: [PATCH 15/70] net/mlx5: Lag, multipath, give priority for routes with smaller network prefix Today multipath offload is controlled by a single route and the route controlling is selected if it meets one of the following criteria: 1. No controlling route is set. 2. New route destination is the same as old one. 3. New route metric is lower than old route metric. This can cause unwanted behaviour in case a new route is added with a smaller network prefix which should get the priority. Fix this by adding a new criteria to give priority to new route with a smaller network prefix. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Patrisious Haddad Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index aee17fcf3b36..cdc99fe5c956 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -173,10 +173,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, } /* Handle multipath entry with lower priority value */ - if (mp->fib.mfi && mp->fib.mfi != fi && + if (mp->fib.mfi && (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && - fi->fib_priority >= mp->fib.priority) + mp->fib.dst_len <= fen_info->dst_len && + !(mp->fib.dst_len == fen_info->dst_len && + fi->fib_priority < mp->fib.priority)) { + mlx5_core_dbg(ldev->pf[idx].dev, + "Multipath entry with lower priority was rejected\n"); return; + } nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL); nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0); From 6c75dc9de40ff91ec2b621b78f6cd9031762067c Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Thu, 25 Dec 2025 15:27:14 +0200 Subject: [PATCH 16/70] net/mlx5e: Don't gate FEC histograms on ppcnt_statistical_group Currently, the ppcnt_statistical_group capability check incorrectly gates access to FEC histogram statistics. This capability applies only to statistical and physical counter groups, not for histogram data. Restrict the ppcnt_statistical_group check to the Physical_Layer_Counters and Physical_Layer_Statistical_Counters groups. Histogram statistics access remains gated by the pphcr capability. The issue is harmless as of today, as it happens that ppcnt_statistical_group is set on all existing devices that have pphcr set. Fixes: 6b81b8a0b197 ("net/mlx5e: Don't query FEC statistics when FEC is disabled") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index a2802cfc9b98..a8af84fc9763 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1608,12 +1608,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv, { int mode = fec_active_mode(priv->mdev); - if (mode == MLX5E_FEC_NOFEC || - !MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + if (mode == MLX5E_FEC_NOFEC) return; - fec_set_corrected_bits_total(priv, fec_stats); - fec_set_block_stats(priv, mode, fec_stats); + if (MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) { + fec_set_corrected_bits_total(priv, fec_stats); + fec_set_block_stats(priv, mode, fec_stats); + } if (MLX5_CAP_PCAM_REG(priv->mdev, pphcr)) fec_set_histograms_stats(priv, mode, hist); From 7d36a4a8bf62dc508bc6bb4b59727aec25064ca5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 25 Dec 2025 15:27:15 +0200 Subject: [PATCH 17/70] net/mlx5e: Fix NULL pointer dereference in ioctl module EEPROM query The mlx5_query_mcia() function unconditionally dereferences the status pointer to store the MCIA register status value. However, mlx5e_get_module_id() passes NULL since it doesn't need the status value. Add a NULL check before dereferencing the status pointer to prevent a NULL pointer dereference. Fixes: 2e4c44b12f4d ("net/mlx5: Refactor EEPROM query error handling to return status separately") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 85a9e534f442..8f36454dd196 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -393,9 +393,11 @@ static int mlx5_query_mcia(struct mlx5_core_dev *dev, if (err) return err; - *status = MLX5_GET(mcia_reg, out, status); - if (*status) + if (MLX5_GET(mcia_reg, out, status)) { + if (status) + *status = MLX5_GET(mcia_reg, out, status); return -EIO; + } ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); memcpy(data, ptr, size); From 144297e2a24e3e54aee1180ec21120ea38822b97 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 25 Dec 2025 15:27:16 +0200 Subject: [PATCH 18/70] net/mlx5e: Don't print error message due to invalid module Dumping module EEPROM on newer modules is supported through the netlink interface only. Querying with old userspace ethtool (or other tools, such as 'lshw') which still uses the ioctl interface results in an error message that could flood dmesg (in addition to the expected error return value). The original message was added under the assumption that the driver should be able to handle all module types, but now that such flows are easily triggered from userspace, it doesn't serve its purpose. Change the log level of the print in mlx5_query_module_eeprom() to debug. Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 8f36454dd196..7f8bed353e67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -431,7 +431,8 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; default: - mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); + mlx5_core_dbg(dev, "Module ID not recognized: 0x%x\n", + module_id); return -EINVAL; } From 0462a15d2d1fafd3d48cf3c7c67393e42d03908c Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 25 Dec 2025 15:27:17 +0200 Subject: [PATCH 19/70] net/mlx5e: Dealloc forgotten PSP RX modify header The commit which added RX steering rules for PSP forgot to free a modify header HW object on the cleanup path, which lead to health errors when reloading the driver and uninitializing the device: mlx5_core 0000:08:00.0: poll_health:803:(pid 3021): Fatal error 3 detected Fix that by saving the modify header pointer in the PSP steering struct and deallocating it after freeing the rule which references it. Fixes: 9536fbe10c9d ("net/mlx5e: Add PSP steering in local NIC RX") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_accel/psp.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index 38e7c77cc851..9a74438ce10a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -44,6 +44,7 @@ struct mlx5e_accel_fs_psp_prot { struct mlx5_flow_table *ft; struct mlx5_flow_group *miss_group; struct mlx5_flow_handle *miss_rule; + struct mlx5_modify_hdr *rx_modify_hdr; struct mlx5_flow_destination default_dest; struct mlx5e_psp_rx_err rx_err; u32 refcnt; @@ -286,13 +287,19 @@ out_err: return err; } -static void accel_psp_fs_rx_fs_destroy(struct mlx5e_accel_fs_psp_prot *fs_prot) +static void accel_psp_fs_rx_fs_destroy(struct mlx5e_psp_fs *fs, + struct mlx5e_accel_fs_psp_prot *fs_prot) { if (fs_prot->def_rule) { mlx5_del_flow_rules(fs_prot->def_rule); fs_prot->def_rule = NULL; } + if (fs_prot->rx_modify_hdr) { + mlx5_modify_header_dealloc(fs->mdev, fs_prot->rx_modify_hdr); + fs_prot->rx_modify_hdr = NULL; + } + if (fs_prot->miss_rule) { mlx5_del_flow_rules(fs_prot->miss_rule); fs_prot->miss_rule = NULL; @@ -396,6 +403,7 @@ static int accel_psp_fs_rx_create_ft(struct mlx5e_psp_fs *fs, modify_hdr = NULL; goto out_err; } + fs_prot->rx_modify_hdr = modify_hdr; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | @@ -416,7 +424,7 @@ static int accel_psp_fs_rx_create_ft(struct mlx5e_psp_fs *fs, goto out; out_err: - accel_psp_fs_rx_fs_destroy(fs_prot); + accel_psp_fs_rx_fs_destroy(fs, fs_prot); out: kvfree(flow_group_in); kvfree(spec); @@ -433,7 +441,7 @@ static int accel_psp_fs_rx_destroy(struct mlx5e_psp_fs *fs, enum accel_fs_psp_ty /* The netdev unreg already happened, so all offloaded rule are already removed */ fs_prot = &accel_psp->fs_prot[type]; - accel_psp_fs_rx_fs_destroy(fs_prot); + accel_psp_fs_rx_fs_destroy(fs, fs_prot); accel_psp_fs_rx_err_destroy_ft(fs, &fs_prot->rx_err); From 8da901ffe497a53fa4ecc3ceed0e6d771586f88e Mon Sep 17 00:00:00 2001 From: Frank Liang Date: Wed, 31 Dec 2025 22:58:08 +0800 Subject: [PATCH 20/70] net/ena: fix missing lock when update devlink params Fix assert lock warning while calling devl_param_driverinit_value_set() in ena. WARNING: net/devlink/core.c:261 at devl_assert_locked+0x62/0x90, CPU#0: kworker/0:0/9 CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.19.0-rc2+ #1 PREEMPT(lazy) Hardware name: Amazon EC2 m8i-flex.4xlarge/, BIOS 1.0 10/16/2017 Workqueue: events work_for_cpu_fn RIP: 0010:devl_assert_locked+0x62/0x90 Call Trace: devl_param_driverinit_value_set+0x15/0x1c0 ena_devlink_alloc+0x18c/0x220 [ena] ? __pfx_ena_devlink_alloc+0x10/0x10 [ena] ? trace_hardirqs_on+0x18/0x140 ? lockdep_hardirqs_on+0x8c/0x130 ? __raw_spin_unlock_irqrestore+0x5d/0x80 ? __raw_spin_unlock_irqrestore+0x46/0x80 ? devm_ioremap_wc+0x9a/0xd0 ena_probe+0x4d2/0x1b20 [ena] ? __lock_acquire+0x56a/0xbd0 ? __pfx_ena_probe+0x10/0x10 [ena] ? local_clock+0x15/0x30 ? __lock_release.isra.0+0x1c9/0x340 ? mark_held_locks+0x40/0x70 ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170 ? trace_hardirqs_on+0x18/0x140 ? lockdep_hardirqs_on+0x8c/0x130 ? __raw_spin_unlock_irqrestore+0x5d/0x80 ? __raw_spin_unlock_irqrestore+0x46/0x80 ? __pfx_ena_probe+0x10/0x10 [ena] ...... Fixes: 816b52624cf6 ("net: ena: Control PHC enable through devlink") Signed-off-by: Frank Liang Reviewed-by: David Arinzon Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251231145808.6103-1-xiliang@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_devlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_devlink.c b/drivers/net/ethernet/amazon/ena/ena_devlink.c index ac81c24016dd..4772185e669d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_devlink.c +++ b/drivers/net/ethernet/amazon/ena/ena_devlink.c @@ -53,10 +53,12 @@ void ena_devlink_disable_phc_param(struct devlink *devlink) { union devlink_param_value value; + devl_lock(devlink); value.vbool = false; devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, value); + devl_unlock(devlink); } static void ena_devlink_port_register(struct devlink *devlink) @@ -145,10 +147,12 @@ static int ena_devlink_configure_params(struct devlink *devlink) return rc; } + devl_lock(devlink); value.vbool = ena_phc_is_enabled(adapter); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, value); + devl_unlock(devlink); return 0; } From 92e6e0a87f6860a4710f9494f8c704d498ae60f8 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Tue, 30 Dec 2025 07:18:53 +0000 Subject: [PATCH 21/70] net: wwan: iosm: Fix memory leak in ipc_mux_deinit() Commit 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") allocated memory for pp_qlt in ipc_mux_init() but did not free it in ipc_mux_deinit(). This results in a memory leak when the driver is unloaded. Free the allocated memory in ipc_mux_deinit() to fix the leak. Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") Co-developed-by: Jianhao Xu Signed-off-by: Jianhao Xu Signed-off-by: Zilin Guan Reviewed-by: Loic Poulain Link: https://patch.msgid.link/20251230071853.1062223-1-zilin@seu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_mux.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c index fc928b298a98..b846889fcb09 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c @@ -456,6 +456,7 @@ void ipc_mux_deinit(struct iosm_mux *ipc_mux) struct sk_buff_head *free_list; union mux_msg mux_msg; struct sk_buff *skb; + int i; if (!ipc_mux->initialized) return; @@ -479,5 +480,10 @@ void ipc_mux_deinit(struct iosm_mux *ipc_mux) ipc_mux->channel->dl_pipe.is_open = false; } + if (ipc_mux->protocol != MUX_LITE) { + for (i = 0; i < IPC_MEM_MUX_IP_SESSION_ENTRIES; i++) + kfree(ipc_mux->ul_adb.pp_qlt[i]); + } + kfree(ipc_mux); } From ffeafa65b2b26df2f5b5a6118d3174f17bd12ec5 Mon Sep 17 00:00:00 2001 From: Srijit Bose Date: Wed, 31 Dec 2025 00:36:25 -0800 Subject: [PATCH 22/70] bnxt_en: Fix potential data corruption with HW GRO/LRO Fix the max number of bits passed to find_first_zero_bit() in bnxt_alloc_agg_idx(). We were incorrectly passing the number of long words. find_first_zero_bit() may fail to find a zero bit and cause a wrong ID to be used. If the wrong ID is already in use, this can cause data corruption. Sometimes an error like this can also be seen: bnxt_en 0000:83:00.0 enp131s0np0: TPA end agg_buf 2 != expected agg_bufs 1 Fix it by passing the correct number of bits MAX_TPA_P5. Use DECLARE_BITMAP() to more cleanly define the bitmap. Add a sanity check to warn if a bit cannot be found and reset the ring [MChan]. Fixes: ec4d8e7cf024 ("bnxt_en: Add TPA ID mapping logic for 57500 chips.") Reviewed-by: Ray Jui Signed-off-by: Srijit Bose Signed-off-by: Michael Chan Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251231083625.3911652-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 +--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d17d0ea89c36..d160e54ac121 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1482,9 +1482,11 @@ static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id) struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; u16 idx = agg_id & MAX_TPA_P5_MASK; - if (test_bit(idx, map->agg_idx_bmap)) - idx = find_first_zero_bit(map->agg_idx_bmap, - BNXT_AGG_IDX_BMAP_SIZE); + if (test_bit(idx, map->agg_idx_bmap)) { + idx = find_first_zero_bit(map->agg_idx_bmap, MAX_TPA_P5); + if (idx >= MAX_TPA_P5) + return INVALID_HW_RING_ID; + } __set_bit(idx, map->agg_idx_bmap); map->agg_id_tbl[agg_id] = idx; return idx; @@ -1548,6 +1550,13 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { agg_id = TPA_START_AGG_ID_P5(tpa_start); agg_id = bnxt_alloc_agg_idx(rxr, agg_id); + if (unlikely(agg_id == INVALID_HW_RING_ID)) { + netdev_warn(bp->dev, "Unable to allocate agg ID for ring %d, agg 0x%x\n", + rxr->bnapi->index, + TPA_START_AGG_ID_P5(tpa_start)); + bnxt_sched_reset_rxr(bp, rxr); + return; + } } else { agg_id = TPA_START_AGG_ID(tpa_start); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index f5f07a7e6b29..f88e7769a838 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1080,11 +1080,9 @@ struct bnxt_tpa_info { struct rx_agg_cmp *agg_arr; }; -#define BNXT_AGG_IDX_BMAP_SIZE (MAX_TPA_P5 / BITS_PER_LONG) - struct bnxt_tpa_idx_map { u16 agg_id_tbl[1024]; - unsigned long agg_idx_bmap[BNXT_AGG_IDX_BMAP_SIZE]; + DECLARE_BITMAP(agg_idx_bmap, MAX_TPA_P5); }; struct bnxt_rx_ring_info { From acb4bc6e1ba34ae1a34a9334a1ce8474c909466e Mon Sep 17 00:00:00 2001 From: Kommula Shiva Shankar Date: Fri, 2 Jan 2026 15:49:00 +0530 Subject: [PATCH 23/70] virtio_net: fix device mismatch in devm_kzalloc/devm_kfree Initial rss_hdr allocation uses virtio_device->device, but virtnet_set_queues() frees using net_device->device. This device mismatch causing below devres warning [ 3788.514041] ------------[ cut here ]------------ [ 3788.514044] WARNING: drivers/base/devres.c:1095 at devm_kfree+0x84/0x98, CPU#16: vdpa/1463 [ 3788.514054] Modules linked in: octep_vdpa virtio_net virtio_vdpa [last unloaded: virtio_vdpa] [ 3788.514064] CPU: 16 UID: 0 PID: 1463 Comm: vdpa Tainted: G W 6.18.0 #10 PREEMPT [ 3788.514067] Tainted: [W]=WARN [ 3788.514069] Hardware name: Marvell CN106XX board (DT) [ 3788.514071] pstate: 63400009 (nZCv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) [ 3788.514074] pc : devm_kfree+0x84/0x98 [ 3788.514076] lr : devm_kfree+0x54/0x98 [ 3788.514079] sp : ffff800084e2f220 [ 3788.514080] x29: ffff800084e2f220 x28: ffff0003b2366000 x27: 000000000000003f [ 3788.514085] x26: 000000000000003f x25: ffff000106f17c10 x24: 0000000000000080 [ 3788.514089] x23: ffff00045bb8ab08 x22: ffff00045bb8a000 x21: 0000000000000018 [ 3788.514093] x20: ffff0004355c3080 x19: ffff00045bb8aa00 x18: 0000000000080000 [ 3788.514098] x17: 0000000000000040 x16: 000000000000001f x15: 000000000007ffff [ 3788.514102] x14: 0000000000000488 x13: 0000000000000005 x12: 00000000000fffff [ 3788.514106] x11: ffffffffffffffff x10: 0000000000000005 x9 : ffff800080c8c05c [ 3788.514110] x8 : ffff800084e2eeb8 x7 : 0000000000000000 x6 : 000000000000003f [ 3788.514115] x5 : ffff8000831bafe0 x4 : ffff800080c8b010 x3 : ffff0004355c3080 [ 3788.514119] x2 : ffff0004355c3080 x1 : 0000000000000000 x0 : 0000000000000000 [ 3788.514123] Call trace: [ 3788.514125] devm_kfree+0x84/0x98 (P) [ 3788.514129] virtnet_set_queues+0x134/0x2e8 [virtio_net] [ 3788.514135] virtnet_probe+0x9c0/0xe00 [virtio_net] [ 3788.514139] virtio_dev_probe+0x1e0/0x338 [ 3788.514144] really_probe+0xc8/0x3a0 [ 3788.514149] __driver_probe_device+0x84/0x170 [ 3788.514152] driver_probe_device+0x44/0x120 [ 3788.514155] __device_attach_driver+0xc4/0x168 [ 3788.514158] bus_for_each_drv+0x8c/0xf0 [ 3788.514161] __device_attach+0xa4/0x1c0 [ 3788.514164] device_initial_probe+0x1c/0x30 [ 3788.514168] bus_probe_device+0xb4/0xc0 [ 3788.514170] device_add+0x614/0x828 [ 3788.514173] register_virtio_device+0x214/0x258 [ 3788.514175] virtio_vdpa_probe+0xa0/0x110 [virtio_vdpa] [ 3788.514179] vdpa_dev_probe+0xa8/0xd8 [ 3788.514183] really_probe+0xc8/0x3a0 [ 3788.514186] __driver_probe_device+0x84/0x170 [ 3788.514189] driver_probe_device+0x44/0x120 [ 3788.514192] __device_attach_driver+0xc4/0x168 [ 3788.514195] bus_for_each_drv+0x8c/0xf0 [ 3788.514197] __device_attach+0xa4/0x1c0 [ 3788.514200] device_initial_probe+0x1c/0x30 [ 3788.514203] bus_probe_device+0xb4/0xc0 [ 3788.514206] device_add+0x614/0x828 [ 3788.514209] _vdpa_register_device+0x58/0x88 [ 3788.514211] octep_vdpa_dev_add+0x104/0x228 [octep_vdpa] [ 3788.514215] vdpa_nl_cmd_dev_add_set_doit+0x2d0/0x3c0 [ 3788.514218] genl_family_rcv_msg_doit+0xe4/0x158 [ 3788.514222] genl_rcv_msg+0x218/0x298 [ 3788.514225] netlink_rcv_skb+0x64/0x138 [ 3788.514229] genl_rcv+0x40/0x60 [ 3788.514233] netlink_unicast+0x32c/0x3b0 [ 3788.514237] netlink_sendmsg+0x170/0x3b8 [ 3788.514241] __sys_sendto+0x12c/0x1c0 [ 3788.514246] __arm64_sys_sendto+0x30/0x48 [ 3788.514249] invoke_syscall.constprop.0+0x58/0xf8 [ 3788.514255] do_el0_svc+0x48/0xd0 [ 3788.514259] el0_svc+0x48/0x210 [ 3788.514264] el0t_64_sync_handler+0xa0/0xe8 [ 3788.514268] el0t_64_sync+0x198/0x1a0 [ 3788.514271] ---[ end trace 0000000000000000 ]--- Fix by using virtio_device->device consistently for allocation and deallocation Fixes: 4944be2f5ad8c ("virtio_net: Allocate rss_hdr with devres") Signed-off-by: Kommula Shiva Shankar Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20260102101900.692770-1-kshankar@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1bb3aeca66c6..22d894101c01 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3791,7 +3791,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) if (vi->has_rss && !netif_is_rxfh_configured(dev)) { old_rss_hdr = vi->rss_hdr; old_rss_trailer = vi->rss_trailer; - vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); + vi->rss_hdr = devm_kzalloc(&vi->vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); if (!vi->rss_hdr) { vi->rss_hdr = old_rss_hdr; return -ENOMEM; @@ -3802,7 +3802,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) if (!virtnet_commit_rss_command(vi)) { /* restore ctrl_rss if commit_rss_command failed */ - devm_kfree(&dev->dev, vi->rss_hdr); + devm_kfree(&vi->vdev->dev, vi->rss_hdr); vi->rss_hdr = old_rss_hdr; vi->rss_trailer = old_rss_trailer; @@ -3810,7 +3810,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) queue_pairs); return -EINVAL; } - devm_kfree(&dev->dev, old_rss_hdr); + devm_kfree(&vi->vdev->dev, old_rss_hdr); goto succ; } From 2ef02ac38d3c17f34a00c4b267d961a8d4b45d1a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 2 Jan 2026 15:00:07 +0100 Subject: [PATCH 24/70] inet: frags: drop fraglist conntrack references Jakub added a warning in nf_conntrack_cleanup_net_list() to make debugging leaked skbs/conntrack references more obvious. syzbot reports this as triggering, and I can also reproduce this via ip_defrag.sh selftest: conntrack cleanup blocked for 60s WARNING: net/netfilter/nf_conntrack_core.c:2512 [..] conntrack clenups gets stuck because there are skbs with still hold nf_conn references via their frag_list. net.core.skb_defer_max=0 makes the hang disappear. Eric Dumazet points out that skb_release_head_state() doesn't follow the fraglist. ip_defrag.sh can only reproduce this problem since commit 6471658dc66c ("udp: use skb_attempt_defer_free()"), but AFAICS this problem could happen with TCP as well if pmtu discovery is off. The relevant problem path for udp is: 1. netns emits fragmented packets 2. nf_defrag_v6_hook reassembles them (in output hook) 3. reassembled skb is tracked (skb owns nf_conn reference) 4. ip6_output refragments 5. refragmented packets also own nf_conn reference (ip6_fragment calls ip6_copy_metadata()) 6. on input path, nf_defrag_v6_hook skips defragmentation: the fragments already have skb->nf_conn attached 7. skbs are reassembled via ipv6_frag_rcv() 8. skb_consume_udp -> skb_attempt_defer_free() -> skb ends up in pcpu freelist, but still has nf_conn reference. Possible solutions: 1 let defrag engine drop nf_conn entry, OR 2 export kick_defer_list_purge() and call it from the conntrack netns exit callback, OR 3 add skb_has_frag_list() check to skb_attempt_defer_free() 2 & 3 also solve ip_defrag.sh hang but share same drawback: Such reassembled skbs, queued to socket, can prevent conntrack module removal until userspace has consumed the packet. While both tcp and udp stack do call nf_reset_ct() before placing skb on socket queue, that function doesn't iterate frag_list skbs. Therefore drop nf_conn entries when they are placed in defrag queue. Keep the nf_conn entry of the first (offset 0) skb so that reassembled skb retains nf_conn entry for sake of TX path. Note that fixes tag is incorrect; it points to the commit introducing the 'ip_defrag.sh reproducible problem': no need to backport this patch to every stable kernel. Reported-by: syzbot+4393c47753b7808dac7d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/693b0fa7.050a0220.4004e.040d.GAE@google.com/ Fixes: 6471658dc66c ("udp: use skb_attempt_defer_free()") Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260102140030.32367-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/ipv4/inet_fragment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 001ee5c4d962..4e6d7467ed44 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -488,6 +488,8 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, } FRAG_CB(skb)->ip_defrag_offset = offset; + if (offset) + nf_reset_ct(skb); return IPFRAG_OK; } From 1806d210e5a8f431ad4711766ae4a333d407d972 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sat, 3 Jan 2026 17:53:31 +0100 Subject: [PATCH 25/70] MAINTAINERS: Update email address for Justin Iurman Due to a change of employer, I'll be using a permanent and personal email address. Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20260103165331.20120-1-justin.iurman@gmail.com Signed-off-by: Jakub Kicinski --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 7a6110d0e46d..7354436a19c0 100644 --- a/.mailmap +++ b/.mailmap @@ -416,6 +416,7 @@ Juha Yrjola Juha Yrjola Juha Yrjola Julien Thierry +Justin Iurman Iskren Chernev Kalle Valo Kalle Valo diff --git a/MAINTAINERS b/MAINTAINERS index 765ad2daa218..410fd1f199f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18283,7 +18283,7 @@ X: net/wireless/ X: tools/testing/selftests/net/can/ NETWORKING [IOAM] -M: Justin Iurman +M: Justin Iurman S: Maintained F: Documentation/networking/ioam6* F: include/linux/ioam6* From ce5e612dd411de096aa041b9e9325ba1bec5f9f4 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 29 Dec 2025 20:43:10 +0100 Subject: [PATCH 26/70] vsock: Make accept()ed sockets use custom setsockopt() SO_ZEROCOPY handling in vsock_connectible_setsockopt() does not get called on accept()ed sockets due to a missing flag. Flip it. Fixes: e0718bd82e27 ("vsock: enable setting SO_ZEROCOPY") Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20251229-vsock-child-sock-custom-sockopt-v2-1-64778d6c4f88@rbox.co Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index adcba1b7bf74..a3505a4dcee0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1787,6 +1787,10 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, } else { newsock->state = SS_CONNECTED; sock_graft(connected, newsock); + + set_bit(SOCK_CUSTOM_SOCKOPT, + &connected->sk_socket->flags); + if (vsock_msgzerocopy_allow(vconnected->transport)) set_bit(SOCK_SUPPORT_ZC, &connected->sk_socket->flags); From caa20e9e155b9d2afcf658e2909659c0be45ec12 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 29 Dec 2025 20:43:11 +0100 Subject: [PATCH 27/70] vsock/test: Test setting SO_ZEROCOPY on accept()ed socket Make sure setsockopt(SOL_SOCKET, SO_ZEROCOPY) on an accept()ed socket is handled by vsock's implementation. Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20251229-vsock-child-sock-custom-sockopt-v2-2-64778d6c4f88@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 9e1250790f33..bbe3723babdc 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -2192,6 +2192,33 @@ static void test_stream_nolinger_server(const struct test_opts *opts) close(fd); } +static void test_stream_accepted_setsockopt_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_accepted_setsockopt_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -2371,6 +2398,11 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_unread_bytes_client, .run_server = test_seqpacket_unread_bytes_server, }, + { + .name = "SOCK_STREAM accept()ed socket custom setsockopt()", + .run_client = test_stream_accepted_setsockopt_client, + .run_server = test_stream_accepted_setsockopt_server, + }, {}, }; From 9892353726ad222219aa18c329e3a3636134dd56 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 1 Jan 2026 08:56:07 -0500 Subject: [PATCH 28/70] net/sched: act_mirred: Fix leak when redirecting to self on egress Whenever a mirred redirect to self on egress happens, mirred allocates a new skb (skb_to_send). The loop to self check was done after that allocation, but was not freeing the newly allocated skb, causing a leak. Fix this by moving the if-statement to before the allocation of the new skb. The issue was found by running the accompanying tdc test in 2/2 with config kmemleak enabled. After a few minutes the kmemleak thread ran and reported the leak coming from mirred. Fixes: 1d856251a009 ("net/sched: act_mirred: fix loop detection") Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260101135608.253079-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/act_mirred.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 91c96cc625bd..05e0b14b5773 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -266,21 +266,9 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, goto err_cant_do; } - /* we could easily avoid the clone only if called by ingress and clsact; - * since we can't easily detect the clsact caller, skip clone only for - * ingress - that covers the TC S/W datapath. - */ - at_ingress = skb_at_tc_ingress(skb); - dont_clone = skb_at_tc_ingress(skb) && is_redirect && - tcf_mirred_can_reinsert(retval); - if (!dont_clone) { - skb_to_send = skb_clone(skb, GFP_ATOMIC); - if (!skb_to_send) - goto err_cant_do; - } - want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + at_ingress = skb_at_tc_ingress(skb); if (dev == skb->dev && want_ingress == at_ingress) { pr_notice_once("tc mirred: Loop (%s:%s --> %s:%s)\n", netdev_name(skb->dev), @@ -290,6 +278,18 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, goto err_cant_do; } + /* we could easily avoid the clone only if called by ingress and clsact; + * since we can't easily detect the clsact caller, skip clone only for + * ingress - that covers the TC S/W datapath. + */ + dont_clone = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!dont_clone) { + skb_to_send = skb_clone(skb, GFP_ATOMIC); + if (!skb_to_send) + goto err_cant_do; + } + /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ From 4bcd49a03b94a5e16717961a7d750d676f044436 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 1 Jan 2026 08:56:08 -0500 Subject: [PATCH 29/70] selftests/tc-testing: Add test case redirecting to self on egress Add single mirred test case that attempts to redirect to self on egress using clsact Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260101135608.253079-3-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/actions/mirred.json | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index da156feabcbf..b056eb966871 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -1098,5 +1098,52 @@ "teardown": [ "$TC qdisc del dev $DUMMY root" ] + }, + { + "id": "4ed9", + "name": "Try to redirect to self on egress with clsact", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] } + ] From 2fa98059fd5a0936d0951bd14f8990ae0aa5272a Mon Sep 17 00:00:00 2001 From: Ankit Khushwaha Date: Thu, 1 Jan 2026 22:58:40 +0530 Subject: [PATCH 30/70] selftests: mptcp: Mark xerror and die_perror __noreturn Compiler reports potential uses of uninitialized variables in mptcp_connect.c when xerror() is called from failure paths. mptcp_connect.c:1262:11: warning: variable 'raw_addr' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] xerror() terminates execution by calling exit(), but it is not visible to the compiler & assumes control flow may continue past the call. Annotate xerror() with __noreturn so the compiler can correctly reason about control flow and avoid false-positive uninitialized variable warnings. Signed-off-by: Ankit Khushwaha Link: https://patch.msgid.link/20260101172840.90186-1-ankitkhushwaha.linux@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/Makefile | 1 + tools/testing/selftests/net/mptcp/mptcp_connect.c | 3 ++- tools/testing/selftests/net/mptcp/mptcp_diag.c | 3 ++- tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 15d144a25d82..4dd6278cd3dd 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -3,6 +3,7 @@ top_srcdir = ../../../../.. CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -I$(top_srcdir)/tools/include TEST_PROGS := \ diag.sh \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 404a77bf366a..10f6f99cfd4e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -33,6 +33,7 @@ #include #include #include +#include extern int optind; @@ -140,7 +141,7 @@ static void die_usage(void) exit(1); } -static void xerror(const char *fmt, ...) +static void __noreturn xerror(const char *fmt, ...) { va_list ap; diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c index e084796e804d..8e0b1b8d84b6 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_diag.c +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,7 @@ enum { #define rta_getattr(type, value) (*(type *)RTA_DATA(value)) -static void die_perror(const char *msg) +static void __noreturn die_perror(const char *msg) { perror(msg); exit(1); diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 8e8f6441ad8b..5716998da192 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -28,6 +28,7 @@ #include #include +#include #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 @@ -40,7 +41,7 @@ static int pf = AF_INET; static int proto_tx = IPPROTO_MPTCP; static int proto_rx = IPPROTO_MPTCP; -static void die_perror(const char *msg) +static void __noreturn die_perror(const char *msg) { perror(msg); exit(1); @@ -52,7 +53,7 @@ static void die_usage(int r) exit(r); } -static void xerror(const char *fmt, ...) +static void __noreturn xerror(const char *fmt, ...) { va_list ap; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 286164f7246e..b6e58d936ebe 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -25,6 +25,7 @@ #include #include +#include static int pf = AF_INET; @@ -127,7 +128,7 @@ struct so_state { #define MIN(a, b) ((a) < (b) ? (a) : (b)) #endif -static void die_perror(const char *msg) +static void __noreturn die_perror(const char *msg) { perror(msg); exit(1); @@ -139,7 +140,7 @@ static void die_usage(int r) exit(r); } -static void xerror(const char *fmt, ...) +static void __noreturn xerror(const char *fmt, ...) { va_list ap; From a7fc8c641cab855824c45e5e8877e40fd528b5df Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 2 Jan 2026 12:29:38 +0100 Subject: [PATCH 31/70] net: airoha: Fix npu rx DMA definitions Fix typos in npu rx DMA descriptor definitions. Fixes: b3ef7bdec66fb ("net: airoha: Add airoha_offload.h header") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260102-airoha-npu-dma-rx-def-fixes-v1-1-205fc6bf7d94@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 4d23cbb7d407..ab64ecdf39a0 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -71,12 +71,12 @@ static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, #define NPU_RX1_DESC_NUM 512 /* CTRL */ -#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) -#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) -#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) +#define NPU_RX_DMA_DESC_LAST_MASK BIT(27) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(26, 14) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(13, 1) #define NPU_RX_DMA_DESC_DONE_MASK BIT(0) /* INFO */ -#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 28) +#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 29) #define NPU_RX_DMA_PKT_ID_MASK GENMASK(28, 26) #define NPU_RX_DMA_SRC_PORT_MASK GENMASK(25, 21) #define NPU_RX_DMA_CRSN_MASK GENMASK(20, 16) From 86c22d475cbca80532da33ed5df73e6814d7a632 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 4 Jan 2026 08:52:32 -0800 Subject: [PATCH 32/70] netlink: specs: netdev: clarify the page pool API a little The phrasing of the page-pool-get doc is very confusing. It's supposed to highlight that support depends on the driver doing its part but it sounds like orphaned page pools won't be visible. The description of the ifindex is completely wrong. We move the page pool to loopback and skip the attribute if ifindex is loopback. Link: https://lore.kernel.org/20260104084347.5de3a537@kernel.org Reviewed-by: Donald Hunter Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20260104165232.710460-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 82bf5cb2617d..596c306ce52b 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -142,7 +142,7 @@ attribute-sets: name: ifindex doc: | ifindex of the netdev to which the pool belongs. - May be reported as 0 if the page pool was allocated for a netdev + May not be reported if the page pool was allocated for a netdev which got destroyed already (page pools may outlast their netdevs because they wait for all memory to be returned). type: u32 @@ -601,7 +601,9 @@ operations: name: page-pool-get doc: | Get / dump information about Page Pools. - (Only Page Pools associated with a net_device can be listed.) + Only Page Pools associated by the driver with a net_device + can be listed. ifindex will not be reported if the net_device + no longer exists. attribute-set: page-pool do: request: From 238e03d0466239410b72294b79494e43d4fabe77 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Sun, 4 Jan 2026 23:31:01 +0200 Subject: [PATCH 33/70] net: fix memory leak in skb_segment_list for GRO packets When skb_segment_list() is called during packet forwarding, it handles packets that were aggregated by the GRO engine. Historically, the segmentation logic in skb_segment_list assumes that individual segments are split from a parent SKB and may need to carry their own socket memory accounting. Accordingly, the code transfers truesize from the parent to the newly created segments. Prior to commit ed4cccef64c1 ("gro: fix ownership transfer"), this truesize subtraction in skb_segment_list() was valid because fragments still carry a reference to the original socket. However, commit ed4cccef64c1 ("gro: fix ownership transfer") changed this behavior by ensuring that fraglist entries are explicitly orphaned (skb->sk = NULL) to prevent illegal orphaning later in the stack. This change meant that the entire socket memory charge remained with the head SKB, but the corresponding accounting logic in skb_segment_list() was never updated. As a result, the current code unconditionally adds each fragment's truesize to delta_truesize and subtracts it from the parent SKB. Since the fragments are no longer charged to the socket, this subtraction results in an effective under-count of memory when the head is freed. This causes sk_wmem_alloc to remain non-zero, preventing socket destruction and leading to a persistent memory leak. The leak can be observed via KMEMLEAK when tearing down the networking environment: unreferenced object 0xffff8881e6eb9100 (size 2048): comm "ping", pid 6720, jiffies 4295492526 backtrace: kmem_cache_alloc_noprof+0x5c6/0x800 sk_prot_alloc+0x5b/0x220 sk_alloc+0x35/0xa00 inet6_create.part.0+0x303/0x10d0 __sock_create+0x248/0x640 __sys_socket+0x11b/0x1d0 Since skb_segment_list() is exclusively used for SKB_GSO_FRAGLIST packets constructed by GRO, the truesize adjustment is removed. The call to skb_release_head_state() must be preserved. As documented in commit cf673ed0e057 ("net: fix fraglist segmentation reference count leak"), it is still required to correctly drop references to SKB extensions that may be overwritten during __copy_skb_header(). Fixes: ed4cccef64c1 ("gro: fix ownership transfer") Signed-off-by: Mohammad Heib Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260104213101.352887-1-mheib@redhat.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a00808f7be6a..a56133902c0d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4636,12 +4636,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, { struct sk_buff *list_skb = skb_shinfo(skb)->frag_list; unsigned int tnl_hlen = skb_tnl_header_len(skb); - unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; int len_diff, err; + /* Only skb_gro_receive_list generated skbs arrive here */ + DEBUG_NET_WARN_ON_ONCE(!(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)); + skb_push(skb, -skb_network_offset(skb) + offset); /* Ensure the head is writeable before touching the shared info */ @@ -4655,8 +4657,9 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, nskb = list_skb; list_skb = list_skb->next; + DEBUG_NET_WARN_ON_ONCE(nskb->sk); + err = 0; - delta_truesize += nskb->truesize; if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { @@ -4699,7 +4702,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, goto err_linearize; } - skb->truesize = skb->truesize - delta_truesize; skb->data_len = skb->data_len - delta_len; skb->len = skb->len - delta_len; From 083029bd8b445595222a3cd14076b880781c1765 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 20 Nov 2025 16:12:14 -0800 Subject: [PATCH 34/70] idpf: keep the netdev when a reset fails During a successful reset the driver would re-allocate vport resources while keeping the netdevs intact. However, in case of an error in the init task, the netdev of the failing vport will be unregistered, effectively removing the network interface: [ 121.211076] idpf 0000:83:00.0: enabling device (0100 -> 0102) [ 121.221976] idpf 0000:83:00.0: Device HW Reset initiated [ 124.161229] idpf 0000:83:00.0 ens801f0: renamed from eth0 [ 124.163364] idpf 0000:83:00.0 ens801f0d1: renamed from eth1 [ 125.934656] idpf 0000:83:00.0 ens801f0d2: renamed from eth2 [ 128.218429] idpf 0000:83:00.0 ens801f0d3: renamed from eth3 ip -br a ens801f0 UP ens801f0d1 UP ens801f0d2 UP ens801f0d3 UP echo 1 > /sys/class/net/ens801f0/device/reset [ 145.885537] idpf 0000:83:00.0: resetting [ 145.990280] idpf 0000:83:00.0: reset done [ 146.284766] idpf 0000:83:00.0: HW reset detected [ 146.296610] idpf 0000:83:00.0: Device HW Reset initiated [ 211.556719] idpf 0000:83:00.0: Transaction timed-out (op:526 cookie:7700 vc_op:526 salt:77 timeout:60000ms) [ 272.996705] idpf 0000:83:00.0: Transaction timed-out (op:502 cookie:7800 vc_op:502 salt:78 timeout:60000ms) ip -br a ens801f0d1 DOWN ens801f0d2 DOWN ens801f0d3 DOWN Re-shuffle the logic in the error path of the init task to make sure the netdevs remain intact. This will allow the driver to attempt recovery via subsequent resets, provided the FW is still functional. The main change is to make sure that idpf_decfg_netdev() is not called should the init task fail during a reset. The error handling is consolidated under unwind_vports, as the removed labels had the same cleanup logic split depending on the point of failure. Fixes: ce1b75d0635c ("idpf: add ptypes and MAC filter support") Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 7ce4eb71a433..313803c08847 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1579,6 +1579,10 @@ void idpf_init_task(struct work_struct *work) goto unwind_vports; } + err = idpf_send_get_rx_ptype_msg(vport); + if (err) + goto unwind_vports; + index = vport->idx; vport_config = adapter->vport_config[index]; @@ -1590,15 +1594,11 @@ void idpf_init_task(struct work_struct *work) err = idpf_check_supported_desc_ids(vport); if (err) { dev_err(&pdev->dev, "failed to get required descriptor ids\n"); - goto cfg_netdev_err; + goto unwind_vports; } if (idpf_cfg_netdev(vport)) - goto cfg_netdev_err; - - err = idpf_send_get_rx_ptype_msg(vport); - if (err) - goto handle_err; + goto unwind_vports; /* Once state is put into DOWN, driver is ready for dev_open */ np = netdev_priv(vport->netdev); @@ -1645,11 +1645,6 @@ void idpf_init_task(struct work_struct *work) return; -handle_err: - idpf_decfg_netdev(vport); -cfg_netdev_err: - idpf_vport_rel(vport); - adapter->vports[index] = NULL; unwind_vports: if (default_vport) { for (index = 0; index < adapter->max_vports; index++) { From 2e281e1155fc476c571c0bd2ffbfe28ab829a5c3 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 20 Nov 2025 16:12:15 -0800 Subject: [PATCH 35/70] idpf: detach and close netdevs while handling a reset Protect the reset path from callbacks by setting the netdevs to detached state and close any netdevs in UP state until the reset handling has completed. During a reset, the driver will de-allocate resources for the vport, and there is no guarantee that those will recover, which is why the existing vport_ctrl_lock does not provide sufficient protection. idpf_detach_and_close() is called right before reset handling. If the reset handling succeeds, the netdevs state is recovered via call to idpf_attach_and_open(). If the reset handling fails the netdevs remain down. The detach/down calls are protected with RTNL lock to avoid racing with callbacks. On the recovery side the attach can be done without holding the RTNL lock as there are no callbacks expected at that point, due to detach/close always being done first in that flow. The previous logic restoring the netdevs state based on the IDPF_VPORT_UP_REQUESTED flag in the init task is not needed anymore, hence the removal of idpf_set_vport_state(). The IDPF_VPORT_UP_REQUESTED is still being used to restore the state of the netdevs following the reset, but has no use outside of the reset handling flow. idpf_init_hard_reset() is converted to void, since it was used as such and there is no error handling being done based on its return value. Before this change, invoking hard and soft resets simultaneously will cause the driver to lose the vport state: ip -br a UP echo 1 > /sys/class/net/ens801f0/device/reset& \ ethtool -L ens801f0 combined 8 ip -br a DOWN ip link set up ip -br a DOWN Also in case of a failure in the reset path, the netdev is left exposed to external callbacks, while vport resources are not initialized, leading to a crash on subsequent ifup/down: [408471.398966] idpf 0000:83:00.0: HW reset detected [408471.411744] idpf 0000:83:00.0: Device HW Reset initiated [408472.277901] idpf 0000:83:00.0: The driver was unable to contact the device's firmware. Check that the FW is running. Driver state= 0x2 [408508.125551] BUG: kernel NULL pointer dereference, address: 0000000000000078 [408508.126112] #PF: supervisor read access in kernel mode [408508.126687] #PF: error_code(0x0000) - not-present page [408508.127256] PGD 2aae2f067 P4D 0 [408508.127824] Oops: Oops: 0000 [#1] SMP NOPTI ... [408508.130871] RIP: 0010:idpf_stop+0x39/0x70 [idpf] ... [408508.139193] Call Trace: [408508.139637] [408508.140077] __dev_close_many+0xbb/0x260 [408508.140533] __dev_change_flags+0x1cf/0x280 [408508.140987] netif_change_flags+0x26/0x70 [408508.141434] dev_change_flags+0x3d/0xb0 [408508.141878] devinet_ioctl+0x460/0x890 [408508.142321] inet_ioctl+0x18e/0x1d0 [408508.142762] ? _copy_to_user+0x22/0x70 [408508.143207] sock_do_ioctl+0x3d/0xe0 [408508.143652] sock_ioctl+0x10e/0x330 [408508.144091] ? find_held_lock+0x2b/0x80 [408508.144537] __x64_sys_ioctl+0x96/0xe0 [408508.144979] do_syscall_64+0x79/0x3d0 [408508.145415] entry_SYSCALL_64_after_hwframe+0x76/0x7e [408508.145860] RIP: 0033:0x7f3e0bb4caff Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Signed-off-by: Emil Tantilov Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 121 ++++++++++++--------- 1 file changed, 72 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 313803c08847..a964e0f5891e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -729,6 +729,65 @@ static int idpf_init_mac_addr(struct idpf_vport *vport, return 0; } +static void idpf_detach_and_close(struct idpf_adapter *adapter) +{ + int max_vports = adapter->max_vports; + + for (int i = 0; i < max_vports; i++) { + struct net_device *netdev = adapter->netdevs[i]; + + /* If the interface is in detached state, that means the + * previous reset was not handled successfully for this + * vport. + */ + if (!netif_device_present(netdev)) + continue; + + /* Hold RTNL to protect racing with callbacks */ + rtnl_lock(); + netif_device_detach(netdev); + if (netif_running(netdev)) { + set_bit(IDPF_VPORT_UP_REQUESTED, + adapter->vport_config[i]->flags); + dev_close(netdev); + } + rtnl_unlock(); + } +} + +static void idpf_attach_and_open(struct idpf_adapter *adapter) +{ + int max_vports = adapter->max_vports; + + for (int i = 0; i < max_vports; i++) { + struct idpf_vport *vport = adapter->vports[i]; + struct idpf_vport_config *vport_config; + struct net_device *netdev; + + /* In case of a critical error in the init task, the vport + * will be freed. Only continue to restore the netdevs + * if the vport is allocated. + */ + if (!vport) + continue; + + /* No need for RTNL on attach as this function is called + * following detach and dev_close(). We do take RTNL for + * dev_open() below as it can race with external callbacks + * following the call to netif_device_attach(). + */ + netdev = adapter->netdevs[i]; + netif_device_attach(netdev); + vport_config = adapter->vport_config[vport->idx]; + if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, + vport_config->flags)) { + rtnl_lock(); + dev_open(netdev, NULL); + rtnl_unlock(); + } + } +} + /** * idpf_cfg_netdev - Allocate, configure and register a netdev * @vport: main vport structure @@ -1041,10 +1100,11 @@ static void idpf_vport_dealloc(struct idpf_vport *vport) idpf_idc_deinit_vport_aux_device(vport->vdev_info); idpf_deinit_mac_addr(vport); - idpf_vport_stop(vport, true); - if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) + if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) { + idpf_vport_stop(vport, true); idpf_decfg_netdev(vport); + } if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) idpf_del_all_mac_filters(vport); @@ -1544,7 +1604,6 @@ void idpf_init_task(struct work_struct *work) struct idpf_vport_config *vport_config; struct idpf_vport_max_q max_q; struct idpf_adapter *adapter; - struct idpf_netdev_priv *np; struct idpf_vport *vport; u16 num_default_vports; struct pci_dev *pdev; @@ -1600,12 +1659,6 @@ void idpf_init_task(struct work_struct *work) if (idpf_cfg_netdev(vport)) goto unwind_vports; - /* Once state is put into DOWN, driver is ready for dev_open */ - np = netdev_priv(vport->netdev); - clear_bit(IDPF_VPORT_UP, np->state); - if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); - /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created */ @@ -1781,27 +1834,6 @@ static int idpf_check_reset_complete(struct idpf_hw *hw, return -EBUSY; } -/** - * idpf_set_vport_state - Set the vport state to be after the reset - * @adapter: Driver specific private structure - */ -static void idpf_set_vport_state(struct idpf_adapter *adapter) -{ - u16 i; - - for (i = 0; i < adapter->max_vports; i++) { - struct idpf_netdev_priv *np; - - if (!adapter->netdevs[i]) - continue; - - np = netdev_priv(adapter->netdevs[i]); - if (test_bit(IDPF_VPORT_UP, np->state)) - set_bit(IDPF_VPORT_UP_REQUESTED, - adapter->vport_config[i]->flags); - } -} - /** * idpf_init_hard_reset - Initiate a hardware reset * @adapter: Driver specific private structure @@ -1810,28 +1842,17 @@ static void idpf_set_vport_state(struct idpf_adapter *adapter) * reallocate. Also reinitialize the mailbox. Return 0 on success, * negative on failure. */ -static int idpf_init_hard_reset(struct idpf_adapter *adapter) +static void idpf_init_hard_reset(struct idpf_adapter *adapter) { struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops; struct device *dev = &adapter->pdev->dev; - struct net_device *netdev; int err; - u16 i; + idpf_detach_and_close(adapter); mutex_lock(&adapter->vport_ctrl_lock); dev_info(dev, "Device HW Reset initiated\n"); - /* Avoid TX hangs on reset */ - for (i = 0; i < adapter->max_vports; i++) { - netdev = adapter->netdevs[i]; - if (!netdev) - continue; - - netif_carrier_off(netdev); - netif_tx_disable(netdev); - } - /* Prepare for reset */ if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD); @@ -1840,7 +1861,6 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter) idpf_idc_issue_reset_event(adapter->cdev_info); - idpf_set_vport_state(adapter); idpf_vc_core_deinit(adapter); if (!is_reset) reg_ops->trigger_reset(adapter, IDPF_HR_FUNC_RESET); @@ -1887,11 +1907,14 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter) unlock_mutex: mutex_unlock(&adapter->vport_ctrl_lock); - /* Wait until all vports are created to init RDMA CORE AUX */ - if (!err) - err = idpf_idc_init(adapter); - - return err; + /* Attempt to restore netdevs and initialize RDMA CORE AUX device, + * provided vc_core_init succeeded. It is still possible that + * vports are not allocated at this point if the init task failed. + */ + if (!err) { + idpf_attach_and_open(adapter); + idpf_idc_init(adapter); + } } /** From f6242b354605faff263ca45882b148200915a3f6 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 20 Nov 2025 16:12:16 -0800 Subject: [PATCH 36/70] idpf: fix memory leak in idpf_vport_rel() Free vport->rx_ptype_lkup in idpf_vport_rel() to avoid leaking memory during a reset. Reported by kmemleak: unreferenced object 0xff450acac838a000 (size 4096): comm "kworker/u258:5", pid 7732, jiffies 4296830044 hex dump (first 32 bytes): 00 00 00 00 00 10 00 00 00 10 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 10 00 00 00 00 00 00 ................ backtrace (crc 3da81902): __kmalloc_cache_noprof+0x469/0x7a0 idpf_send_get_rx_ptype_msg+0x90/0x570 [idpf] idpf_init_task+0x1ec/0x8d0 [idpf] process_one_work+0x226/0x6d0 worker_thread+0x19e/0x340 kthread+0x10f/0x250 ret_from_fork+0x251/0x2b0 ret_from_fork_asm+0x1a/0x30 Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index a964e0f5891e..04af10cfaa8c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1082,6 +1082,8 @@ static void idpf_vport_rel(struct idpf_vport *vport) kfree(adapter->vport_config[idx]->req_qs_chunks); adapter->vport_config[idx]->req_qs_chunks = NULL; } + kfree(vport->rx_ptype_lkup); + vport->rx_ptype_lkup = NULL; kfree(vport); adapter->num_alloc_vports--; } From e111cbc4adf9f9974eed040aeece7e17460f6bff Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 20 Nov 2025 16:12:17 -0800 Subject: [PATCH 37/70] idpf: fix memory leak in idpf_vc_core_deinit() Make sure to free hw->lan_regs. Reported by kmemleak during reset: unreferenced object 0xff1b913d02a936c0 (size 96): comm "kworker/u258:14", pid 2174, jiffies 4294958305 hex dump (first 32 bytes): 00 00 00 c0 a8 ba 2d ff 00 00 00 00 00 00 00 00 ......-......... 00 00 40 08 00 00 00 00 00 00 25 b3 a8 ba 2d ff ..@.......%...-. backtrace (crc 36063c4f): __kmalloc_noprof+0x48f/0x890 idpf_vc_core_init+0x6ce/0x9b0 [idpf] idpf_vc_event_task+0x1fb/0x350 [idpf] process_one_work+0x226/0x6d0 worker_thread+0x19e/0x340 kthread+0x10f/0x250 ret_from_fork+0x251/0x2b0 ret_from_fork_asm+0x1a/0x30 Fixes: 6aa53e861c1a ("idpf: implement get LAN MMIO memory regions") Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Reviewed-by: Joshua Hay Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 5bbe7d9294c1..01bbd12a642a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3570,6 +3570,7 @@ init_failed: */ void idpf_vc_core_deinit(struct idpf_adapter *adapter) { + struct idpf_hw *hw = &adapter->hw; bool remove_in_prog; if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags)) @@ -3593,6 +3594,9 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter) idpf_vport_params_buf_rel(adapter); + kfree(hw->lan_regs); + hw->lan_regs = NULL; + kfree(adapter->vports); adapter->vports = NULL; From 4d792219fe6f891b5b557a607ac8a0a14eda6e38 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 20 Nov 2025 16:12:18 -0800 Subject: [PATCH 38/70] idpf: fix error handling in the init_task on load If the init_task fails during a driver load, we end up without vports and netdevs, effectively failing the entire process. In that state a subsequent reset will result in a crash as the service task attempts to access uninitialized resources. Following trace is from an error in the init_task where the CREATE_VPORT (op 501) is rejected by the FW: [40922.763136] idpf 0000:83:00.0: Device HW Reset initiated [40924.449797] idpf 0000:83:00.0: Transaction failed (op 501) [40958.148190] idpf 0000:83:00.0: HW reset detected [40958.161202] BUG: kernel NULL pointer dereference, address: 00000000000000a8 ... [40958.168094] Workqueue: idpf-0000:83:00.0-vc_event idpf_vc_event_task [idpf] [40958.168865] RIP: 0010:idpf_vc_event_task+0x9b/0x350 [idpf] ... [40958.177932] Call Trace: [40958.178491] [40958.179040] process_one_work+0x226/0x6d0 [40958.179609] worker_thread+0x19e/0x340 [40958.180158] ? __pfx_worker_thread+0x10/0x10 [40958.180702] kthread+0x10f/0x250 [40958.181238] ? __pfx_kthread+0x10/0x10 [40958.181774] ret_from_fork+0x251/0x2b0 [40958.182307] ? __pfx_kthread+0x10/0x10 [40958.182834] ret_from_fork_asm+0x1a/0x30 [40958.183370] Fix the error handling in the init_task to make sure the service and mailbox tasks are disabled if the error happens during load. These are started in idpf_vc_core_init(), which spawns the init_task and has no way of knowing if it failed. If the error happens on reset, following successful driver load, the tasks can still run, as that will allow the netdevs to attempt recovery through another reset. Stop the PTP callbacks either way as those will be restarted by the call to idpf_vc_core_init() during a successful reset. Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Reported-by: Vivek Kumar Signed-off-by: Emil Tantilov Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 04af10cfaa8c..e2ee8b137421 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1690,10 +1690,9 @@ void idpf_init_task(struct work_struct *work) set_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags); } - /* As all the required vports are created, clear the reset flag - * unconditionally here in case we were in reset and the link was down. - */ + /* Clear the reset and load bits as all vports are created */ clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + clear_bit(IDPF_HR_DRV_LOAD, adapter->flags); /* Start the statistics task now */ queue_delayed_work(adapter->stats_wq, &adapter->stats_task, msecs_to_jiffies(10 * (pdev->devfn & 0x07))); @@ -1707,6 +1706,15 @@ unwind_vports: idpf_vport_dealloc(adapter->vports[index]); } } + /* Cleanup after vc_core_init, which has no way of knowing the + * init task failed on driver load. + */ + if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { + cancel_delayed_work_sync(&adapter->serv_task); + cancel_delayed_work_sync(&adapter->mbx_task); + } + idpf_ptp_release(adapter); + clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); } @@ -1856,7 +1864,7 @@ static void idpf_init_hard_reset(struct idpf_adapter *adapter) dev_info(dev, "Device HW Reset initiated\n"); /* Prepare for reset */ - if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { + if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD); } else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) { bool is_reset = idpf_is_reset_detected(adapter); From f9841bd28b600526ca4f6713b0ca49bf7bb98452 Mon Sep 17 00:00:00 2001 From: Sreedevi Joshi Date: Tue, 30 Sep 2025 16:23:51 -0500 Subject: [PATCH 39/70] idpf: fix memory leak of flow steer list on rmmod The flow steering list maintains entries that are added and removed as ethtool creates and deletes flow steering rules. Module removal with active entries causes memory leak as the list is not properly cleaned up. Prevent this by iterating through the remaining entries in the list and freeing the associated memory during module removal. Add a spinlock (flow_steer_list_lock) to protect the list access from multiple threads. Fixes: ada3e24b84a0 ("idpf: add flow steering support") Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Signed-off-by: Sreedevi Joshi Reviewed-by: Simon Horman Tested-by: Mina Almasry Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf.h | 2 ++ .../net/ethernet/intel/idpf/idpf_ethtool.c | 15 ++++++++-- drivers/net/ethernet/intel/idpf/idpf_lib.c | 28 ++++++++++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 8cfc68cbfa06..a61821333f5d 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -558,6 +558,7 @@ struct idpf_vector_lifo { * @max_q: Maximum possible queues * @req_qs_chunks: Queue chunk data for requested queues * @mac_filter_list_lock: Lock to protect mac filters + * @flow_steer_list_lock: Lock to protect fsteer filters * @flags: See enum idpf_vport_config_flags */ struct idpf_vport_config { @@ -565,6 +566,7 @@ struct idpf_vport_config { struct idpf_vport_max_q max_q; struct virtchnl2_add_queues *req_qs_chunks; spinlock_t mac_filter_list_lock; + spinlock_t flow_steer_list_lock; DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS); }; diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 2589e124e41c..00481fec8179 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -37,6 +37,7 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, { struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_user_config_data *user_config; + struct idpf_vport_config *vport_config; struct idpf_fsteer_fltr *f; struct idpf_vport *vport; unsigned int cnt = 0; @@ -44,7 +45,8 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - user_config = &np->adapter->vport_config[np->vport_idx]->user_config; + vport_config = np->adapter->vport_config[np->vport_idx]; + user_config = &vport_config->user_config; switch (cmd->cmd) { case ETHTOOL_GRXCLSRLCNT: @@ -53,15 +55,18 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, break; case ETHTOOL_GRXCLSRULE: err = -EINVAL; + spin_lock_bh(&vport_config->flow_steer_list_lock); list_for_each_entry(f, &user_config->flow_steer_list, list) if (f->loc == cmd->fs.location) { cmd->fs.ring_cookie = f->q_index; err = 0; break; } + spin_unlock_bh(&vport_config->flow_steer_list_lock); break; case ETHTOOL_GRXCLSRLALL: cmd->data = idpf_fsteer_max_rules(vport); + spin_lock_bh(&vport_config->flow_steer_list_lock); list_for_each_entry(f, &user_config->flow_steer_list, list) { if (cnt == cmd->rule_cnt) { err = -EMSGSIZE; @@ -72,6 +77,7 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, } if (!err) cmd->rule_cnt = user_config->num_fsteer_fltrs; + spin_unlock_bh(&vport_config->flow_steer_list_lock); break; default: break; @@ -240,6 +246,7 @@ static int idpf_add_flow_steer(struct net_device *netdev, fltr->loc = fsp->location; fltr->q_index = q_index; + spin_lock_bh(&vport_config->flow_steer_list_lock); list_for_each_entry(f, &user_config->flow_steer_list, list) { if (f->loc >= fltr->loc) break; @@ -250,6 +257,7 @@ static int idpf_add_flow_steer(struct net_device *netdev, list_add(&fltr->list, &user_config->flow_steer_list); user_config->num_fsteer_fltrs++; + spin_unlock_bh(&vport_config->flow_steer_list_lock); out: kfree(rule); @@ -302,17 +310,20 @@ static int idpf_del_flow_steer(struct net_device *netdev, goto out; } + spin_lock_bh(&vport_config->flow_steer_list_lock); list_for_each_entry_safe(f, iter, &user_config->flow_steer_list, list) { if (f->loc == fsp->location) { list_del(&f->list); kfree(f); user_config->num_fsteer_fltrs--; - goto out; + goto out_unlock; } } err = -EINVAL; +out_unlock: + spin_unlock_bh(&vport_config->flow_steer_list_lock); out: kfree(rule); return err; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index e2ee8b137421..d56366e676cf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -442,6 +442,29 @@ send_dealloc_vecs: return err; } +/** + * idpf_del_all_flow_steer_filters - Delete all flow steer filters in list + * @vport: main vport struct + * + * Takes flow_steer_list_lock spinlock. Deletes all filters + */ +static void idpf_del_all_flow_steer_filters(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_fsteer_fltr *f, *ftmp; + + vport_config = vport->adapter->vport_config[vport->idx]; + + spin_lock_bh(&vport_config->flow_steer_list_lock); + list_for_each_entry_safe(f, ftmp, &vport_config->user_config.flow_steer_list, + list) { + list_del(&f->list); + kfree(f); + } + vport_config->user_config.num_fsteer_fltrs = 0; + spin_unlock_bh(&vport_config->flow_steer_list_lock); +} + /** * idpf_find_mac_filter - Search filter list for specific mac filter * @vconfig: Vport config structure @@ -1107,8 +1130,10 @@ static void idpf_vport_dealloc(struct idpf_vport *vport) idpf_vport_stop(vport, true); idpf_decfg_netdev(vport); } - if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) { idpf_del_all_mac_filters(vport); + idpf_del_all_flow_steer_filters(vport); + } if (adapter->netdevs[i]) { struct idpf_netdev_priv *np = netdev_priv(adapter->netdevs[i]); @@ -1648,6 +1673,7 @@ void idpf_init_task(struct work_struct *work) vport_config = adapter->vport_config[index]; spin_lock_init(&vport_config->mac_filter_list_lock); + spin_lock_init(&vport_config->flow_steer_list_lock); INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list); INIT_LIST_HEAD(&vport_config->user_config.flow_steer_list); From 36aae2ea6bd76b8246caa50e34a4f4824f0a3be8 Mon Sep 17 00:00:00 2001 From: Erik Gabriel Carrillo Date: Tue, 30 Sep 2025 16:23:52 -0500 Subject: [PATCH 40/70] idpf: fix issue with ethtool -n command display When ethtool -n is executed on an interface to display the flow steering rules, "rxclass: Unknown flow type" error is generated. The flow steering list maintained in the driver currently stores only the location and q_index but other fields of the ethtool_rx_flow_spec are not stored. This may be enough for the virtchnl command to delete the entry. However, when the ethtool -n command is used to query the flow steering rules, the ethtool_rx_flow_spec returned is not complete causing the error below. Resolve this by storing the flow spec (fsp) when rules are added and returning the complete flow spec when rules are queried. Also, change the return value from EINVAL to ENOENT when flow steering entry is not found during query by location or when deleting an entry. Add logic to detect and reject duplicate filter entries at the same location and change logic to perform upfront validation of all error conditions before adding flow rules through virtchnl. This avoids the need for additional virtchnl delete messages when subsequent operations fail, which was missing in the original upstream code. Example: Before the fix: ethtool -n eth1 2 RX rings available Total 2 rules rxclass: Unknown flow type rxclass: Unknown flow type After the fix: ethtool -n eth1 2 RX rings available Total 2 rules Filter: 0 Rule Type: TCP over IPv4 Src IP addr: 10.0.0.1 mask: 0.0.0.0 Dest IP addr: 0.0.0.0 mask: 255.255.255.255 TOS: 0x0 mask: 0xff Src port: 0 mask: 0xffff Dest port: 0 mask: 0xffff Action: Direct to queue 0 Filter: 1 Rule Type: UDP over IPv4 Src IP addr: 10.0.0.1 mask: 0.0.0.0 Dest IP addr: 0.0.0.0 mask: 255.255.255.255 TOS: 0x0 mask: 0xff Src port: 0 mask: 0xffff Dest port: 0 mask: 0xffff Action: Direct to queue 0 Fixes: ada3e24b84a0 ("idpf: add flow steering support") Signed-off-by: Erik Gabriel Carrillo Co-developed-by: Sreedevi Joshi Signed-off-by: Sreedevi Joshi Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Mina Almasry Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf.h | 3 +- .../net/ethernet/intel/idpf/idpf_ethtool.c | 59 ++++++++++++------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index a61821333f5d..dab36c0c3cdc 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -284,8 +284,7 @@ struct idpf_port_stats { struct idpf_fsteer_fltr { struct list_head list; - u32 loc; - u32 q_index; + struct ethtool_rx_flow_spec fs; }; /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 00481fec8179..7000f6283a33 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -54,11 +54,15 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, cmd->data = idpf_fsteer_max_rules(vport); break; case ETHTOOL_GRXCLSRULE: - err = -EINVAL; + err = -ENOENT; spin_lock_bh(&vport_config->flow_steer_list_lock); list_for_each_entry(f, &user_config->flow_steer_list, list) - if (f->loc == cmd->fs.location) { - cmd->fs.ring_cookie = f->q_index; + if (f->fs.location == cmd->fs.location) { + /* Avoid infoleak from padding: zero first, + * then assign fields + */ + memset(&cmd->fs, 0, sizeof(cmd->fs)); + cmd->fs = f->fs; err = 0; break; } @@ -72,7 +76,7 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, err = -EMSGSIZE; break; } - rule_locs[cnt] = f->loc; + rule_locs[cnt] = f->fs.location; cnt++; } if (!err) @@ -174,7 +178,7 @@ static int idpf_add_flow_steer(struct net_device *netdev, struct idpf_vport *vport; u32 flow_type, q_index; u16 num_rxq; - int err; + int err = 0; vport = idpf_netdev_to_vport(netdev); vport_config = vport->adapter->vport_config[np->vport_idx]; @@ -200,6 +204,29 @@ static int idpf_add_flow_steer(struct net_device *netdev, if (!rule) return -ENOMEM; + fltr = kzalloc(sizeof(*fltr), GFP_KERNEL); + if (!fltr) { + err = -ENOMEM; + goto out_free_rule; + } + + /* detect duplicate entry and reject before adding rules */ + spin_lock_bh(&vport_config->flow_steer_list_lock); + list_for_each_entry(f, &user_config->flow_steer_list, list) { + if (f->fs.location == fsp->location) { + err = -EEXIST; + break; + } + + if (f->fs.location > fsp->location) + break; + parent = f; + } + spin_unlock_bh(&vport_config->flow_steer_list_lock); + + if (err) + goto out; + rule->vport_id = cpu_to_le32(vport->vport_id); rule->count = cpu_to_le32(1); info = &rule->rule_info[0]; @@ -238,28 +265,20 @@ static int idpf_add_flow_steer(struct net_device *netdev, goto out; } - fltr = kzalloc(sizeof(*fltr), GFP_KERNEL); - if (!fltr) { - err = -ENOMEM; - goto out; - } + /* Save a copy of the user's flow spec so ethtool can later retrieve it */ + fltr->fs = *fsp; - fltr->loc = fsp->location; - fltr->q_index = q_index; spin_lock_bh(&vport_config->flow_steer_list_lock); - list_for_each_entry(f, &user_config->flow_steer_list, list) { - if (f->loc >= fltr->loc) - break; - parent = f; - } - parent ? list_add(&fltr->list, &parent->list) : list_add(&fltr->list, &user_config->flow_steer_list); user_config->num_fsteer_fltrs++; spin_unlock_bh(&vport_config->flow_steer_list_lock); + goto out_free_rule; out: + kfree(fltr); +out_free_rule: kfree(rule); return err; } @@ -313,14 +332,14 @@ static int idpf_del_flow_steer(struct net_device *netdev, spin_lock_bh(&vport_config->flow_steer_list_lock); list_for_each_entry_safe(f, iter, &user_config->flow_steer_list, list) { - if (f->loc == fsp->location) { + if (f->fs.location == fsp->location) { list_del(&f->list); kfree(f); user_config->num_fsteer_fltrs--; goto out_unlock; } } - err = -EINVAL; + err = -ENOENT; out_unlock: spin_unlock_bh(&vport_config->flow_steer_list_lock); From 83f38f210b85676f40ba8586b5a8edae19b56995 Mon Sep 17 00:00:00 2001 From: Sreedevi Joshi Date: Mon, 24 Nov 2025 12:47:48 -0600 Subject: [PATCH 41/70] idpf: Fix RSS LUT NULL pointer crash on early ethtool operations The RSS LUT is not initialized until the interface comes up, causing the following NULL pointer crash when ethtool operations like rxhash on/off are performed before the interface is brought up for the first time. Move RSS LUT initialization from ndo_open to vport creation to ensure LUT is always available. This enables RSS configuration via ethtool before bringing the interface up. Simplify LUT management by maintaining all changes in the driver's soft copy and programming zeros to the indirection table when rxhash is disabled. Defer HW programming until the interface comes up if it is down during rxhash and LUT configuration changes. Steps to reproduce: ** Load idpf driver; interfaces will be created modprobe idpf ** Before bringing the interfaces up, turn rxhash off ethtool -K eth2 rxhash off [89408.371875] BUG: kernel NULL pointer dereference, address: 0000000000000000 [89408.371908] #PF: supervisor read access in kernel mode [89408.371924] #PF: error_code(0x0000) - not-present page [89408.371940] PGD 0 P4D 0 [89408.371953] Oops: Oops: 0000 [#1] SMP NOPTI [89408.372052] RIP: 0010:memcpy_orig+0x16/0x130 [89408.372310] Call Trace: [89408.372317] [89408.372326] ? idpf_set_features+0xfc/0x180 [idpf] [89408.372363] __netdev_update_features+0x295/0xde0 [89408.372384] ethnl_set_features+0x15e/0x460 [89408.372406] genl_family_rcv_msg_doit+0x11f/0x180 [89408.372429] genl_rcv_msg+0x1ad/0x2b0 [89408.372446] ? __pfx_ethnl_set_features+0x10/0x10 [89408.372465] ? __pfx_genl_rcv_msg+0x10/0x10 [89408.372482] netlink_rcv_skb+0x58/0x100 [89408.372502] genl_rcv+0x2c/0x50 [89408.372516] netlink_unicast+0x289/0x3e0 [89408.372533] netlink_sendmsg+0x215/0x440 [89408.372551] __sys_sendto+0x234/0x240 [89408.372571] __x64_sys_sendto+0x28/0x30 [89408.372585] x64_sys_call+0x1909/0x1da0 [89408.372604] do_syscall_64+0x7a/0xfa0 [89408.373140] ? clear_bhb_loop+0x60/0xb0 [89408.373647] entry_SYSCALL_64_after_hwframe+0x76/0x7e [89408.378887] Fixes: a251eee62133 ("idpf: add SRIOV support and other ndo_ops") Signed-off-by: Sreedevi Joshi Reviewed-by: Sridhar Samudrala Reviewed-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf.h | 2 - drivers/net/ethernet/intel/idpf/idpf_lib.c | 94 +++++++++---------- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 36 +++---- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 4 +- .../net/ethernet/intel/idpf/idpf_virtchnl.c | 9 +- 5 files changed, 66 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index dab36c0c3cdc..1bf7934d4e28 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -423,14 +423,12 @@ enum idpf_user_flags { * @rss_key: RSS hash key * @rss_lut_size: Size of RSS lookup table * @rss_lut: RSS lookup table - * @cached_lut: Used to restore previously init RSS lut */ struct idpf_rss_data { u16 rss_key_size; u8 *rss_key; u16 rss_lut_size; u32 *rss_lut; - u32 *cached_lut; }; /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index d56366e676cf..51716e5a84ef 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1073,7 +1073,7 @@ static void idpf_vport_rel(struct idpf_vport *vport) u16 idx = vport->idx; vport_config = adapter->vport_config[vport->idx]; - idpf_deinit_rss(vport); + idpf_deinit_rss_lut(vport); rss_data = &vport_config->user_config.rss_data; kfree(rss_data->rss_key); rss_data->rss_key = NULL; @@ -1226,6 +1226,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, u16 idx = adapter->next_vport; struct idpf_vport *vport; u16 num_max_q; + int err; if (idx == IDPF_NO_FREE_SLOT) return NULL; @@ -1276,10 +1277,11 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, idpf_vport_init(vport, max_q); - /* This alloc is done separate from the LUT because it's not strictly - * dependent on how many queues we have. If we change number of queues - * and soft reset we'll need a new LUT but the key can remain the same - * for as long as the vport exists. + /* LUT and key are both initialized here. Key is not strictly dependent + * on how many queues we have. If we change number of queues and soft + * reset is initiated, LUT will be freed and a new LUT will be allocated + * as per the updated number of queues during vport bringup. However, + * the key remains the same for as long as the vport exists. */ rss_data = &adapter->vport_config[idx]->user_config.rss_data; rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL); @@ -1289,6 +1291,11 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, /* Initialize default rss key */ netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size); + /* Initialize default rss LUT */ + err = idpf_init_rss_lut(vport); + if (err) + goto free_rss_key; + /* fill vport slot in the adapter struct */ adapter->vports[idx] = vport; adapter->vport_ids[idx] = idpf_get_vport_id(vport); @@ -1299,6 +1306,8 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, return vport; +free_rss_key: + kfree(rss_data->rss_key); free_vector_idxs: kfree(vport->q_vector_idxs); free_vport: @@ -1476,6 +1485,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; struct idpf_vport_config *vport_config; + struct idpf_rss_data *rss_data; int err; if (test_bit(IDPF_VPORT_UP, np->state)) @@ -1570,12 +1580,21 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) idpf_restore_features(vport); vport_config = adapter->vport_config[vport->idx]; - if (vport_config->user_config.rss_data.rss_lut) - err = idpf_config_rss(vport); - else - err = idpf_init_rss(vport); + rss_data = &vport_config->user_config.rss_data; + + if (!rss_data->rss_lut) { + err = idpf_init_rss_lut(vport); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to initialize RSS LUT for vport %u: %d\n", + vport->vport_id, err); + goto disable_vport; + } + } + + err = idpf_config_rss(vport); if (err) { - dev_err(&adapter->pdev->dev, "Failed to initialize RSS for vport %u: %d\n", + dev_err(&adapter->pdev->dev, "Failed to configure RSS for vport %u: %d\n", vport->vport_id, err); goto disable_vport; } @@ -1584,7 +1603,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) if (err) { dev_err(&adapter->pdev->dev, "Failed to complete interface up for vport %u: %d\n", vport->vport_id, err); - goto deinit_rss; + goto disable_vport; } if (rtnl) @@ -1592,8 +1611,6 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) return 0; -deinit_rss: - idpf_deinit_rss(vport); disable_vport: idpf_send_disable_vport_msg(vport); disable_queues: @@ -2051,7 +2068,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, idpf_vport_stop(vport, false); } - idpf_deinit_rss(vport); + idpf_deinit_rss_lut(vport); /* We're passing in vport here because we need its wait_queue * to send a message and it should be getting all the vport * config data out of the adapter but we need to be careful not @@ -2219,40 +2236,6 @@ static void idpf_set_rx_mode(struct net_device *netdev) dev_err(dev, "Failed to set promiscuous mode: %d\n", err); } -/** - * idpf_vport_manage_rss_lut - disable/enable RSS - * @vport: the vport being changed - * - * In the event of disable request for RSS, this function will zero out RSS - * LUT, while in the event of enable request for RSS, it will reconfigure RSS - * LUT with the default LUT configuration. - */ -static int idpf_vport_manage_rss_lut(struct idpf_vport *vport) -{ - bool ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH); - struct idpf_rss_data *rss_data; - u16 idx = vport->idx; - int lut_size; - - rss_data = &vport->adapter->vport_config[idx]->user_config.rss_data; - lut_size = rss_data->rss_lut_size * sizeof(u32); - - if (ena) { - /* This will contain the default or user configured LUT */ - memcpy(rss_data->rss_lut, rss_data->cached_lut, lut_size); - } else { - /* Save a copy of the current LUT to be restored later if - * requested. - */ - memcpy(rss_data->cached_lut, rss_data->rss_lut, lut_size); - - /* Zero out the current LUT to disable */ - memset(rss_data->rss_lut, 0, lut_size); - } - - return idpf_config_rss(vport); -} - /** * idpf_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted @@ -2278,10 +2261,19 @@ static int idpf_set_features(struct net_device *netdev, } if (changed & NETIF_F_RXHASH) { + struct idpf_netdev_priv *np = netdev_priv(netdev); + netdev->features ^= NETIF_F_RXHASH; - err = idpf_vport_manage_rss_lut(vport); - if (err) - goto unlock_mutex; + + /* If the interface is not up when changing the rxhash, update + * to the HW is skipped. The updated LUT will be committed to + * the HW when the interface is brought up. + */ + if (test_bit(IDPF_VPORT_UP, np->state)) { + err = idpf_config_rss(vport); + if (err) + goto unlock_mutex; + } } if (changed & NETIF_F_GRO_HW) { diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 1d91c56f7469..8991a891a440 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4650,57 +4650,47 @@ static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport) rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; - for (i = 0; i < rss_data->rss_lut_size; i++) { + for (i = 0; i < rss_data->rss_lut_size; i++) rss_data->rss_lut[i] = i % num_active_rxq; - rss_data->cached_lut[i] = rss_data->rss_lut[i]; - } } /** - * idpf_init_rss - Allocate and initialize RSS resources + * idpf_init_rss_lut - Allocate and initialize RSS LUT * @vport: virtual port * - * Return 0 on success, negative on failure + * Return: 0 on success, negative on failure */ -int idpf_init_rss(struct idpf_vport *vport) +int idpf_init_rss_lut(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; struct idpf_rss_data *rss_data; - u32 lut_size; rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + if (!rss_data->rss_lut) { + u32 lut_size; - lut_size = rss_data->rss_lut_size * sizeof(u32); - rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL); - if (!rss_data->rss_lut) - return -ENOMEM; - - rss_data->cached_lut = kzalloc(lut_size, GFP_KERNEL); - if (!rss_data->cached_lut) { - kfree(rss_data->rss_lut); - rss_data->rss_lut = NULL; - - return -ENOMEM; + lut_size = rss_data->rss_lut_size * sizeof(u32); + rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL); + if (!rss_data->rss_lut) + return -ENOMEM; } /* Fill the default RSS lut values */ idpf_fill_dflt_rss_lut(vport); - return idpf_config_rss(vport); + return 0; } /** - * idpf_deinit_rss - Release RSS resources + * idpf_deinit_rss_lut - Release RSS LUT * @vport: virtual port */ -void idpf_deinit_rss(struct idpf_vport *vport) +void idpf_deinit_rss_lut(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; struct idpf_rss_data *rss_data; rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; - kfree(rss_data->cached_lut); - rss_data->cached_lut = NULL; kfree(rss_data->rss_lut); rss_data->rss_lut = NULL; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 75b977094741..7d20593bd877 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1086,8 +1086,8 @@ void idpf_vport_intr_deinit(struct idpf_vport *vport); int idpf_vport_intr_init(struct idpf_vport *vport); void idpf_vport_intr_ena(struct idpf_vport *vport); int idpf_config_rss(struct idpf_vport *vport); -int idpf_init_rss(struct idpf_vport *vport); -void idpf_deinit_rss(struct idpf_vport *vport); +int idpf_init_rss_lut(struct idpf_vport *vport); +void idpf_deinit_rss_lut(struct idpf_vport *vport); int idpf_rx_bufs_init_all(struct idpf_vport *vport); struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 01bbd12a642a..cb702eac86c8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -2804,6 +2804,10 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport) * @vport: virtual port data structure * @get: flag to set or get rss look up table * + * When rxhash is disabled, RSS LUT will be configured with zeros. If rxhash + * is enabled, the LUT values stored in driver's soft copy will be used to setup + * the HW. + * * Returns 0 on success, negative on failure. */ int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get) @@ -2814,10 +2818,12 @@ int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get) struct idpf_rss_data *rss_data; int buf_size, lut_buf_size; ssize_t reply_sz; + bool rxhash_ena; int i; rss_data = &vport->adapter->vport_config[vport->idx]->user_config.rss_data; + rxhash_ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH); buf_size = struct_size(rl, lut, rss_data->rss_lut_size); rl = kzalloc(buf_size, GFP_KERNEL); if (!rl) @@ -2839,7 +2845,8 @@ int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get) } else { rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size); for (i = 0; i < rss_data->rss_lut_size; i++) - rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]); + rl->lut[i] = rxhash_ena ? + cpu_to_le32(rss_data->rss_lut[i]) : 0; xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_LUT; } From 445b49d13787da2fe8d51891ee196e5077feef44 Mon Sep 17 00:00:00 2001 From: Sreedevi Joshi Date: Mon, 24 Nov 2025 12:47:49 -0600 Subject: [PATCH 42/70] idpf: Fix RSS LUT configuration on down interfaces RSS LUT provisioning and queries on a down interface currently return silently without effect. Users should be able to configure RSS settings even when the interface is down. Fix by maintaining RSS configuration changes in the driver's soft copy and deferring HW programming until the interface comes up. Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Signed-off-by: Sreedevi Joshi Reviewed-by: Aleksandr Loktionov Reviewed-by: Sridhar Samudrala Reviewed-by: Emil Tantilov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_ethtool.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 7000f6283a33..2efa3c08aba5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -411,7 +411,10 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev) * @netdev: network interface device structure * @rxfh: pointer to param struct (indir, key, hfunc) * - * Reads the indirection table directly from the hardware. Always returns 0. + * RSS LUT and Key information are read from driver's cached + * copy. When rxhash is off, rss lut will be displayed as zeros. + * + * Return: 0 on success, -errno otherwise. */ static int idpf_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) @@ -419,10 +422,13 @@ static int idpf_get_rxfh(struct net_device *netdev, struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_rss_data *rss_data; struct idpf_adapter *adapter; + struct idpf_vport *vport; + bool rxhash_ena; int err = 0; u16 i; idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); adapter = np->adapter; @@ -432,9 +438,8 @@ static int idpf_get_rxfh(struct net_device *netdev, } rss_data = &adapter->vport_config[np->vport_idx]->user_config.rss_data; - if (!test_bit(IDPF_VPORT_UP, np->state)) - goto unlock_mutex; + rxhash_ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH); rxfh->hfunc = ETH_RSS_HASH_TOP; if (rxfh->key) @@ -442,7 +447,7 @@ static int idpf_get_rxfh(struct net_device *netdev, if (rxfh->indir) { for (i = 0; i < rss_data->rss_lut_size; i++) - rxfh->indir[i] = rss_data->rss_lut[i]; + rxfh->indir[i] = rxhash_ena ? rss_data->rss_lut[i] : 0; } unlock_mutex: @@ -482,8 +487,6 @@ static int idpf_set_rxfh(struct net_device *netdev, } rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; - if (!test_bit(IDPF_VPORT_UP, np->state)) - goto unlock_mutex; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && rxfh->hfunc != ETH_RSS_HASH_TOP) { @@ -499,7 +502,8 @@ static int idpf_set_rxfh(struct net_device *netdev, rss_data->rss_lut[lut] = rxfh->indir[lut]; } - err = idpf_config_rss(vport); + if (test_bit(IDPF_VPORT_UP, np->state)) + err = idpf_config_rss(vport); unlock_mutex: idpf_vport_ctrl_unlock(netdev); From ebecca5b093895da801b3eba1a55b4ec4027d196 Mon Sep 17 00:00:00 2001 From: Sreedevi Joshi Date: Mon, 24 Nov 2025 12:47:50 -0600 Subject: [PATCH 43/70] idpf: Fix RSS LUT NULL ptr issue after soft reset During soft reset, the RSS LUT is freed and not restored unless the interface is up. If an ethtool command that accesses the rss lut is attempted immediately after reset, it will result in NULL ptr dereference. Also, there is no need to reset the rss lut if the soft reset does not involve queue count change. After soft reset, set the RSS LUT to default values based on the updated queue count only if the reset was a result of a queue count change and the LUT was not configured by the user. In all other cases, don't touch the LUT. Steps to reproduce: ** Bring the interface down (if up) ifconfig eth1 down ** update the queue count (eg., 27->20) ethtool -L eth1 combined 20 ** display the RSS LUT ethtool -x eth1 [82375.558338] BUG: kernel NULL pointer dereference, address: 0000000000000000 [82375.558373] #PF: supervisor read access in kernel mode [82375.558391] #PF: error_code(0x0000) - not-present page [82375.558408] PGD 0 P4D 0 [82375.558421] Oops: Oops: 0000 [#1] SMP NOPTI [82375.558516] RIP: 0010:idpf_get_rxfh+0x108/0x150 [idpf] [82375.558786] Call Trace: [82375.558793] [82375.558804] rss_prepare.isra.0+0x187/0x2a0 [82375.558827] rss_prepare_data+0x3a/0x50 [82375.558845] ethnl_default_doit+0x13d/0x3e0 [82375.558863] genl_family_rcv_msg_doit+0x11f/0x180 [82375.558886] genl_rcv_msg+0x1ad/0x2b0 [82375.558902] ? __pfx_ethnl_default_doit+0x10/0x10 [82375.558920] ? __pfx_genl_rcv_msg+0x10/0x10 [82375.558937] netlink_rcv_skb+0x58/0x100 [82375.558957] genl_rcv+0x2c/0x50 [82375.558971] netlink_unicast+0x289/0x3e0 [82375.558988] netlink_sendmsg+0x215/0x440 [82375.559005] __sys_sendto+0x234/0x240 [82375.559555] __x64_sys_sendto+0x28/0x30 [82375.560068] x64_sys_call+0x1909/0x1da0 [82375.560576] do_syscall_64+0x7a/0xfa0 [82375.561076] ? clear_bhb_loop+0x60/0xb0 [82375.561567] entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Signed-off-by: Sreedevi Joshi Reviewed-by: Aleksandr Loktionov Reviewed-by: Sridhar Samudrala Reviewed-by: Emil Tantilov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 20 ++++---------------- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 2 +- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 1 + 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 51716e5a84ef..003bab3ce5ae 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1484,8 +1484,6 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; - struct idpf_vport_config *vport_config; - struct idpf_rss_data *rss_data; int err; if (test_bit(IDPF_VPORT_UP, np->state)) @@ -1579,19 +1577,6 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) idpf_restore_features(vport); - vport_config = adapter->vport_config[vport->idx]; - rss_data = &vport_config->user_config.rss_data; - - if (!rss_data->rss_lut) { - err = idpf_init_rss_lut(vport); - if (err) { - dev_err(&adapter->pdev->dev, - "Failed to initialize RSS LUT for vport %u: %d\n", - vport->vport_id, err); - goto disable_vport; - } - } - err = idpf_config_rss(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to configure RSS for vport %u: %d\n", @@ -2068,7 +2053,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, idpf_vport_stop(vport, false); } - idpf_deinit_rss_lut(vport); /* We're passing in vport here because we need its wait_queue * to send a message and it should be getting all the vport * config data out of the adapter but we need to be careful not @@ -2094,6 +2078,10 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, if (err) goto err_open; + if (reset_cause == IDPF_SR_Q_CHANGE && + !netif_is_rxfh_configured(vport->netdev)) + idpf_fill_dflt_rss_lut(vport); + if (vport_is_up) err = idpf_vport_open(vport, false); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 8991a891a440..f51d52297e1e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4641,7 +4641,7 @@ int idpf_config_rss(struct idpf_vport *vport) * idpf_fill_dflt_rss_lut - Fill the indirection table with the default values * @vport: virtual port structure */ -static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport) +void idpf_fill_dflt_rss_lut(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; u16 num_active_rxq = vport->num_rxq; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 7d20593bd877..0472698ca192 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1085,6 +1085,7 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector); void idpf_vport_intr_deinit(struct idpf_vport *vport); int idpf_vport_intr_init(struct idpf_vport *vport); void idpf_vport_intr_ena(struct idpf_vport *vport); +void idpf_fill_dflt_rss_lut(struct idpf_vport *vport); int idpf_config_rss(struct idpf_vport *vport); int idpf_init_rss_lut(struct idpf_vport *vport); void idpf_deinit_rss_lut(struct idpf_vport *vport); From 87b8ee64685bc096a087af833d4594b2332bfdb1 Mon Sep 17 00:00:00 2001 From: Sreedevi Joshi Date: Tue, 2 Dec 2025 17:12:46 -0600 Subject: [PATCH 44/70] idpf: Fix error handling in idpf_vport_open() Fix error handling to properly cleanup interrupts when idpf_vport_queue_ids_init() or idpf_rx_bufs_init_all() fail. Jump to 'intr_deinit' instead of 'queues_rel' to ensure interrupts are cleaned up before releasing other resources. Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Signed-off-by: Sreedevi Joshi Reviewed-by: Madhu Chittim Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 003bab3ce5ae..131a8121839b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1524,14 +1524,14 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + goto intr_deinit; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + goto intr_deinit; } idpf_rx_init_buf_tail(vport); From 086efe0a1ecc36cffe46640ce12649a4cd3ff171 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Mon, 3 Nov 2025 13:20:36 -0800 Subject: [PATCH 45/70] idpf: cap maximum Rx buffer size The HW only supports a maximum Rx buffer size of 16K-128. On systems using large pages, the libeth logic can configure the buffer size to be larger than this. The upper bound is PAGE_SIZE while the lower bound is MTU rounded up to the nearest power of 2. For example, ARM systems with a 64K page size and an mtu of 9000 will set the Rx buffer size to 16K, which will cause the config Rx queues message to fail. Initialize the bufq/fill queue buf_len field to the maximum supported size. This will trigger the libeth logic to cap the maximum Rx buffer size by reducing the upper bound. Fixes: 74d1412ac8f37 ("idpf: use libeth Rx buffer management for payload buffer") Signed-off-by: Joshua Hay Acked-by: Alexander Lobakin Reviewed-by: Madhu Chittim Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Reviewed-by: David Decotigny Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 8 +++++--- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index f51d52297e1e..7f3933ca9edc 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -695,9 +695,10 @@ err: static int idpf_rx_bufs_init_singleq(struct idpf_rx_queue *rxq) { struct libeth_fq fq = { - .count = rxq->desc_count, - .type = LIBETH_FQE_MTU, - .nid = idpf_q_vector_to_mem(rxq->q_vector), + .count = rxq->desc_count, + .type = LIBETH_FQE_MTU, + .buf_len = IDPF_RX_MAX_BUF_SZ, + .nid = idpf_q_vector_to_mem(rxq->q_vector), }; int ret; @@ -754,6 +755,7 @@ static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq, .truesize = bufq->truesize, .count = bufq->desc_count, .type = type, + .buf_len = IDPF_RX_MAX_BUF_SZ, .hsplit = idpf_queue_has(HSPLIT_EN, bufq), .xdp = idpf_xdp_enabled(bufq->q_vector->vport), .nid = idpf_q_vector_to_mem(bufq->q_vector), diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 0472698ca192..423cc9486dce 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -101,6 +101,7 @@ do { \ idx = 0; \ } while (0) +#define IDPF_RX_MAX_BUF_SZ (16384 - 128) #define IDPF_RX_BUF_STRIDE 32 #define IDPF_RX_BUF_POST_STRIDE 16 #define IDPF_LOW_WATERMARK 64 From 4648fb2f2e7210c53b85220ee07d42d1e4bae3f9 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 17 Nov 2025 08:03:49 +0100 Subject: [PATCH 46/70] idpf: fix aux device unplugging when rdma is not supported by vport If vport flags do not contain VIRTCHNL2_VPORT_ENABLE_RDMA, driver does not allocate vdev_info for this vport. This leads to kernel NULL pointer dereference in idpf_idc_vport_dev_down(), which references vdev_info for every vport regardless. Check, if vdev_info was ever allocated before unplugging aux device. Fixes: be91128c579c ("idpf: implement RDMA vport auxiliary dev create, init, and destroy") Reviewed-by: Madhu Chittim Signed-off-by: Larysa Zaremba Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_idc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 7e20a07e98e5..6dad0593f7f2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -322,7 +322,7 @@ static void idpf_idc_vport_dev_down(struct idpf_adapter *adapter) for (i = 0; i < adapter->num_alloc_vports; i++) { struct idpf_vport *vport = adapter->vports[i]; - if (!vport) + if (!vport || !vport->vdev_info) continue; idpf_unplug_aux_dev(vport->vdev_info->adev); From 44694ffaa4e27dbda1120abfafa00732f2d52760 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 11 Dec 2025 16:58:31 +0530 Subject: [PATCH 47/70] MAINTAINERS: Add an additional maintainer to the AMD XGBE driver Add Raju Rangoju as an additional maintainer to support the AMD XGBE network device driver. Signed-off-by: Shyam Sundar S K Acked-by: Raju Rangoju Link: https://patch.msgid.link/20251211112831.1781030-1-Shyam-sundar.S-k@amd.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 410fd1f199f2..50d7266e1e62 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1283,6 +1283,7 @@ F: include/uapi/drm/amdxdna_accel.h AMD XGBE DRIVER M: "Shyam Sundar S K" +M: Raju Rangoju L: netdev@vger.kernel.org S: Maintained F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi From 7801edc9badd972cb62cf11c0427e70b6dca239d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 4 Jan 2026 11:39:52 +0200 Subject: [PATCH 48/70] Revert "dsa: mv88e6xxx: make serdes SGMII/Fiber tx amplitude configurable" This reverts commit 926eae604403acfa27ba5b072af458e87e634a50, which never could have produced the intended effect: https://lore.kernel.org/netdev/AM0PR06MB10396BBF8B568D77556FC46F8F7DEA@AM0PR06MB10396.eurprd06.prod.outlook.com/ The reason why it is broken beyond repair in this form is that the mv88e6xxx driver outsources its "tx-p2p-microvolt" property to the OF node of an external Ethernet PHY. This: (a) does not work if there is no external PHY (chip-to-chip connection, or SFP module) (b) pollutes the OF property namespace / bindings of said external PHY ("tx-p2p-microvolt" could have meaning for the Ethernet PHY's SerDes interface as well) We can revisit the idea of making SerDes amplitude configurable once we have proper bindings for the mv88e6xxx SerDes. Until then, remove the code that leaves us with unnecessary baggage. Fixes: 926eae604403 ("dsa: mv88e6xxx: make serdes SGMII/Fiber tx amplitude configurable") Cc: Holger Brunck Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260104093952.486606-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 23 --------------- drivers/net/dsa/mv88e6xxx/chip.h | 4 --- drivers/net/dsa/mv88e6xxx/serdes.c | 46 ------------------------------ drivers/net/dsa/mv88e6xxx/serdes.h | 5 ---- 4 files changed, 78 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b4d48997bf46..09002c853b78 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3364,13 +3364,10 @@ static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port) static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) { - struct device_node *phy_handle = NULL; struct fwnode_handle *ports_fwnode; struct fwnode_handle *port_fwnode; struct dsa_switch *ds = chip->ds; struct mv88e6xxx_port *p; - struct dsa_port *dp; - int tx_amp; int err; u16 reg; u32 val; @@ -3582,23 +3579,6 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return err; } - if (chip->info->ops->serdes_set_tx_amplitude) { - dp = dsa_to_port(ds, port); - if (dp) - phy_handle = of_parse_phandle(dp->dn, "phy-handle", 0); - - if (phy_handle && !of_property_read_u32(phy_handle, - "tx-p2p-microvolt", - &tx_amp)) - err = chip->info->ops->serdes_set_tx_amplitude(chip, - port, tx_amp); - if (phy_handle) { - of_node_put(phy_handle); - if (err) - return err; - } - } - /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. @@ -4768,7 +4748,6 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .serdes_irq_mapping = mv88e6352_serdes_irq_mapping, .serdes_get_regs_len = mv88e6352_serdes_get_regs_len, .serdes_get_regs = mv88e6352_serdes_get_regs, - .serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude, .gpio_ops = &mv88e6352_gpio_ops, .phylink_get_caps = mv88e6352_phylink_get_caps, .pcs_ops = &mv88e6352_pcs_ops, @@ -5044,7 +5023,6 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .serdes_irq_mapping = mv88e6352_serdes_irq_mapping, .serdes_get_regs_len = mv88e6352_serdes_get_regs_len, .serdes_get_regs = mv88e6352_serdes_get_regs, - .serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude, .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, @@ -5481,7 +5459,6 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .serdes_get_stats = mv88e6352_serdes_get_stats, .serdes_get_regs_len = mv88e6352_serdes_get_regs_len, .serdes_get_regs = mv88e6352_serdes_get_regs, - .serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude, .phylink_get_caps = mv88e6352_phylink_get_caps, .pcs_ops = &mv88e6352_pcs_ops, }; diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 2f211e55cb47..e073446ee7d0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -642,10 +642,6 @@ struct mv88e6xxx_ops { void (*serdes_get_regs)(struct mv88e6xxx_chip *chip, int port, void *_p); - /* SERDES SGMII/Fiber Output Amplitude */ - int (*serdes_set_tx_amplitude)(struct mv88e6xxx_chip *chip, int port, - int val); - /* Address Translation Unit operations */ int (*atu_get_hash)(struct mv88e6xxx_chip *chip, u8 *hash); int (*atu_set_hash)(struct mv88e6xxx_chip *chip, u8 hash); diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index b3330211edbc..a936ee80ce00 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -25,14 +25,6 @@ static int mv88e6352_serdes_read(struct mv88e6xxx_chip *chip, int reg, reg, val); } -static int mv88e6352_serdes_write(struct mv88e6xxx_chip *chip, int reg, - u16 val) -{ - return mv88e6xxx_phy_page_write(chip, MV88E6352_ADDR_SERDES, - MV88E6352_SERDES_PAGE_FIBER, - reg, val); -} - static int mv88e6390_serdes_read(struct mv88e6xxx_chip *chip, int lane, int device, int reg, u16 *val) { @@ -506,41 +498,3 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p) p[i] = reg; } } - -static const int mv88e6352_serdes_p2p_to_reg[] = { - /* Index of value in microvolts corresponds to the register value */ - 14000, 112000, 210000, 308000, 406000, 504000, 602000, 700000, -}; - -int mv88e6352_serdes_set_tx_amplitude(struct mv88e6xxx_chip *chip, int port, - int val) -{ - bool found = false; - u16 ctrl, reg; - int err; - int i; - - err = mv88e6352_g2_scratch_port_has_serdes(chip, port); - if (err <= 0) - return err; - - for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_p2p_to_reg); ++i) { - if (mv88e6352_serdes_p2p_to_reg[i] == val) { - reg = i; - found = true; - break; - } - } - - if (!found) - return -EINVAL; - - err = mv88e6352_serdes_read(chip, MV88E6352_SERDES_SPEC_CTRL2, &ctrl); - if (err) - return err; - - ctrl &= ~MV88E6352_SERDES_OUT_AMP_MASK; - ctrl |= reg; - - return mv88e6352_serdes_write(chip, MV88E6352_SERDES_SPEC_CTRL2, ctrl); -} diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index ad887d8601bc..17a3e85fabaa 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -29,8 +29,6 @@ struct phylink_link_state; #define MV88E6352_SERDES_INT_FIBRE_ENERGY BIT(4) #define MV88E6352_SERDES_INT_STATUS 0x13 -#define MV88E6352_SERDES_SPEC_CTRL2 0x1a -#define MV88E6352_SERDES_OUT_AMP_MASK 0x0007 #define MV88E6341_PORT5_LANE 0x15 @@ -140,9 +138,6 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p); int mv88e6390_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port); void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p); -int mv88e6352_serdes_set_tx_amplitude(struct mv88e6xxx_chip *chip, int port, - int val); - /* Return the (first) SERDES lane address a port is using, -errno otherwise. */ static inline int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip, int port) From e5c8eda39a9fc1547d1398d707aa06c1d080abdd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Jan 2026 09:36:30 +0000 Subject: [PATCH 49/70] udp: call skb_orphan() before skb_attempt_defer_free() Standard UDP receive path does not use skb->destructor. But skmsg layer does use it, since it calls skb_set_owner_sk_safe() from udp_read_skb(). This then triggers this warning in skb_attempt_defer_free(): DEBUG_NET_WARN_ON_ONCE(skb->destructor); We must call skb_orphan() to fix this issue. Fixes: 6471658dc66c ("udp: use skb_attempt_defer_free()") Reported-by: syzbot+3e68572cf2286ce5ebe9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/695b83bd.050a0220.1c9965.002b.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260105093630.1976085-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ffe074cb5865..ee63af0ef42c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1851,6 +1851,7 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) sk_peek_offset_bwd(sk, len); if (!skb_shared(skb)) { + skb_orphan(skb); skb_attempt_defer_free(skb); return; } From 13ff3e724207f579d3c814ee05516fefcb4f32e8 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 5 Jan 2026 16:18:39 +0100 Subject: [PATCH 50/70] net: sfp: return the number of written bytes for smbus single byte access We expect the SFP write accessors to return the number of written bytes. We fail to do so for single-byte smbus accesses, which may cause errors when setting a module's high-power state and for some cotsworks modules. Let's return the amount of written bytes, as expected. Fixes: 7662abf4db94 ("net: phy: sfp: Add support for SMBus module access") Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260105151840.144552-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 6166e9196364..84bef5099dda 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -765,7 +765,7 @@ static int sfp_smbus_byte_write(struct sfp *sfp, bool a2, u8 dev_addr, dev_addr++; } - return 0; + return data - (u8 *)buf; } static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) From adb25a46dc0a43173f5ea5f5f58fc8ba28970c7c Mon Sep 17 00:00:00 2001 From: Shivani Gupta Date: Mon, 5 Jan 2026 00:59:05 +0000 Subject: [PATCH 51/70] net/sched: act_api: avoid dereferencing ERR_PTR in tcf_idrinfo_destroy syzbot reported a crash in tc_act_in_hw() during netns teardown where tcf_idrinfo_destroy() passed an ERR_PTR(-EBUSY) value as a tc_action pointer, leading to an invalid dereference. Guard against ERR_PTR entries when iterating the action IDR so teardown does not call tc_act_in_hw() on an error pointer. Fixes: 84a7d6797e6a ("net/sched: acp_api: no longer acquire RTNL in tc_action_net_exit()") Link: https://syzkaller.appspot.com/bug?extid=8f1c492ffa4644ff3826 Reported-by: syzbot+8f1c492ffa4644ff3826@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8f1c492ffa4644ff3826 Signed-off-by: Shivani Gupta Link: https://patch.msgid.link/20260105005905.243423-1-shivani07g@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ff6be5cfe2b0..e1ab0faeb811 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -940,6 +940,8 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops, int ret; idr_for_each_entry_ul(idr, p, tmp, id) { + if (IS_ERR(p)) + continue; if (tc_act_in_hw(p) && !mutex_taken) { rtnl_lock(); mutex_taken = true; From 353cfc0ef3f34ef7fe313ae38dac37f2454a7cf5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 5 Jan 2026 18:33:19 +0200 Subject: [PATCH 52/70] selftests: drv-net: Bring back tool() to driver __init__s The pp_alloc_fail.py test (which doesn't run in NIPA CI?) uses tool, add back the import. Resolves: ImportError: cannot import name 'tool' from 'lib.py' Fixes: 68a052239fc4 ("selftests: drv-net: update remaining Python init files") Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20260105163319.47619-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/lib/py/__init__.py | 4 ++-- tools/testing/selftests/net/lib/py/__init__.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 766bfc4ad842..d5d247eca6b7 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -22,7 +22,7 @@ try: NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, wait_port_listen, wait_file + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file, tool from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup, ksft_variants, KsftNamedVariant @@ -37,7 +37,7 @@ try: "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", "fd_read_timeout", "ip", "rand_port", - "wait_port_listen", "wait_file", + "wait_port_listen", "wait_file", "tool", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", "ksft_setup", "ksft_variants", "KsftNamedVariant", diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 40f9ce307dd1..f528b67639de 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -13,7 +13,7 @@ from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ from .netns import NetNS, NetNSEnter from .nsim import NetdevSim, NetdevSimDev from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ - bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file + bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file, tool from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily @@ -26,7 +26,7 @@ __all__ = ["KSRC", "NetNS", "NetNSEnter", "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", "bpftool", "ip", "ethtool", "bpftrace", "rand_port", - "wait_port_listen", "wait_file", + "wait_port_listen", "wait_file", "tool", "NetdevSim", "NetdevSimDev", "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", From d83dddffe1904e4a576d11a541878850a8e64cd2 Mon Sep 17 00:00:00 2001 From: Yohei Kojima Date: Tue, 6 Jan 2026 00:17:32 +0900 Subject: [PATCH 53/70] net: netdevsim: fix inconsistent carrier state after link/unlink This patch fixes the edge case behavior on ifup/ifdown and linking/unlinking two netdevsim interfaces: 1. unlink two interfaces netdevsim1 and netdevsim2 2. ifdown netdevsim1 3. ifup netdevsim1 4. link two interfaces netdevsim1 and netdevsim2 5. (Now two interfaces are linked in terms of netdevsim peer, but carrier state of the two interfaces remains DOWN.) This inconsistent behavior is caused by the current implementation, which only cares about the "link, then ifup" order, not "ifup, then link" order. This patch fixes the inconsistency by calling netif_carrier_on() when two netdevsim interfaces are linked. This patch fixes buggy behavior on NetworkManager-based systems which causes the netdevsim test to fail with the following error: # timeout set to 600 # selftests: drivers/net/netdevsim: peer.sh # 2025/12/25 00:54:03 socat[9115] W address is opened in read-write mode but only supports read-only # 2025/12/25 00:56:17 socat[9115] W connect(7, AF=2 192.168.1.1:1234, 16): Connection timed out # 2025/12/25 00:56:17 socat[9115] E TCP:192.168.1.1:1234: Connection timed out # expected 3 bytes, got 0 # 2025/12/25 00:56:17 socat[9109] W exiting on signal 15 not ok 13 selftests: drivers/net/netdevsim: peer.sh # exit=1 This patch also solves timeout on TCP Fast Open (TFO) test in NetworkManager-based systems because it also depends on netdevsim's carrier consistency. Fixes: 1a8fed52f7be ("netdevsim: set the carrier when the device goes up") Signed-off-by: Yohei Kojima Reviewed-by: Breno Leitao Link: https://patch.msgid.link/602c9e1ba5bb2ee1997bb38b1d866c9c3b807ae9.1767624906.git.yk@y-koj.net Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bus.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 70e8c38ddad6..d16b95304aa7 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -332,6 +332,11 @@ static ssize_t link_device_store(const struct bus_type *bus, const char *buf, si rcu_assign_pointer(nsim_a->peer, nsim_b); rcu_assign_pointer(nsim_b->peer, nsim_a); + if (netif_running(dev_a) && netif_running(dev_b)) { + netif_carrier_on(dev_a); + netif_carrier_on(dev_b); + } + out_err: put_net(ns_b); put_net(ns_a); @@ -381,6 +386,9 @@ static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, if (!peer) goto out_put_netns; + netif_carrier_off(dev); + netif_carrier_off(peer->netdev); + err = 0; RCU_INIT_POINTER(nsim->peer, NULL); RCU_INIT_POINTER(peer->peer, NULL); From 75df712cddfd6c76e0e255584766385648ad9529 Mon Sep 17 00:00:00 2001 From: Yohei Kojima Date: Tue, 6 Jan 2026 00:17:33 +0900 Subject: [PATCH 54/70] selftests: netdevsim: add carrier state consistency test This commit adds a test case for netdevsim carrier state consistency. Specifically, the added test verifies the carrier state during the following operations: 1. Unlink two netdevsims 2. ifdown one netdevsim, then ifup again 3. Link the netdevsims again 4. ifdown one netdevsim, then ifup again These steps verifies that the carrier is UP iff two netdevsims are linked and ifuped. Signed-off-by: Yohei Kojima Link: https://patch.msgid.link/481e2729e53b6074ebfc0ad85764d8feb244de8c.1767624906.git.yk@y-koj.net Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/netdevsim/peer.sh | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index 7f32b5600925..f4721f7636dd 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -52,6 +52,39 @@ cleanup_ns() ip netns del nssv } +is_carrier_up() +{ + local netns="$1" + local nsim_dev="$2" + + test "$(ip netns exec "$netns" \ + cat /sys/class/net/"$nsim_dev"/carrier 2>/dev/null)" -eq 1 +} + +assert_carrier_up() +{ + local netns="$1" + local nsim_dev="$2" + + if ! is_carrier_up "$netns" "$nsim_dev"; then + echo "$nsim_dev's carrier should be UP, but it isn't" + cleanup_ns + exit 1 + fi +} + +assert_carrier_down() +{ + local netns="$1" + local nsim_dev="$2" + + if is_carrier_up "$netns" "$nsim_dev"; then + echo "$nsim_dev's carrier should be DOWN, but it isn't" + cleanup_ns + exit 1 + fi +} + ### ### Code start ### @@ -113,6 +146,32 @@ if [ $? -eq 0 ]; then exit 1 fi +# netdevsim carrier state consistency checking +assert_carrier_up nssv "$NSIM_DEV_1_NAME" +assert_carrier_up nscl "$NSIM_DEV_2_NAME" + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > "$NSIM_DEV_SYS_UNLINK" + +assert_carrier_down nssv "$NSIM_DEV_1_NAME" +assert_carrier_down nscl "$NSIM_DEV_2_NAME" + +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up + +assert_carrier_down nssv "$NSIM_DEV_1_NAME" +assert_carrier_down nscl "$NSIM_DEV_2_NAME" + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK + +assert_carrier_up nssv "$NSIM_DEV_1_NAME" +assert_carrier_up nscl "$NSIM_DEV_2_NAME" + +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up + +assert_carrier_up nssv "$NSIM_DEV_1_NAME" +assert_carrier_up nscl "$NSIM_DEV_2_NAME" + # send/recv packets tmp_file=$(mktemp) From 6abcf751bc084804a9e5b3051442e8a2ce67f48a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 5 Jan 2026 09:43:31 +0100 Subject: [PATCH 55/70] net: airoha: Fix schedule while atomic in airoha_ppe_deinit() airoha_ppe_deinit() runs airoha_npu_ppe_deinit() in atomic context. airoha_npu_ppe_deinit routine allocates ppe_data buffer with GFP_KERNEL flag. Rely on rcu_replace_pointer in airoha_ppe_deinit routine in order to fix schedule while atomic issue in airoha_npu_ppe_deinit() since we do not need atomic context there. Fixes: 00a7678310fe3 ("net: airoha: Introduce flowtable offload support") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260105-airoha-fw-ethtool-v2-1-3b32b158cc31@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 0caabb0c3aa0..2221bafaf7c9 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -1547,13 +1547,16 @@ void airoha_ppe_deinit(struct airoha_eth *eth) { struct airoha_npu *npu; - rcu_read_lock(); - npu = rcu_dereference(eth->npu); + mutex_lock(&flow_offload_mutex); + + npu = rcu_replace_pointer(eth->npu, NULL, + lockdep_is_held(&flow_offload_mutex)); if (npu) { npu->ops.ppe_deinit(npu); airoha_npu_put(npu); } - rcu_read_unlock(); + + mutex_unlock(&flow_offload_mutex); rhashtable_destroy(ð->ppe->l2_flows); rhashtable_destroy(ð->flow_table); From 21cbf883d073abbfe09e3924466aa5e0449e7261 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Jan 2026 10:19:27 +0000 Subject: [PATCH 56/70] wifi: avoid kernel-infoleak from struct iw_point struct iw_point has a 32bit hole on 64bit arches. struct iw_point { void __user *pointer; /* Pointer to the data (in user space) */ __u16 length; /* number of fields or size in bytes */ __u16 flags; /* Optional params */ }; Make sure to zero the structure to avoid disclosing 32bits of kernel data to user space. Fixes: 87de87d5e47f ("wext: Dispatch and handle compat ioctls entirely in net/wireless/wext.c") Reported-by: syzbot+bfc7323743ca6dbcc3d3@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/695f83f3.050a0220.1c677c.0392.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260108101927.857582-1-edumazet@google.com Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 4 ++++ net/wireless/wext-priv.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index c32a7c6903d5..7b8e94214b07 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1101,6 +1101,10 @@ static int compat_standard_call(struct net_device *dev, return ioctl_standard_call(dev, iwr, cmd, info, handler); iwp_compat = (struct compat_iw_point *) &iwr->u.data; + + /* struct iw_point has a 32bit hole on 64bit arches. */ + memset(&iwp, 0, sizeof(iwp)); + iwp.pointer = compat_ptr(iwp_compat->pointer); iwp.length = iwp_compat->length; iwp.flags = iwp_compat->flags; diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c index 674d426a9d24..37d1147019c2 100644 --- a/net/wireless/wext-priv.c +++ b/net/wireless/wext-priv.c @@ -228,6 +228,10 @@ int compat_private_call(struct net_device *dev, struct iwreq *iwr, struct iw_point iwp; iwp_compat = (struct compat_iw_point *) &iwr->u.data; + + /* struct iw_point has a 32bit hole on 64bit arches. */ + memset(&iwp, 0, sizeof(iwp)); + iwp.pointer = compat_ptr(iwp_compat->pointer); iwp.length = iwp_compat->length; iwp.flags = iwp_compat->flags; From 333418872bfecf4843f1ded7a4151685dfcf07d5 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 7 Jan 2026 14:36:51 +0100 Subject: [PATCH 57/70] wifi: mac80211_hwsim: fix typo in frequency notification The NAN notification is for 5745 MHz which corresponds to channel 149 and not 5475 which is not actually a valid channel. This could result in a NULL pointer dereference in cfg80211_next_nan_dw_notif. Fixes: a37a6f54439b ("wifi: mac80211_hwsim: Add simulation support for NAN device") Signed-off-by: Benjamin Berg Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20260107143652.7dab2035836f.Iacbaf7bb94ed5c14a0928a625827e4137d8bfede@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 551f5eb4e747..92427f527286 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -4040,7 +4040,7 @@ mac80211_hwsim_nan_dw_start(struct hrtimer *timer) ieee80211_vif_to_wdev(data->nan_device_vif); if (data->nan_curr_dw_band == NL80211_BAND_5GHZ) - ch = ieee80211_get_channel(hw->wiphy, 5475); + ch = ieee80211_get_channel(hw->wiphy, 5745); else ch = ieee80211_get_channel(hw->wiphy, 2437); From c0d82ba9612fb65a8394af639f1427dbe87fb788 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 7 Jan 2026 14:37:36 +0100 Subject: [PATCH 58/70] wifi: mac80211: don't iterate not running interfaces for_each_chanctx_user_* was introdcued as a replacement for for_each_sdata_link, which visits also other chanctx users that are not link. for_each_sdata_link skips not running interfaces, do the same for for_each_chanctx_user_* Fixes: 1ce954c98b89 ("wifi: mac80211: add and use chanctx usage iteration") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260107143736.55c084e2a976.I38b7b904a135dadca339321923b501b2c2c5c8c0@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index d0bfb1216401..d8c5f11afc15 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -90,6 +90,9 @@ next_interface: /* next (or first) interface */ iter->sdata = list_prepare_entry(iter->sdata, &local->interfaces, list); list_for_each_entry_continue(iter->sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(iter->sdata)) + continue; + /* AP_VLAN has a chanctx pointer but follows AP */ if (iter->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; From 6f385937160174b31a5e4105e759406f0b128494 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 7 Jan 2026 14:38:05 +0100 Subject: [PATCH 59/70] wifi: mac80211_hwsim: disable BHs for hwsim_radio_lock The hwsim_radio_lock spinlock expects bottom-half to be disabled, fix the call in mac80211_hwsim_nan_stop to ensure BHs are disabled. Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20260107143805.ce7406511608.I688f8b19346e94c1f8de0cdadde072054d4b861c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 92427f527286..79cc63272134 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -4112,14 +4112,14 @@ static int mac80211_hwsim_stop_nan(struct ieee80211_hw *hw, hrtimer_cancel(&data->nan_timer); data->nan_device_vif = NULL; - spin_lock(&hwsim_radio_lock); + spin_lock_bh(&hwsim_radio_lock); list_for_each_entry(data2, &hwsim_radios, list) { if (data2->nan_device_vif) { nan_cluster_running = true; break; } } - spin_unlock(&hwsim_radio_lock); + spin_unlock_bh(&hwsim_radio_lock); if (!nan_cluster_running) memset(hwsim_nan_cluster_id, 0, ETH_ALEN); From d594cc6f2c588810888df70c83a9654b6bc7942d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Dec 2025 11:52:42 +0100 Subject: [PATCH 60/70] wifi: mac80211: restore non-chanctx injection behaviour During the transition to use channel contexts throughout, the ability to do injection while in monitor mode concurrent with another interface was lost, since the (virtual) monitor won't have a chanctx assigned in this scenario. It's harder to fix drivers that actually transitioned to using channel contexts themselves, such as mt76, but it's easy to do those that are (still) just using the emulation. Do that. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=218763 Reported-and-tested-by: Oscar Alfonso Diaz Fixes: 0a44dfc07074 ("wifi: mac80211: simplify non-chanctx drivers") Link: https://patch.msgid.link/20251216105242.18366-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9d8b0a25f73c..1b55e8340413 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2397,6 +2397,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (chanctx_conf) chandef = &chanctx_conf->def; + else if (local->emulate_chanctx) + chandef = &local->hw.conf.chandef; else goto fail_rcu; From a203dbeeca15a9b924f0d51f510921f4bae96801 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Mon, 22 Dec 2025 10:29:07 +0800 Subject: [PATCH 61/70] wifi: mac80211: collect station statistics earlier when disconnect In __sta_info_destroy_part2(), station statistics are requested after the IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST transition. This is problematic because the driver may be unable to handle the request due to the STA being in the NOTEXIST state (i.e. if the driver destroys the underlying data when transitioning to NOTEXIST). Move the statistics collection to before the state transition to avoid this issue. Signed-off-by: Baochen Qiang Link: https://patch.msgid.link/20251222-mac80211-move-station-stats-collection-earlier-v1-1-12cd4e42c633@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f4d3b67fda06..1a995bc301b1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1533,6 +1533,10 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) } } + sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); + if (sinfo) + sta_set_sinfo(sta, sinfo, true); + if (sta->uploaded) { ret = drv_sta_state(local, sdata, sta, IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); @@ -1541,9 +1545,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); - if (sinfo) - sta_set_sinfo(sta, sinfo, true); cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); From c1d73b1480235731e35c81df70b08f4714a7d095 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Mon, 5 Jan 2026 20:41:00 -0700 Subject: [PATCH 62/70] net/sched: sch_qfq: Fix NULL deref when deactivating inactive aggregate in qfq_reset `qfq_class->leaf_qdisc->q.qlen > 0` does not imply that the class itself is active. Two qfq_class objects may point to the same leaf_qdisc. This happens when: 1. one QFQ qdisc is attached to the dev as the root qdisc, and 2. another QFQ qdisc is temporarily referenced (e.g., via qdisc_get() / qdisc_put()) and is pending to be destroyed, as in function tc_new_tfilter. When packets are enqueued through the root QFQ qdisc, the shared leaf_qdisc->q.qlen increases. At the same time, the second QFQ qdisc triggers qdisc_put and qdisc_destroy: the qdisc enters qfq_reset() with its own q->q.qlen == 0, but its class's leaf qdisc->q.qlen > 0. Therefore, the qfq_reset would wrongly deactivate an inactive aggregate and trigger a null-deref in qfq_deactivate_agg: [ 0.903172] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 0.903571] #PF: supervisor write access in kernel mode [ 0.903860] #PF: error_code(0x0002) - not-present page [ 0.904177] PGD 10299b067 P4D 10299b067 PUD 10299c067 PMD 0 [ 0.904502] Oops: Oops: 0002 [#1] SMP NOPTI [ 0.904737] CPU: 0 UID: 0 PID: 135 Comm: exploit Not tainted 6.19.0-rc3+ #2 NONE [ 0.905157] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 [ 0.905754] RIP: 0010:qfq_deactivate_agg (include/linux/list.h:992 (discriminator 2) include/linux/list.h:1006 (discriminator 2) net/sched/sch_qfq.c:1367 (discriminator 2) net/sched/sch_qfq.c:1393 (discriminator 2)) [ 0.906046] Code: 0f 84 4d 01 00 00 48 89 70 18 8b 4b 10 48 c7 c2 ff ff ff ff 48 8b 78 08 48 d3 e2 48 21 f2 48 2b 13 48 8b 30 48 d3 ea 8b 4b 18 0 Code starting with the faulting instruction =========================================== 0: 0f 84 4d 01 00 00 je 0x153 6: 48 89 70 18 mov %rsi,0x18(%rax) a: 8b 4b 10 mov 0x10(%rbx),%ecx d: 48 c7 c2 ff ff ff ff mov $0xffffffffffffffff,%rdx 14: 48 8b 78 08 mov 0x8(%rax),%rdi 18: 48 d3 e2 shl %cl,%rdx 1b: 48 21 f2 and %rsi,%rdx 1e: 48 2b 13 sub (%rbx),%rdx 21: 48 8b 30 mov (%rax),%rsi 24: 48 d3 ea shr %cl,%rdx 27: 8b 4b 18 mov 0x18(%rbx),%ecx ... [ 0.907095] RSP: 0018:ffffc900004a39a0 EFLAGS: 00010246 [ 0.907368] RAX: ffff8881043a0880 RBX: ffff888102953340 RCX: 0000000000000000 [ 0.907723] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 0.908100] RBP: ffff888102952180 R08: 0000000000000000 R09: 0000000000000000 [ 0.908451] R10: ffff8881043a0000 R11: 0000000000000000 R12: ffff888102952000 [ 0.908804] R13: ffff888102952180 R14: ffff8881043a0ad8 R15: ffff8881043a0880 [ 0.909179] FS: 000000002a1a0380(0000) GS:ffff888196d8d000(0000) knlGS:0000000000000000 [ 0.909572] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.909857] CR2: 0000000000000000 CR3: 0000000102993002 CR4: 0000000000772ef0 [ 0.910247] PKRU: 55555554 [ 0.910391] Call Trace: [ 0.910527] [ 0.910638] qfq_reset_qdisc (net/sched/sch_qfq.c:357 net/sched/sch_qfq.c:1485) [ 0.910826] qdisc_reset (include/linux/skbuff.h:2195 include/linux/skbuff.h:2501 include/linux/skbuff.h:3424 include/linux/skbuff.h:3430 net/sched/sch_generic.c:1036) [ 0.911040] __qdisc_destroy (net/sched/sch_generic.c:1076) [ 0.911236] tc_new_tfilter (net/sched/cls_api.c:2447) [ 0.911447] rtnetlink_rcv_msg (net/core/rtnetlink.c:6958) [ 0.911663] ? __pfx_rtnetlink_rcv_msg (net/core/rtnetlink.c:6861) [ 0.911894] netlink_rcv_skb (net/netlink/af_netlink.c:2550) [ 0.912100] netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344) [ 0.912296] ? __alloc_skb (net/core/skbuff.c:706) [ 0.912484] netlink_sendmsg (net/netlink/af_netlink.c:1894) [ 0.912682] sock_write_iter (net/socket.c:727 (discriminator 1) net/socket.c:742 (discriminator 1) net/socket.c:1195 (discriminator 1)) [ 0.912880] vfs_write (fs/read_write.c:593 fs/read_write.c:686) [ 0.913077] ksys_write (fs/read_write.c:738) [ 0.913252] do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1)) [ 0.913438] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:131) [ 0.913687] RIP: 0033:0x424c34 [ 0.913844] Code: 89 02 48 c7 c0 ff ff ff ff eb bd 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d 2d 44 09 00 00 74 13 b8 01 00 00 00 0f 05 9 Code starting with the faulting instruction =========================================== 0: 89 02 mov %eax,(%rdx) 2: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 9: eb bd jmp 0xffffffffffffffc8 b: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1) 12: 00 00 00 15: 90 nop 16: f3 0f 1e fa endbr64 1a: 80 3d 2d 44 09 00 00 cmpb $0x0,0x9442d(%rip) # 0x9444e 21: 74 13 je 0x36 23: b8 01 00 00 00 mov $0x1,%eax 28: 0f 05 syscall 2a: 09 .byte 0x9 [ 0.914807] RSP: 002b:00007ffea1938b78 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ 0.915197] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000424c34 [ 0.915556] RDX: 000000000000003c RSI: 000000002af378c0 RDI: 0000000000000003 [ 0.915912] RBP: 00007ffea1938bc0 R08: 00000000004b8820 R09: 0000000000000000 [ 0.916297] R10: 0000000000000001 R11: 0000000000000202 R12: 00007ffea1938d28 [ 0.916652] R13: 00007ffea1938d38 R14: 00000000004b3828 R15: 0000000000000001 [ 0.917039] [ 0.917158] Modules linked in: [ 0.917316] CR2: 0000000000000000 [ 0.917484] ---[ end trace 0000000000000000 ]--- [ 0.917717] RIP: 0010:qfq_deactivate_agg (include/linux/list.h:992 (discriminator 2) include/linux/list.h:1006 (discriminator 2) net/sched/sch_qfq.c:1367 (discriminator 2) net/sched/sch_qfq.c:1393 (discriminator 2)) [ 0.917978] Code: 0f 84 4d 01 00 00 48 89 70 18 8b 4b 10 48 c7 c2 ff ff ff ff 48 8b 78 08 48 d3 e2 48 21 f2 48 2b 13 48 8b 30 48 d3 ea 8b 4b 18 0 Code starting with the faulting instruction =========================================== 0: 0f 84 4d 01 00 00 je 0x153 6: 48 89 70 18 mov %rsi,0x18(%rax) a: 8b 4b 10 mov 0x10(%rbx),%ecx d: 48 c7 c2 ff ff ff ff mov $0xffffffffffffffff,%rdx 14: 48 8b 78 08 mov 0x8(%rax),%rdi 18: 48 d3 e2 shl %cl,%rdx 1b: 48 21 f2 and %rsi,%rdx 1e: 48 2b 13 sub (%rbx),%rdx 21: 48 8b 30 mov (%rax),%rsi 24: 48 d3 ea shr %cl,%rdx 27: 8b 4b 18 mov 0x18(%rbx),%ecx ... [ 0.918902] RSP: 0018:ffffc900004a39a0 EFLAGS: 00010246 [ 0.919198] RAX: ffff8881043a0880 RBX: ffff888102953340 RCX: 0000000000000000 [ 0.919559] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 0.919908] RBP: ffff888102952180 R08: 0000000000000000 R09: 0000000000000000 [ 0.920289] R10: ffff8881043a0000 R11: 0000000000000000 R12: ffff888102952000 [ 0.920648] R13: ffff888102952180 R14: ffff8881043a0ad8 R15: ffff8881043a0880 [ 0.921014] FS: 000000002a1a0380(0000) GS:ffff888196d8d000(0000) knlGS:0000000000000000 [ 0.921424] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.921710] CR2: 0000000000000000 CR3: 0000000102993002 CR4: 0000000000772ef0 [ 0.922097] PKRU: 55555554 [ 0.922240] Kernel panic - not syncing: Fatal exception [ 0.922590] Kernel Offset: disabled Fixes: 0545a3037773 ("pkt_sched: QFQ - quick fair queue scheduler") Signed-off-by: Xiang Mei Link: https://patch.msgid.link/20260106034100.1780779-1-xmei5@asu.edu Signed-off-by: Jakub Kicinski --- net/sched/sch_qfq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d920f57dc6d7..f4013b547438 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1481,7 +1481,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (cl->qdisc->q.qlen > 0) + if (cl_is_active(cl)) qfq_deactivate_class(q, cl); qdisc_reset(cl->qdisc); From a4e305ed60f7c41bbf9aabc16dd75267194e0de3 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Tue, 6 Jan 2026 10:47:21 +0100 Subject: [PATCH 63/70] net: 3com: 3c59x: fix possible null dereference in vortex_probe1() pdev can be null and free_ring: can be called in 1297 with a null pdev. Fixes: 55c82617c3e8 ("3c59x: convert to generic DMA API") Cc: Signed-off-by: Thomas Fourier Link: https://patch.msgid.link/20260106094731.25819-2-fourier.thomas@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c59x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 8c9cc97efd4e..4fe4efdb3737 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1473,7 +1473,7 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq, return 0; free_ring: - dma_free_coherent(&pdev->dev, + dma_free_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE + sizeof(struct boom_tx_desc) * TX_RING_SIZE, vp->rx_ring, vp->rx_ring_dma); From afa27621a28af317523e0836dad430bec551eb54 Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Tue, 6 Jan 2026 10:48:21 +0200 Subject: [PATCH 64/70] net: usb: pegasus: fix memory leak in update_eth_regs_async() When asynchronously writing to the device registers and if usb_submit_urb() fail, the code fail to release allocated to this point resources. Fixes: 323b34963d11 ("drivers: net: usb: pegasus: fix control urb submission") Signed-off-by: Petko Manolov Link: https://patch.msgid.link/20260106084821.3746677-1-petko.manolov@konsulko.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/pegasus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 81ca64debc5b..c514483134f0 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -168,6 +168,8 @@ static int update_eth_regs_async(pegasus_t *pegasus) netif_device_detach(pegasus->net); netif_err(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + usb_free_urb(async_urb); + kfree(req); } return ret; } From 3358995b1a7f9dcb52a56ec8251570d71024dad0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 6 Jan 2026 06:31:14 -0800 Subject: [PATCH 65/70] bnxt_en: Fix NULL pointer crash in bnxt_ptp_enable during error cleanup When bnxt_init_one() fails during initialization (e.g., bnxt_init_int_mode returns -ENODEV), the error path calls bnxt_free_hwrm_resources() which destroys the DMA pool and sets bp->hwrm_dma_pool to NULL. Subsequently, bnxt_ptp_clear() is called, which invokes ptp_clock_unregister(). Since commit a60fc3294a37 ("ptp: rework ptp_clock_unregister() to disable events"), ptp_clock_unregister() now calls ptp_disable_all_events(), which in turn invokes the driver's .enable() callback (bnxt_ptp_enable()) to disable PTP events before completing the unregistration. bnxt_ptp_enable() attempts to send HWRM commands via bnxt_ptp_cfg_pin() and bnxt_ptp_cfg_event(), both of which call hwrm_req_init(). This function tries to allocate from bp->hwrm_dma_pool, causing a NULL pointer dereference: bnxt_en 0000:01:00.0 (unnamed net_device) (uninitialized): bnxt_init_int_mode err: ffffffed KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] Call Trace: __hwrm_req_init (drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c:72) bnxt_ptp_enable (drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:323 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:517) ptp_disable_all_events (drivers/ptp/ptp_chardev.c:66) ptp_clock_unregister (drivers/ptp/ptp_clock.c:518) bnxt_ptp_clear (drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:1134) bnxt_init_one (drivers/net/ethernet/broadcom/bnxt/bnxt.c:16889) Lines are against commit f8f9c1f4d0c7 ("Linux 6.19-rc3") Fix this by clearing and unregistering ptp (bnxt_ptp_clear()) before freeing HWRM resources. Suggested-by: Pavan Chebbi Signed-off-by: Breno Leitao Fixes: a60fc3294a37 ("ptp: rework ptp_clock_unregister() to disable events") Cc: stable@vger.kernel.org Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20260106-bnxt-v3-1-71f37e11446a@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d160e54ac121..8419d1eb4035 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16891,12 +16891,12 @@ init_err_dl: init_err_pci_clean: bnxt_hwrm_func_drv_unrgtr(bp); - bnxt_free_hwrm_resources(bp); - bnxt_hwmon_uninit(bp); - bnxt_ethtool_free(bp); bnxt_ptp_clear(bp); kfree(bp->ptp_cfg); bp->ptp_cfg = NULL; + bnxt_free_hwrm_resources(bp); + bnxt_hwmon_uninit(bp); + bnxt_ethtool_free(bp); kfree(bp->fw_health); bp->fw_health = NULL; bnxt_cleanup_pci(bp); From 7d11e047eda5f98514ae62507065ac961981c025 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 6 Jan 2026 10:05:46 -0500 Subject: [PATCH 66/70] net: do not write to msg_get_inq in callee NULL pointer dereference fix. msg_get_inq is an input field from caller to callee. Don't set it in the callee, as the caller may not clear it on struct reuse. This is a kernel-internal variant of msghdr only, and the only user does reinitialize the field. So this is not critical for that reason. But it is more robust to avoid the write, and slightly simpler code. And it fixes a bug, see below. Callers set msg_get_inq to request the input queue length to be returned in msg_inq. This is equivalent to but independent from the SO_INQ request to return that same info as a cmsg (tp->recvmsg_inq). To reduce branching in the hot path the second also sets the msg_inq. That is WAI. This is a fix to commit 4d1442979e4a ("af_unix: don't post cmsg for SO_INQ unless explicitly asked for"), which fixed the inverse. Also avoid NULL pointer dereference in unix_stream_read_generic if state->msg is NULL and msg->msg_get_inq is written. A NULL state->msg can happen when splicing as of commit 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets"). Also collapse two branches using a bitwise or. Cc: stable@vger.kernel.org Fixes: 4d1442979e4a ("af_unix: don't post cmsg for SO_INQ unless explicitly asked for") Link: https://lore.kernel.org/netdev/willemdebruijn.kernel.24d8030f7a3de@gmail.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Jens Axboe Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260106150626.3944363-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 8 +++----- net/unix/af_unix.c | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f035440c475a..d5319ebe2452 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2652,10 +2652,8 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (sk->sk_state == TCP_LISTEN) goto out; - if (tp->recvmsg_inq) { + if (tp->recvmsg_inq) *cmsg_flags = TCP_CMSG_INQ; - msg->msg_get_inq = 1; - } timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); /* Urgent data needs to be handled specially. */ @@ -2929,10 +2927,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags); release_sock(sk); - if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { + if ((cmsg_flags | msg->msg_get_inq) && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); - if (msg->msg_get_inq) { + if ((cmsg_flags & TCP_CMSG_INQ) | msg->msg_get_inq) { msg->msg_inq = tcp_inq_hint(sk); if (cmsg_flags & TCP_CMSG_INQ) put_cmsg(msg, SOL_TCP, TCP_CM_INQ, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a7ca74653d94..d0511225799b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2904,7 +2904,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, unsigned int last_len; struct unix_sock *u; int copied = 0; - bool do_cmsg; int err = 0; long timeo; int target; @@ -2930,9 +2929,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, u = unix_sk(sk); - do_cmsg = READ_ONCE(u->recvmsg_inq); - if (do_cmsg) - msg->msg_get_inq = 1; redo: /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg @@ -3090,9 +3086,11 @@ unlock: mutex_unlock(&u->iolock); if (msg) { + bool do_cmsg = READ_ONCE(u->recvmsg_inq); + scm_recv_unix(sock, msg, &scm, flags); - if (msg->msg_get_inq && (copied ?: err) >= 0) { + if ((do_cmsg | msg->msg_get_inq) && (copied ?: err) >= 0) { msg->msg_inq = READ_ONCE(u->inq_len); if (do_cmsg) put_cmsg(msg, SOL_SOCKET, SCM_INQ, From 790792ebc9603a7ccbf6996cb537d89607e3a75b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Jan 2026 08:34:26 -0800 Subject: [PATCH 67/70] tools: ynl: don't install tests make's install target is meant for installing the production artifacts, AFAIU. Don't install test_ynl_cli and test_ynl_ethtool from under the main YNL install target. The install target under tests/ is retained in case someone wants the tests to be installed. Fixes: 308b7dee3e5c ("tools: ynl: add YNL test framework") Reviewed-by: Hangbin Liu Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20260106163426.1468943-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index 7736b492f559..c2f3e8b3f2ac 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -51,7 +51,6 @@ install: libynl.a lib/*.h @echo -e "\tINSTALL pyynl" @pip install --prefix=$(DESTDIR)$(prefix) . @make -C generated install - @make -C tests install run_tests: @$(MAKE) -C tests run_tests From 4d984b0574ff708e66152763fbfdef24ea40933f Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Wed, 7 Jan 2026 10:01:36 +0100 Subject: [PATCH 68/70] atm: Fix dma_free_coherent() size The size of the buffer is not the same when alloc'd with dma_alloc_coherent() in he_init_tpdrq() and freed. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Signed-off-by: Thomas Fourier Link: https://patch.msgid.link/20260107090141.80900-2-fourier.thomas@gmail.com Signed-off-by: Jakub Kicinski --- drivers/atm/he.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/atm/he.c b/drivers/atm/he.c index ad91cc6a34fc..92a041d5387b 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1587,7 +1587,8 @@ he_stop(struct he_dev *he_dev) he_dev->tbrq_base, he_dev->tbrq_phys); if (he_dev->tpdrq_base) - dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), + dma_free_coherent(&he_dev->pci_dev->dev, + CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq), he_dev->tpdrq_base, he_dev->tpdrq_phys); dma_pool_destroy(he_dev->tpd_pool); From 4b5bdabb5449b652122e43f507f73789041d4abe Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 7 Jan 2026 17:12:04 +0800 Subject: [PATCH 69/70] net: enetc: fix build warning when PAGE_SIZE is greater than 128K The max buffer size of ENETC RX BD is 0xFFFF bytes, so if the PAGE_SIZE is greater than 128K, ENETC_RXB_DMA_SIZE and ENETC_RXB_DMA_SIZE_XDP will be greater than 0xFFFF, thus causing a build warning. This will not cause any practical issues because ENETC is currently only used on the ARM64 platform, and the max PAGE_SIZE is 64K. So this patch is only for fixing the build warning that occurs when compiling ENETC drivers for other platforms. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601050637.kHEKKOG7-lkp@intel.com/ Fixes: e59bc32df2e9 ("net: enetc: correct the value of ENETC_RXB_TRUESIZE") Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20260107091204.1980222-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index dce27bd67a7d..aecd40aeef9c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -79,9 +79,9 @@ struct enetc_lso_t { #define ENETC_RXB_TRUESIZE (PAGE_SIZE >> 1) #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */ #define ENETC_RXB_DMA_SIZE \ - (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD) + min(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD, 0xffff) #define ENETC_RXB_DMA_SIZE_XDP \ - (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM) + min(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM, 0xffff) struct enetc_rx_swbd { dma_addr_t dma; From c92510f5e3f82ba11c95991824a41e59a9c5ed81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Jan 2026 21:22:50 +0000 Subject: [PATCH 70/70] arp: do not assume dev_hard_header() does not change skb->head arp_create() is the only dev_hard_header() caller making assumption about skb->head being unchanged. A recent commit broke this assumption. Initialize @arp pointer after dev_hard_header() call. Fixes: db5b4e39c4e6 ("ip6_gre: make ip6gre_header() robust") Reported-by: syzbot+58b44a770a1585795351@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260107212250.384552-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/arp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7f3863daaa40..c8c3e1713c0e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -564,7 +564,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, skb_reserve(skb, hlen); skb_reset_network_header(skb); - arp = skb_put(skb, arp_hdr_len(dev)); + skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; skb->protocol = htons(ETH_P_ARP); if (!src_hw) @@ -572,12 +572,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, if (!dest_hw) dest_hw = dev->broadcast; - /* - * Fill the device header for the ARP frame + /* Fill the device header for the ARP frame. + * Note: skb->head can be changed. */ if (dev_hard_header(skb, dev, ptype, dest_hw, src_hw, skb->len) < 0) goto out; + arp = arp_hdr(skb); /* * Fill out the arp protocol part. *