From 7711f4bb4b360d9c0ff84db1c0ec91e385625047 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 4 Dec 2025 12:20:35 +0100
Subject: [PATCH 01/70] netfilter: nft_set_pipapo: fix range overlap detection

set->klen has to be used, not sizeof().  The latter only compares a
single register but a full check of the entire key is needed.

Example:
table ip t {
        map s {
                typeof iifname . ip saddr : verdict
                flags interval
        }
}

nft add element t s '{ "lo" . 10.0.0.0/24 : drop }' # no error, expected
nft add element t s '{ "lo" . 10.0.0.0/24 : drop }' # no error, expected
nft add element t s '{ "lo" . 10.0.0.0/8 : drop }' # bug: no error

The 3rd 'add element' should be rejected via -ENOTEMPTY, not -EEXIST,
so userspace / nft can report an error to the user.

The latter is only correct for the 2nd case (re-add of existing element).

As-is, userspace is told that the command was successful, but no elements were
added.

After this patch, 3rd command gives:
Error: Could not process rule: File exists
add element t s { "lo" . 127.0.0.0/8 . "lo"  : drop }
                  ^^^^^^^^^^^^^^^^^^^^^^^^^

Fixes: 0eb4b5ee33f2 ("netfilter: nft_set_pipapo: Separate partial and complete overlap cases on insertion")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nft_set_pipapo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 112fe46788b6..6d77a5f0088a 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1317,8 +1317,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
 		else
 			dup_end = dup_key;
 
-		if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
-		    !memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
+		if (!memcmp(start, dup_key->data, set->klen) &&
+		    !memcmp(end, dup_end->data, set->klen)) {
 			*elem_priv = &dup->priv;
 			return -EEXIST;
 		}

From a675d1caa2041f05f6343fad67b04f8babf32217 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 4 Dec 2025 12:20:36 +0100
Subject: [PATCH 02/70] selftests: netfilter: nft_concat_range.sh: add check
 for overlap detection bug

without 'netfilter: nft_set_pipapo: fix range overlap detection':

  reject overlapping range on add       0s         [FAIL]
Returned success for add { 1.2.3.4 . 1.2.4.1-1.2.4.2 } given set:
table inet filter {
	[..]
       elements = { 1.2.3.4 . 1.2.4.1 counter packets 0 bytes 0,
                    1.2.3.0-1.2.3.4 . 1.2.4.2 counter packets 0 bytes 0 }
}

The element collides with existing ones and was not added, but kernel
returned success to userspace.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 .../net/netfilter/nft_concat_range.sh         | 45 ++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index ad97c6227f35..394166f224a4 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
        net6_port_net6_port net_port_mac_proto_net"
 
 # Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap"
 
 # List of possible paths to pktgen script from kernel tree for performance tests
 PKTGEN_SCRIPT_PATHS="
@@ -420,6 +420,18 @@ race_repeat	0
 perf_duration	0
 "
 
+TYPE_insert_overlap="
+display		reject overlapping range on add
+type_spec	ipv4_addr . ipv4_addr
+chain_spec	ip saddr . ip daddr
+dst		addr4
+proto		icmp
+
+race_repeat	0
+
+perf_duration	0
+"
+
 # Set template for all tests, types and rules are filled in depending on test
 set_template='
 flush ruleset
@@ -1954,6 +1966,37 @@ EOF
 	return 0
 }
 
+add_fail()
+{
+	if nft add element inet filter test "$1" 2>/dev/null ; then
+		err "Returned success for add ${1} given set:"
+		err "$(nft -a list set inet filter test )"
+		return 1
+	fi
+
+	return 0
+}
+
+test_bug_insert_overlap()
+{
+	local elements="1.2.3.4 . 1.2.4.1"
+
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	add "{ $elements }" || return 1
+
+	elements="1.2.3.0-1.2.3.4 . 1.2.4.1"
+	add_fail "{ $elements }" || return 1
+
+	elements="1.2.3.0-1.2.3.4 . 1.2.4.2"
+	add "{ $elements }" || return 1
+
+	elements="1.2.3.4 . 1.2.4.1-1.2.4.2"
+	add_fail "{ $elements }" || return 1
+
+	return 0
+}
+
 test_reported_issues() {
 	eval test_bug_"${subtest}"
 }

From 36a3200575642846a96436d503d46544533bb943 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <fmancera@suse.de>
Date: Wed, 17 Dec 2025 21:21:59 +0100
Subject: [PATCH 03/70] netfilter: nft_synproxy: avoid possible data-race on
 update operation

During nft_synproxy eval we are reading nf_synproxy_info struct which
can be modified on update operation concurrently. As nf_synproxy_info
struct fits in 32 bits, use READ_ONCE/WRITE_ONCE annotations.

Fixes: ee394f96ad75 ("netfilter: nft_synproxy: add synproxy stateful object support")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nft_synproxy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 5d3e51825985..4d3e5a31b412 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -48,7 +48,7 @@ static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
 				 struct tcphdr *_tcph,
 				 struct synproxy_options *opts)
 {
-	struct nf_synproxy_info info = priv->info;
+	struct nf_synproxy_info info = READ_ONCE(priv->info);
 	struct net *net = nft_net(pkt);
 	struct synproxy_net *snet = synproxy_pernet(net);
 	struct sk_buff *skb = pkt->skb;
@@ -79,7 +79,7 @@ static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
 				 struct tcphdr *_tcph,
 				 struct synproxy_options *opts)
 {
-	struct nf_synproxy_info info = priv->info;
+	struct nf_synproxy_info info = READ_ONCE(priv->info);
 	struct net *net = nft_net(pkt);
 	struct synproxy_net *snet = synproxy_pernet(net);
 	struct sk_buff *skb = pkt->skb;
@@ -340,7 +340,7 @@ static void nft_synproxy_obj_update(struct nft_object *obj,
 	struct nft_synproxy *newpriv = nft_obj_data(newobj);
 	struct nft_synproxy *priv = nft_obj_data(obj);
 
-	priv->info = newpriv->info;
+	WRITE_ONCE(priv->info, newpriv->info);
 }
 
 static struct nft_object_type nft_synproxy_obj_type;

From 2bafeb8d2f380c3a81d98bd7b78b854b564f9cd4 Mon Sep 17 00:00:00 2001
From: Daniel Gomez <da.gomez@samsung.com>
Date: Fri, 19 Dec 2025 06:13:20 +0100
Subject: [PATCH 04/70] netfilter: replace -EEXIST with -EBUSY

The -EEXIST error code is reserved by the module loading infrastructure
to indicate that a module is already loaded. When a module's init
function returns -EEXIST, userspace tools like kmod interpret this as
"module already loaded" and treat the operation as successful, returning
0 to the user even though the module initialization actually failed.

Replace -EEXIST with -EBUSY to ensure correct error reporting in the module
initialization path.

Affected modules:
  * ebtable_broute ebtable_filter ebtable_nat arptable_filter
  * ip6table_filter ip6table_mangle ip6table_nat ip6table_raw
  * ip6table_security iptable_filter iptable_mangle iptable_nat
  * iptable_raw iptable_security

Signed-off-by: Daniel Gomez <da.gomez@samsung.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/bridge/netfilter/ebtables.c | 2 +-
 net/netfilter/nf_log.c          | 4 ++--
 net/netfilter/x_tables.c        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5697e3949a36..a04fc1757528 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1299,7 +1299,7 @@ int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct ne
 	list_for_each_entry(tmpl, &template_tables, list) {
 		if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) {
 			mutex_unlock(&ebt_mutex);
-			return -EEXIST;
+			return -EBUSY;
 		}
 	}
 
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 74cef8bf554c..62cf6a30875e 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -89,7 +89,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 	if (pf == NFPROTO_UNSPEC) {
 		for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
 			if (rcu_access_pointer(loggers[i][logger->type])) {
-				ret = -EEXIST;
+				ret = -EBUSY;
 				goto unlock;
 			}
 		}
@@ -97,7 +97,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 			rcu_assign_pointer(loggers[i][logger->type], logger);
 	} else {
 		if (rcu_access_pointer(loggers[pf][logger->type])) {
-			ret = -EEXIST;
+			ret = -EBUSY;
 			goto unlock;
 		}
 		rcu_assign_pointer(loggers[pf][logger->type], logger);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 90b7630421c4..48105ea3df15 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1764,7 +1764,7 @@ EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
 int xt_register_template(const struct xt_table *table,
 			 int (*table_init)(struct net *net))
 {
-	int ret = -EEXIST, af = table->af;
+	int ret = -EBUSY, af = table->af;
 	struct xt_template *t;
 
 	mutex_lock(&xt[af].mutex);

From d077e8119ddbb4fca67540f1a52453631a47f221 Mon Sep 17 00:00:00 2001
From: Zilin Guan <zilin@seu.edu.cn>
Date: Wed, 24 Dec 2025 12:48:26 +0000
Subject: [PATCH 05/70] netfilter: nf_tables: fix memory leak in
 nf_tables_newrule()

In nf_tables_newrule(), if nft_use_inc() fails, the function jumps to
the err_release_rule label without freeing the allocated flow, leading
to a memory leak.

Fix this by adding a new label err_destroy_flow and jumping to it when
nft_use_inc() fails. This ensures that the flow is properly released
in this error case.

Fixes: 1689f25924ada ("netfilter: nf_tables: report use refcount overflow")
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_tables_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 618af6e90773..729a92781a1a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4439,7 +4439,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 
 	if (!nft_use_inc(&chain->use)) {
 		err = -EMFILE;
-		goto err_release_rule;
+		goto err_destroy_flow;
 	}
 
 	if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
@@ -4489,6 +4489,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 
 err_destroy_flow_rule:
 	nft_use_dec_restore(&chain->use);
+err_destroy_flow:
 	if (flow)
 		nft_flow_rule_destroy(flow);
 err_release_rule:

From 7811ba452402d58628e68faedf38745b3d485e3c Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <fmancera@suse.de>
Date: Wed, 17 Dec 2025 15:46:40 +0100
Subject: [PATCH 06/70] netfilter: nf_conncount: update last_gc only when GC
 has been performed

Currently last_gc is being updated everytime a new connection is
tracked, that means that it is updated even if a GC wasn't performed.
With a sufficiently high packet rate, it is possible to always bypass
the GC, causing the list to grow infinitely.

Update the last_gc value only when a GC has been actually performed.

Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_conncount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 3654f1e8976c..8487808c8761 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -229,6 +229,7 @@ static int __nf_conncount_add(struct net *net,
 
 		nf_ct_put(found_ct);
 	}
+	list->last_gc = (u32)jiffies;
 
 add_new_node:
 	if (WARN_ON_ONCE(list->count > INT_MAX)) {
@@ -248,7 +249,6 @@ add_new_node:
 	conn->jiffies32 = (u32)jiffies;
 	list_add_tail(&conn->node, &list->head);
 	list->count++;
-	list->last_gc = (u32)jiffies;
 
 out_put:
 	if (refcounted)

From a428e0da1248c353557970848994f35fd3f005e2 Mon Sep 17 00:00:00 2001
From: Alok Tiwari <alok.a.tiwari@oracle.com>
Date: Mon, 29 Dec 2025 21:21:18 -0800
Subject: [PATCH 07/70] net: marvell: prestera: fix NULL dereference on
 devlink_alloc() failure

devlink_alloc() may return NULL on allocation failure, but
prestera_devlink_alloc() unconditionally calls devlink_priv() on
the returned pointer.

This leads to a NULL pointer dereference if devlink allocation fails.
Add a check for a NULL devlink pointer and return NULL early to avoid
the crash.

Fixes: 34dd1710f5a3 ("net: marvell: prestera: Add basic devlink support")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Acked-by: Elad Nachman <enachman@marvell.com>
Link: https://patch.msgid.link/20251230052124.897012-1-alok.a.tiwari@oracle.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/prestera/prestera_devlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
index 2a4c9df4eb79..e63d95c1842f 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
@@ -387,6 +387,8 @@ struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev)
 
 	dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch),
 			   dev->dev);
+	if (!dl)
+		return NULL;
 
 	return devlink_priv(dl);
 }

From d7065436e8a04520f60c18240b775861be7999d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20Bl=C3=B6chl?= <markus@blochl.de>
Date: Sun, 28 Dec 2025 16:52:59 +0100
Subject: [PATCH 08/70] net: bnge: add AUXILIARY_BUS to Kconfig dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The build can currently fail with

    ld: drivers/net/ethernet/broadcom/bnge/bnge_auxr.o: in function `bnge_rdma_aux_device_add':
    bnge_auxr.c:(.text+0x366): undefined reference to `__auxiliary_device_add'
    ld: drivers/net/ethernet/broadcom/bnge/bnge_auxr.o: in function `bnge_rdma_aux_device_init':
    bnge_auxr.c:(.text+0x43c): undefined reference to `auxiliary_device_init'

if BNGE is enabled but no other driver pulls in AUXILIARY_BUS.

Select AUXILIARY_BUS in BNGE like in all other drivers which create
an auxiliary_device.

Fixes: 8ac050ec3b1c ("bng_en: Add RoCE aux device support")
Signed-off-by: Markus Blöchl <markus@blochl.de>
Reviewed-by: Vikas Gupta <vikas.gupta@broadcom.com>
Link: https://patch.msgid.link/20251228-bnge_aux_bus-v1-1-82e273ebfdac@blochl.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index ca565ace6e6a..cd7dddeb91dd 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -259,6 +259,7 @@ config BNGE
 	depends on PCI
 	select NET_DEVLINK
 	select PAGE_POOL
+	select AUXILIARY_BUS
 	help
 	  This driver supports Broadcom ThorUltra 50/100/200/400/800 gigabit
 	  Ethernet cards. The module will be called bng_en. To compile this

From 3128df6be147768fe536986fbb85db1d37806a9f Mon Sep 17 00:00:00 2001
From: Alexandre Knecht <knecht.alexandre@gmail.com>
Date: Sun, 28 Dec 2025 03:00:57 +0100
Subject: [PATCH 09/70] bridge: fix C-VLAN preservation in 802.1ad vlan_tunnel
 egress

When using an 802.1ad bridge with vlan_tunnel, the C-VLAN tag is
incorrectly stripped from frames during egress processing.

br_handle_egress_vlan_tunnel() uses skb_vlan_pop() to remove the S-VLAN
from hwaccel before VXLAN encapsulation. However, skb_vlan_pop() also
moves any "next" VLAN from the payload into hwaccel:

    /* move next vlan tag to hw accel tag */
    __skb_vlan_pop(skb, &vlan_tci);
    __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);

For QinQ frames where the C-VLAN sits in the payload, this moves it to
hwaccel where it gets lost during VXLAN encapsulation.

Fix by calling __vlan_hwaccel_clear_tag() directly, which clears only
the hwaccel S-VLAN and leaves the payload untouched.

This path is only taken when vlan_tunnel is enabled and tunnel_info
is configured, so 802.1Q bridges are unaffected.

Tested with 802.1ad bridge + VXLAN vlan_tunnel, verified C-VLAN
preserved in VXLAN payload via tcpdump.

Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths")
Signed-off-by: Alexandre Knecht <knecht.alexandre@gmail.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20251228020057.2788865-1-knecht.alexandre@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/bridge/br_vlan_tunnel.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index a966a6ec8263..257cae9f1569 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -189,7 +189,6 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
 	struct metadata_dst *tunnel_dst;
 	__be64 tunnel_id;
-	int err;
 
 	if (!vlan)
 		return 0;
@@ -199,9 +198,13 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
 		return 0;
 
 	skb_dst_drop(skb);
-	err = skb_vlan_pop(skb);
-	if (err)
-		return err;
+	/* For 802.1ad (QinQ), skb_vlan_pop() incorrectly moves the C-VLAN
+	 * from payload to hwaccel after clearing S-VLAN. We only need to
+	 * clear the hwaccel S-VLAN; the C-VLAN must stay in payload for
+	 * correct VXLAN encapsulation. This is also correct for 802.1Q
+	 * where no C-VLAN exists in payload.
+	 */
+	__vlan_hwaccel_clear_tag(skb);
 
 	if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
 		__set_bit(IP_TUNNEL_KEY_BIT, flags);

From 34f3ff52cb9fa7dbf04f5c734fcc4cb6ed5d1a95 Mon Sep 17 00:00:00 2001
From: Jerry Wu <w.7erry@foxmail.com>
Date: Thu, 25 Dec 2025 20:36:17 +0000
Subject: [PATCH 10/70] net: mscc: ocelot: Fix crash when adding interface
 under a lag

Commit 15faa1f67ab4 ("lan966x: Fix crash when adding interface under a lag")
fixed a similar issue in the lan966x driver caused by a NULL pointer dereference.
The ocelot_set_aggr_pgids() function in the ocelot driver has similar logic
and is susceptible to the same crash.

This issue specifically affects the ocelot_vsc7514.c frontend, which leaves
unused ports as NULL pointers. The felix_vsc9959.c frontend is unaffected as
it uses the DSA framework which registers all ports.

Fix this by checking if the port pointer is valid before accessing it.

Fixes: 528d3f190c98 ("net: mscc: ocelot: drop the use of the "lags" array")
Signed-off-by: Jerry Wu <w.7erry@foxmail.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/tencent_75EF812B305E26B0869C673DD1160866C90A@qq.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mscc/ocelot.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 08bee56aea35..c345d9b17c89 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2307,14 +2307,16 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
 
 	/* Now, set PGIDs for each active LAG */
 	for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
-		struct net_device *bond = ocelot->ports[lag]->bond;
+		struct ocelot_port *ocelot_port = ocelot->ports[lag];
 		int num_active_ports = 0;
+		struct net_device *bond;
 		unsigned long bond_mask;
 		u8 aggr_idx[16];
 
-		if (!bond || (visited & BIT(lag)))
+		if (!ocelot_port || !ocelot_port->bond || (visited & BIT(lag)))
 			continue;
 
+		bond = ocelot_port->bond;
 		bond_mask = ocelot_get_bond_mask(ocelot, bond);
 
 		for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {

From 4c0856c225b39b1def6c9a6bc56faca79550da13 Mon Sep 17 00:00:00 2001
From: "yuan.gao" <yuan.gao@ucloud.cn>
Date: Wed, 24 Dec 2025 14:31:45 +0800
Subject: [PATCH 11/70] inet: ping: Fix icmp out counting

When the ping program uses an IPPROTO_ICMP socket to send ICMP_ECHO
messages, ICMP_MIB_OUTMSGS is counted twice.

    ping_v4_sendmsg
      ping_v4_push_pending_frames
        ip_push_pending_frames
          ip_finish_skb
            __ip_make_skb
              icmp_out_count(net, icmp_type); // first count
      icmp_out_count(sock_net(sk), user_icmph.type); // second count

However, when the ping program uses an IPPROTO_RAW socket,
ICMP_MIB_OUTMSGS is counted correctly only once.

Therefore, the first count should be removed.

Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind")
Signed-off-by: yuan.gao <yuan.gao@ucloud.cn>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20251224063145.3615282-1-yuan.gao@ucloud.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/ping.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index ad56588107cc..cfbd563498e8 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -828,10 +828,8 @@ out:
 out_free:
 	if (free)
 		kfree(ipc.opt);
-	if (!err) {
-		icmp_out_count(sock_net(sk), user_icmph.type);
+	if (!err)
 		return len;
-	}
 	return err;
 
 do_confirm:

From 62f7edd59964eb588e96fce1ad35a2327ea54424 Mon Sep 17 00:00:00 2001
From: Stefano Radaelli <stefano.r@variscite.com>
Date: Tue, 23 Dec 2025 13:09:39 +0100
Subject: [PATCH 12/70] net: phy: mxl-86110: Add power management and soft
 reset support

Implement soft_reset, suspend, and resume callbacks using
genphy_soft_reset(), genphy_suspend(), and genphy_resume()
to fix PHY initialization and power management issues.

The soft_reset callback is needed to properly recover the PHY after an
ifconfig down/up cycle. Without it, the PHY can remain in power-down
state, causing MDIO register access failures during config_init().
The soft reset ensures the PHY is operational before configuration.

The suspend/resume callbacks enable proper power management during
system suspend/resume cycles.

Fixes: b2908a989c59 ("net: phy: add driver for MaxLinear MxL86110 PHY")
Signed-off-by: Stefano Radaelli <stefano.r@variscite.com>
Link: https://patch.msgid.link/20251223120940.407195-1-stefano.r@variscite.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/mxl-86110.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/mxl-86110.c b/drivers/net/phy/mxl-86110.c
index e5d137a37a1d..42a5fe3f115f 100644
--- a/drivers/net/phy/mxl-86110.c
+++ b/drivers/net/phy/mxl-86110.c
@@ -938,6 +938,9 @@ static struct phy_driver mxl_phy_drvs[] = {
 		PHY_ID_MATCH_EXACT(PHY_ID_MXL86110),
 		.name			= "MXL86110 Gigabit Ethernet",
 		.config_init		= mxl86110_config_init,
+		.suspend		= genphy_suspend,
+		.resume			= genphy_resume,
+		.soft_reset		= genphy_soft_reset,
 		.get_wol		= mxl86110_get_wol,
 		.set_wol		= mxl86110_set_wol,
 		.led_brightness_set	= mxl86110_led_brightness_set,

From 2a71a1a8d0ed718b1c7a9ac61f07e5755c47ae20 Mon Sep 17 00:00:00 2001
From: Weiming Shi <bestswngs@gmail.com>
Date: Wed, 24 Dec 2025 04:35:35 +0800
Subject: [PATCH 13/70] net: sock: fix hardened usercopy panic in
 sock_recv_errqueue

skbuff_fclone_cache was created without defining a usercopy region,
[1] unlike skbuff_head_cache which properly whitelists the cb[] field.
[2] This causes a usercopy BUG() when CONFIG_HARDENED_USERCOPY is
enabled and the kernel attempts to copy sk_buff.cb data to userspace
via sock_recv_errqueue() -> put_cmsg().

The crash occurs when: 1. TCP allocates an skb using alloc_skb_fclone()
   (from skbuff_fclone_cache) [1]
2. The skb is cloned via skb_clone() using the pre-allocated fclone
[3] 3. The cloned skb is queued to sk_error_queue for timestamp
reporting 4. Userspace reads the error queue via recvmsg(MSG_ERRQUEUE)
5. sock_recv_errqueue() calls put_cmsg() to copy serr->ee from skb->cb
[4] 6. __check_heap_object() fails because skbuff_fclone_cache has no
   usercopy whitelist [5]

When cloned skbs allocated from skbuff_fclone_cache are used in the
socket error queue, accessing the sock_exterr_skb structure in skb->cb
via put_cmsg() triggers a usercopy hardening violation:

[    5.379589] usercopy: Kernel memory exposure attempt detected from SLUB object 'skbuff_fclone_cache' (offset 296, size 16)!
[    5.382796] kernel BUG at mm/usercopy.c:102!
[    5.383923] Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
[    5.384903] CPU: 1 UID: 0 PID: 138 Comm: poc_put_cmsg Not tainted 6.12.57 #7
[    5.384903] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[    5.384903] RIP: 0010:usercopy_abort+0x6c/0x80
[    5.384903] Code: 1a 86 51 48 c7 c2 40 15 1a 86 41 52 48 c7 c7 c0 15 1a 86 48 0f 45 d6 48 c7 c6 80 15 1a 86 48 89 c1 49 0f 45 f3 e8 84 27 88 ff <0f> 0b 490
[    5.384903] RSP: 0018:ffffc900006f77a8 EFLAGS: 00010246
[    5.384903] RAX: 000000000000006f RBX: ffff88800f0ad2a8 RCX: 1ffffffff0f72e74
[    5.384903] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff87b973a0
[    5.384903] RBP: 0000000000000010 R08: 0000000000000000 R09: fffffbfff0f72e74
[    5.384903] R10: 0000000000000003 R11: 79706f6372657375 R12: 0000000000000001
[    5.384903] R13: ffff88800f0ad2b8 R14: ffffea00003c2b40 R15: ffffea00003c2b00
[    5.384903] FS:  0000000011bc4380(0000) GS:ffff8880bf100000(0000) knlGS:0000000000000000
[    5.384903] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    5.384903] CR2: 000056aa3b8e5fe4 CR3: 000000000ea26004 CR4: 0000000000770ef0
[    5.384903] PKRU: 55555554
[    5.384903] Call Trace:
[    5.384903]  <TASK>
[    5.384903]  __check_heap_object+0x9a/0xd0
[    5.384903]  __check_object_size+0x46c/0x690
[    5.384903]  put_cmsg+0x129/0x5e0
[    5.384903]  sock_recv_errqueue+0x22f/0x380
[    5.384903]  tls_sw_recvmsg+0x7ed/0x1960
[    5.384903]  ? srso_alias_return_thunk+0x5/0xfbef5
[    5.384903]  ? schedule+0x6d/0x270
[    5.384903]  ? srso_alias_return_thunk+0x5/0xfbef5
[    5.384903]  ? mutex_unlock+0x81/0xd0
[    5.384903]  ? __pfx_mutex_unlock+0x10/0x10
[    5.384903]  ? __pfx_tls_sw_recvmsg+0x10/0x10
[    5.384903]  ? _raw_spin_lock_irqsave+0x8f/0xf0
[    5.384903]  ? _raw_read_unlock_irqrestore+0x20/0x40
[    5.384903]  ? srso_alias_return_thunk+0x5/0xfbef5

The crash offset 296 corresponds to skb2->cb within skbuff_fclones:
  - sizeof(struct sk_buff) = 232 - offsetof(struct sk_buff, cb) = 40 -
  offset of skb2.cb in fclones = 232 + 40 = 272 - crash offset 296 =
  272 + 24 (inside sock_exterr_skb.ee)

This patch uses a local stack variable as a bounce buffer to avoid the hardened usercopy check failure.

[1] https://elixir.bootlin.com/linux/v6.12.62/source/net/ipv4/tcp.c#L885
[2] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5104
[3] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5566
[4] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5491
[5] https://elixir.bootlin.com/linux/v6.12.62/source/mm/slub.c#L5719

Fixes: 6d07d1cd300f ("usercopy: Restrict non-usercopy caches to size 0")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251223203534.1392218-2-bestswngs@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/sock.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 45c98bf524b2..a1c8b47b0d56 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3896,7 +3896,7 @@ void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
 		       int level, int type)
 {
-	struct sock_exterr_skb *serr;
+	struct sock_extended_err ee;
 	struct sk_buff *skb;
 	int copied, err;
 
@@ -3916,8 +3916,9 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
 
 	sock_recv_timestamp(msg, sk, skb);
 
-	serr = SKB_EXT_ERR(skb);
-	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
+	/* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
+	ee = SKB_EXT_ERR(skb)->ee;
+	put_cmsg(msg, level, type, sizeof(ee), &ee);
 
 	msg->msg_flags |= MSG_ERRQUEUE;
 	err = copied;

From 02d1e1a3f9239cdb3ecf2c6d365fb959d1bf39df Mon Sep 17 00:00:00 2001
From: Di Zhu <zhud@hygon.cn>
Date: Wed, 24 Dec 2025 09:22:24 +0800
Subject: [PATCH 14/70] netdev: preserve NETIF_F_ALL_FOR_ALL across TSO updates

Directly increment the TSO features incurs a side effect: it will also
directly clear the flags in NETIF_F_ALL_FOR_ALL on the master device,
which can cause issues such as the inability to enable the nocache copy
feature on the bonding driver.

The fix is to include NETIF_F_ALL_FOR_ALL in the update mask, thereby
preventing it from being cleared.

Fixes: b0ce3508b25e ("bonding: allow TSO being set on bonding master")
Signed-off-by: Di Zhu <zhud@hygon.cn>
Link: https://patch.msgid.link/20251224012224.56185-1-zhud@hygon.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5870a9e514a5..d99b0fbc1942 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5323,7 +5323,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
 static inline netdev_features_t netdev_add_tso_features(netdev_features_t features,
 							netdev_features_t mask)
 {
-	return netdev_increment_features(features, NETIF_F_ALL_TSO, mask);
+	return netdev_increment_features(features, NETIF_F_ALL_TSO |
+					 NETIF_F_ALL_FOR_ALL, mask);
 }
 
 int __netdev_update_features(struct net_device *dev);

From 31057979cdadfee9f934746fd84046b43506ba61 Mon Sep 17 00:00:00 2001
From: Patrisious Haddad <phaddad@nvidia.com>
Date: Thu, 25 Dec 2025 15:27:13 +0200
Subject: [PATCH 15/70] net/mlx5: Lag, multipath, give priority for routes with
 smaller network prefix

Today multipath offload is controlled by a single route and the route
controlling is selected if it meets one of the following criteria:
        1. No controlling route is set.
        2. New route destination is the same as old one.
        3. New route metric is lower than old route metric.

This can cause unwanted behaviour in case a new route is added
with a smaller network prefix which should get the priority.

Fix this by adding a new criteria to give priority to new route with
a smaller network prefix.

Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry")
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20251225132717.358820-2-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index aee17fcf3b36..cdc99fe5c956 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -173,10 +173,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
 	}
 
 	/* Handle multipath entry with lower priority value */
-	if (mp->fib.mfi && mp->fib.mfi != fi &&
+	if (mp->fib.mfi &&
 	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
-	    fi->fib_priority >= mp->fib.priority)
+	    mp->fib.dst_len <= fen_info->dst_len &&
+	    !(mp->fib.dst_len == fen_info->dst_len &&
+	      fi->fib_priority < mp->fib.priority)) {
+		mlx5_core_dbg(ldev->pf[idx].dev,
+			      "Multipath entry with lower priority was rejected\n");
 		return;
+	}
 
 	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
 	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

From 6c75dc9de40ff91ec2b621b78f6cd9031762067c Mon Sep 17 00:00:00 2001
From: Alexei Lazar <alazar@nvidia.com>
Date: Thu, 25 Dec 2025 15:27:14 +0200
Subject: [PATCH 16/70] net/mlx5e: Don't gate FEC histograms on
 ppcnt_statistical_group

Currently, the ppcnt_statistical_group capability check
incorrectly gates access to FEC histogram statistics.
This capability applies only to statistical and physical
counter groups, not for histogram data.

Restrict the ppcnt_statistical_group check to the
Physical_Layer_Counters and Physical_Layer_Statistical_Counters
groups.
Histogram statistics access remains gated by the pphcr
capability.

The issue is harmless as of today, as it happens that
ppcnt_statistical_group is set on all existing devices that
have pphcr set.

Fixes: 6b81b8a0b197 ("net/mlx5e: Don't query FEC statistics when FEC is disabled")
Signed-off-by: Alexei Lazar <alazar@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20251225132717.358820-3-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index a2802cfc9b98..a8af84fc9763 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -1608,12 +1608,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
 {
 	int mode = fec_active_mode(priv->mdev);
 
-	if (mode == MLX5E_FEC_NOFEC ||
-	    !MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group))
+	if (mode == MLX5E_FEC_NOFEC)
 		return;
 
-	fec_set_corrected_bits_total(priv, fec_stats);
-	fec_set_block_stats(priv, mode, fec_stats);
+	if (MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) {
+		fec_set_corrected_bits_total(priv, fec_stats);
+		fec_set_block_stats(priv, mode, fec_stats);
+	}
 
 	if (MLX5_CAP_PCAM_REG(priv->mdev, pphcr))
 		fec_set_histograms_stats(priv, mode, hist);

From 7d36a4a8bf62dc508bc6bb4b59727aec25064ca5 Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Thu, 25 Dec 2025 15:27:15 +0200
Subject: [PATCH 17/70] net/mlx5e: Fix NULL pointer dereference in ioctl module
 EEPROM query

The mlx5_query_mcia() function unconditionally dereferences the status
pointer to store the MCIA register status value.
However, mlx5e_get_module_id() passes NULL since it doesn't need the
status value.

Add a NULL check before dereferencing the status pointer to prevent a
NULL pointer dereference.

Fixes: 2e4c44b12f4d ("net/mlx5: Refactor EEPROM query error handling to return status separately")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20251225132717.358820-4-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 85a9e534f442..8f36454dd196 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -393,9 +393,11 @@ static int mlx5_query_mcia(struct mlx5_core_dev *dev,
 	if (err)
 		return err;
 
-	*status = MLX5_GET(mcia_reg, out, status);
-	if (*status)
+	if (MLX5_GET(mcia_reg, out, status)) {
+		if (status)
+			*status = MLX5_GET(mcia_reg, out, status);
 		return -EIO;
+	}
 
 	ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
 	memcpy(data, ptr, size);

From 144297e2a24e3e54aee1180ec21120ea38822b97 Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Thu, 25 Dec 2025 15:27:16 +0200
Subject: [PATCH 18/70] net/mlx5e: Don't print error message due to invalid
 module

Dumping module EEPROM on newer modules is supported through the netlink
interface only.

Querying with old userspace ethtool (or other tools, such as 'lshw')
which still uses the ioctl interface results in an error message that
could flood dmesg (in addition to the expected error return value).
The original message was added under the assumption that the driver
should be able to handle all module types, but now that such flows are
easily triggered from userspace, it doesn't serve its purpose.

Change the log level of the print in mlx5_query_module_eeprom() to
debug.

Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20251225132717.358820-5-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 8f36454dd196..7f8bed353e67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -431,7 +431,8 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
 		mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
 		break;
 	default:
-		mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+		mlx5_core_dbg(dev, "Module ID not recognized: 0x%x\n",
+			      module_id);
 		return -EINVAL;
 	}
 

From 0462a15d2d1fafd3d48cf3c7c67393e42d03908c Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Thu, 25 Dec 2025 15:27:17 +0200
Subject: [PATCH 19/70] net/mlx5e: Dealloc forgotten PSP RX modify header

The commit which added RX steering rules for PSP forgot to free a modify
header HW object on the cleanup path, which lead to health errors when
reloading the driver and uninitializing the device:

mlx5_core 0000:08:00.0: poll_health:803:(pid 3021): Fatal error 3 detected

Fix that by saving the modify header pointer in the PSP steering struct
and deallocating it after freeing the rule which references it.

Fixes: 9536fbe10c9d ("net/mlx5e: Add PSP steering in local NIC RX")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20251225132717.358820-6-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/mellanox/mlx5/core/en_accel/psp.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c
index 38e7c77cc851..9a74438ce10a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c
@@ -44,6 +44,7 @@ struct mlx5e_accel_fs_psp_prot {
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *miss_group;
 	struct mlx5_flow_handle *miss_rule;
+	struct mlx5_modify_hdr *rx_modify_hdr;
 	struct mlx5_flow_destination default_dest;
 	struct mlx5e_psp_rx_err rx_err;
 	u32 refcnt;
@@ -286,13 +287,19 @@ out_err:
 	return err;
 }
 
-static void accel_psp_fs_rx_fs_destroy(struct mlx5e_accel_fs_psp_prot *fs_prot)
+static void accel_psp_fs_rx_fs_destroy(struct mlx5e_psp_fs *fs,
+				       struct mlx5e_accel_fs_psp_prot *fs_prot)
 {
 	if (fs_prot->def_rule) {
 		mlx5_del_flow_rules(fs_prot->def_rule);
 		fs_prot->def_rule = NULL;
 	}
 
+	if (fs_prot->rx_modify_hdr) {
+		mlx5_modify_header_dealloc(fs->mdev, fs_prot->rx_modify_hdr);
+		fs_prot->rx_modify_hdr = NULL;
+	}
+
 	if (fs_prot->miss_rule) {
 		mlx5_del_flow_rules(fs_prot->miss_rule);
 		fs_prot->miss_rule = NULL;
@@ -396,6 +403,7 @@ static int accel_psp_fs_rx_create_ft(struct mlx5e_psp_fs *fs,
 		modify_hdr = NULL;
 		goto out_err;
 	}
+	fs_prot->rx_modify_hdr = modify_hdr;
 
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
 			  MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT |
@@ -416,7 +424,7 @@ static int accel_psp_fs_rx_create_ft(struct mlx5e_psp_fs *fs,
 	goto out;
 
 out_err:
-	accel_psp_fs_rx_fs_destroy(fs_prot);
+	accel_psp_fs_rx_fs_destroy(fs, fs_prot);
 out:
 	kvfree(flow_group_in);
 	kvfree(spec);
@@ -433,7 +441,7 @@ static int accel_psp_fs_rx_destroy(struct mlx5e_psp_fs *fs, enum accel_fs_psp_ty
 	/* The netdev unreg already happened, so all offloaded rule are already removed */
 	fs_prot = &accel_psp->fs_prot[type];
 
-	accel_psp_fs_rx_fs_destroy(fs_prot);
+	accel_psp_fs_rx_fs_destroy(fs, fs_prot);
 
 	accel_psp_fs_rx_err_destroy_ft(fs, &fs_prot->rx_err);
 

From 8da901ffe497a53fa4ecc3ceed0e6d771586f88e Mon Sep 17 00:00:00 2001
From: Frank Liang <xiliang@redhat.com>
Date: Wed, 31 Dec 2025 22:58:08 +0800
Subject: [PATCH 20/70] net/ena: fix missing lock when update devlink params

Fix assert lock warning while calling devl_param_driverinit_value_set()
in ena.

WARNING: net/devlink/core.c:261 at devl_assert_locked+0x62/0x90, CPU#0: kworker/0:0/9
CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.19.0-rc2+ #1 PREEMPT(lazy)
Hardware name: Amazon EC2 m8i-flex.4xlarge/, BIOS 1.0 10/16/2017
Workqueue: events work_for_cpu_fn
RIP: 0010:devl_assert_locked+0x62/0x90

Call Trace:
 <TASK>
 devl_param_driverinit_value_set+0x15/0x1c0
 ena_devlink_alloc+0x18c/0x220 [ena]
 ? __pfx_ena_devlink_alloc+0x10/0x10 [ena]
 ? trace_hardirqs_on+0x18/0x140
 ? lockdep_hardirqs_on+0x8c/0x130
 ? __raw_spin_unlock_irqrestore+0x5d/0x80
 ? __raw_spin_unlock_irqrestore+0x46/0x80
 ? devm_ioremap_wc+0x9a/0xd0
 ena_probe+0x4d2/0x1b20 [ena]
 ? __lock_acquire+0x56a/0xbd0
 ? __pfx_ena_probe+0x10/0x10 [ena]
 ? local_clock+0x15/0x30
 ? __lock_release.isra.0+0x1c9/0x340
 ? mark_held_locks+0x40/0x70
 ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170
 ? trace_hardirqs_on+0x18/0x140
 ? lockdep_hardirqs_on+0x8c/0x130
 ? __raw_spin_unlock_irqrestore+0x5d/0x80
 ? __raw_spin_unlock_irqrestore+0x46/0x80
 ? __pfx_ena_probe+0x10/0x10 [ena]
 ......
 </TASK>

Fixes: 816b52624cf6 ("net: ena: Control PHC enable through devlink")
Signed-off-by: Frank Liang <xiliang@redhat.com>
Reviewed-by: David Arinzon <darinzon@amazon.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://patch.msgid.link/20251231145808.6103-1-xiliang@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amazon/ena/ena_devlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_devlink.c b/drivers/net/ethernet/amazon/ena/ena_devlink.c
index ac81c24016dd..4772185e669d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_devlink.c
+++ b/drivers/net/ethernet/amazon/ena/ena_devlink.c
@@ -53,10 +53,12 @@ void ena_devlink_disable_phc_param(struct devlink *devlink)
 {
 	union devlink_param_value value;
 
+	devl_lock(devlink);
 	value.vbool = false;
 	devl_param_driverinit_value_set(devlink,
 					DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC,
 					value);
+	devl_unlock(devlink);
 }
 
 static void ena_devlink_port_register(struct devlink *devlink)
@@ -145,10 +147,12 @@ static int ena_devlink_configure_params(struct devlink *devlink)
 		return rc;
 	}
 
+	devl_lock(devlink);
 	value.vbool = ena_phc_is_enabled(adapter);
 	devl_param_driverinit_value_set(devlink,
 					DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC,
 					value);
+	devl_unlock(devlink);
 
 	return 0;
 }

From 92e6e0a87f6860a4710f9494f8c704d498ae60f8 Mon Sep 17 00:00:00 2001
From: Zilin Guan <zilin@seu.edu.cn>
Date: Tue, 30 Dec 2025 07:18:53 +0000
Subject: [PATCH 21/70] net: wwan: iosm: Fix memory leak in ipc_mux_deinit()

Commit 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support")
allocated memory for pp_qlt in ipc_mux_init() but did not free it in
ipc_mux_deinit(). This results in a memory leak when the driver is
unloaded.

Free the allocated memory in ipc_mux_deinit() to fix the leak.

Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support")
Co-developed-by: Jianhao Xu <jianhao.xu@seu.edu.cn>
Signed-off-by: Jianhao Xu <jianhao.xu@seu.edu.cn>
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Reviewed-by: Loic Poulain <loic.poulain@oss.qualcomm.com>
Link: https://patch.msgid.link/20251230071853.1062223-1-zilin@seu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wwan/iosm/iosm_ipc_mux.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c
index fc928b298a98..b846889fcb09 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c
@@ -456,6 +456,7 @@ void ipc_mux_deinit(struct iosm_mux *ipc_mux)
 	struct sk_buff_head *free_list;
 	union mux_msg mux_msg;
 	struct sk_buff *skb;
+	int i;
 
 	if (!ipc_mux->initialized)
 		return;
@@ -479,5 +480,10 @@ void ipc_mux_deinit(struct iosm_mux *ipc_mux)
 		ipc_mux->channel->dl_pipe.is_open = false;
 	}
 
+	if (ipc_mux->protocol != MUX_LITE) {
+		for (i = 0; i < IPC_MEM_MUX_IP_SESSION_ENTRIES; i++)
+			kfree(ipc_mux->ul_adb.pp_qlt[i]);
+	}
+
 	kfree(ipc_mux);
 }

From ffeafa65b2b26df2f5b5a6118d3174f17bd12ec5 Mon Sep 17 00:00:00 2001
From: Srijit Bose <srijit.bose@broadcom.com>
Date: Wed, 31 Dec 2025 00:36:25 -0800
Subject: [PATCH 22/70] bnxt_en: Fix potential data corruption with HW GRO/LRO

Fix the max number of bits passed to find_first_zero_bit() in
bnxt_alloc_agg_idx().  We were incorrectly passing the number of
long words.  find_first_zero_bit() may fail to find a zero bit and
cause a wrong ID to be used.  If the wrong ID is already in use, this
can cause data corruption.  Sometimes an error like this can also be
seen:

bnxt_en 0000:83:00.0 enp131s0np0: TPA end agg_buf 2 != expected agg_bufs 1

Fix it by passing the correct number of bits MAX_TPA_P5.  Use
DECLARE_BITMAP() to more cleanly define the bitmap.  Add a sanity
check to warn if a bit cannot be found and reset the ring [MChan].

Fixes: ec4d8e7cf024 ("bnxt_en: Add TPA ID mapping logic for 57500 chips.")
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Signed-off-by: Srijit Bose <srijit.bose@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Link: https://patch.msgid.link/20251231083625.3911652-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  4 +---
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d17d0ea89c36..d160e54ac121 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1482,9 +1482,11 @@ static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
 	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
 	u16 idx = agg_id & MAX_TPA_P5_MASK;
 
-	if (test_bit(idx, map->agg_idx_bmap))
-		idx = find_first_zero_bit(map->agg_idx_bmap,
-					  BNXT_AGG_IDX_BMAP_SIZE);
+	if (test_bit(idx, map->agg_idx_bmap)) {
+		idx = find_first_zero_bit(map->agg_idx_bmap, MAX_TPA_P5);
+		if (idx >= MAX_TPA_P5)
+			return INVALID_HW_RING_ID;
+	}
 	__set_bit(idx, map->agg_idx_bmap);
 	map->agg_id_tbl[agg_id] = idx;
 	return idx;
@@ -1548,6 +1550,13 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 	if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
 		agg_id = TPA_START_AGG_ID_P5(tpa_start);
 		agg_id = bnxt_alloc_agg_idx(rxr, agg_id);
+		if (unlikely(agg_id == INVALID_HW_RING_ID)) {
+			netdev_warn(bp->dev, "Unable to allocate agg ID for ring %d, agg 0x%x\n",
+				    rxr->bnapi->index,
+				    TPA_START_AGG_ID_P5(tpa_start));
+			bnxt_sched_reset_rxr(bp, rxr);
+			return;
+		}
 	} else {
 		agg_id = TPA_START_AGG_ID(tpa_start);
 	}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index f5f07a7e6b29..f88e7769a838 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1080,11 +1080,9 @@ struct bnxt_tpa_info {
 	struct rx_agg_cmp	*agg_arr;
 };
 
-#define BNXT_AGG_IDX_BMAP_SIZE	(MAX_TPA_P5 / BITS_PER_LONG)
-
 struct bnxt_tpa_idx_map {
 	u16		agg_id_tbl[1024];
-	unsigned long	agg_idx_bmap[BNXT_AGG_IDX_BMAP_SIZE];
+	DECLARE_BITMAP(agg_idx_bmap, MAX_TPA_P5);
 };
 
 struct bnxt_rx_ring_info {

From acb4bc6e1ba34ae1a34a9334a1ce8474c909466e Mon Sep 17 00:00:00 2001
From: Kommula Shiva Shankar <kshankar@marvell.com>
Date: Fri, 2 Jan 2026 15:49:00 +0530
Subject: [PATCH 23/70] virtio_net: fix device mismatch in
 devm_kzalloc/devm_kfree

Initial rss_hdr allocation uses virtio_device->device,
but virtnet_set_queues() frees using net_device->device.
This device mismatch causing below devres warning

[ 3788.514041] ------------[ cut here ]------------
[ 3788.514044] WARNING: drivers/base/devres.c:1095 at devm_kfree+0x84/0x98, CPU#16: vdpa/1463
[ 3788.514054] Modules linked in: octep_vdpa virtio_net virtio_vdpa [last unloaded: virtio_vdpa]
[ 3788.514064] CPU: 16 UID: 0 PID: 1463 Comm: vdpa Tainted: G        W           6.18.0 #10 PREEMPT
[ 3788.514067] Tainted: [W]=WARN
[ 3788.514069] Hardware name: Marvell CN106XX board (DT)
[ 3788.514071] pstate: 63400009 (nZCv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
[ 3788.514074] pc : devm_kfree+0x84/0x98
[ 3788.514076] lr : devm_kfree+0x54/0x98
[ 3788.514079] sp : ffff800084e2f220
[ 3788.514080] x29: ffff800084e2f220 x28: ffff0003b2366000 x27: 000000000000003f
[ 3788.514085] x26: 000000000000003f x25: ffff000106f17c10 x24: 0000000000000080
[ 3788.514089] x23: ffff00045bb8ab08 x22: ffff00045bb8a000 x21: 0000000000000018
[ 3788.514093] x20: ffff0004355c3080 x19: ffff00045bb8aa00 x18: 0000000000080000
[ 3788.514098] x17: 0000000000000040 x16: 000000000000001f x15: 000000000007ffff
[ 3788.514102] x14: 0000000000000488 x13: 0000000000000005 x12: 00000000000fffff
[ 3788.514106] x11: ffffffffffffffff x10: 0000000000000005 x9 : ffff800080c8c05c
[ 3788.514110] x8 : ffff800084e2eeb8 x7 : 0000000000000000 x6 : 000000000000003f
[ 3788.514115] x5 : ffff8000831bafe0 x4 : ffff800080c8b010 x3 : ffff0004355c3080
[ 3788.514119] x2 : ffff0004355c3080 x1 : 0000000000000000 x0 : 0000000000000000
[ 3788.514123] Call trace:
[ 3788.514125]  devm_kfree+0x84/0x98 (P)
[ 3788.514129]  virtnet_set_queues+0x134/0x2e8 [virtio_net]
[ 3788.514135]  virtnet_probe+0x9c0/0xe00 [virtio_net]
[ 3788.514139]  virtio_dev_probe+0x1e0/0x338
[ 3788.514144]  really_probe+0xc8/0x3a0
[ 3788.514149]  __driver_probe_device+0x84/0x170
[ 3788.514152]  driver_probe_device+0x44/0x120
[ 3788.514155]  __device_attach_driver+0xc4/0x168
[ 3788.514158]  bus_for_each_drv+0x8c/0xf0
[ 3788.514161]  __device_attach+0xa4/0x1c0
[ 3788.514164]  device_initial_probe+0x1c/0x30
[ 3788.514168]  bus_probe_device+0xb4/0xc0
[ 3788.514170]  device_add+0x614/0x828
[ 3788.514173]  register_virtio_device+0x214/0x258
[ 3788.514175]  virtio_vdpa_probe+0xa0/0x110 [virtio_vdpa]
[ 3788.514179]  vdpa_dev_probe+0xa8/0xd8
[ 3788.514183]  really_probe+0xc8/0x3a0
[ 3788.514186]  __driver_probe_device+0x84/0x170
[ 3788.514189]  driver_probe_device+0x44/0x120
[ 3788.514192]  __device_attach_driver+0xc4/0x168
[ 3788.514195]  bus_for_each_drv+0x8c/0xf0
[ 3788.514197]  __device_attach+0xa4/0x1c0
[ 3788.514200]  device_initial_probe+0x1c/0x30
[ 3788.514203]  bus_probe_device+0xb4/0xc0
[ 3788.514206]  device_add+0x614/0x828
[ 3788.514209]  _vdpa_register_device+0x58/0x88
[ 3788.514211]  octep_vdpa_dev_add+0x104/0x228 [octep_vdpa]
[ 3788.514215]  vdpa_nl_cmd_dev_add_set_doit+0x2d0/0x3c0
[ 3788.514218]  genl_family_rcv_msg_doit+0xe4/0x158
[ 3788.514222]  genl_rcv_msg+0x218/0x298
[ 3788.514225]  netlink_rcv_skb+0x64/0x138
[ 3788.514229]  genl_rcv+0x40/0x60
[ 3788.514233]  netlink_unicast+0x32c/0x3b0
[ 3788.514237]  netlink_sendmsg+0x170/0x3b8
[ 3788.514241]  __sys_sendto+0x12c/0x1c0
[ 3788.514246]  __arm64_sys_sendto+0x30/0x48
[ 3788.514249]  invoke_syscall.constprop.0+0x58/0xf8
[ 3788.514255]  do_el0_svc+0x48/0xd0
[ 3788.514259]  el0_svc+0x48/0x210
[ 3788.514264]  el0t_64_sync_handler+0xa0/0xe8
[ 3788.514268]  el0t_64_sync+0x198/0x1a0
[ 3788.514271] ---[ end trace 0000000000000000 ]---

Fix by using virtio_device->device consistently for
allocation and deallocation

Fixes: 4944be2f5ad8c ("virtio_net: Allocate rss_hdr with devres")
Signed-off-by: Kommula Shiva Shankar <kshankar@marvell.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20260102101900.692770-1-kshankar@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1bb3aeca66c6..22d894101c01 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3791,7 +3791,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 	if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
 		old_rss_hdr = vi->rss_hdr;
 		old_rss_trailer = vi->rss_trailer;
-		vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL);
+		vi->rss_hdr = devm_kzalloc(&vi->vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL);
 		if (!vi->rss_hdr) {
 			vi->rss_hdr = old_rss_hdr;
 			return -ENOMEM;
@@ -3802,7 +3802,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 
 		if (!virtnet_commit_rss_command(vi)) {
 			/* restore ctrl_rss if commit_rss_command failed */
-			devm_kfree(&dev->dev, vi->rss_hdr);
+			devm_kfree(&vi->vdev->dev, vi->rss_hdr);
 			vi->rss_hdr = old_rss_hdr;
 			vi->rss_trailer = old_rss_trailer;
 
@@ -3810,7 +3810,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 				 queue_pairs);
 			return -EINVAL;
 		}
-		devm_kfree(&dev->dev, old_rss_hdr);
+		devm_kfree(&vi->vdev->dev, old_rss_hdr);
 		goto succ;
 	}
 

From 2ef02ac38d3c17f34a00c4b267d961a8d4b45d1a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 2 Jan 2026 15:00:07 +0100
Subject: [PATCH 24/70] inet: frags: drop fraglist conntrack references

Jakub added a warning in nf_conntrack_cleanup_net_list() to make debugging
leaked skbs/conntrack references more obvious.

syzbot reports this as triggering, and I can also reproduce this via
ip_defrag.sh selftest:

 conntrack cleanup blocked for 60s
 WARNING: net/netfilter/nf_conntrack_core.c:2512
 [..]

conntrack clenups gets stuck because there are skbs with still hold nf_conn
references via their frag_list.

   net.core.skb_defer_max=0 makes the hang disappear.

Eric Dumazet points out that skb_release_head_state() doesn't follow the
fraglist.

ip_defrag.sh can only reproduce this problem since
commit 6471658dc66c ("udp: use skb_attempt_defer_free()"), but AFAICS this
problem could happen with TCP as well if pmtu discovery is off.

The relevant problem path for udp is:
1. netns emits fragmented packets
2. nf_defrag_v6_hook reassembles them (in output hook)
3. reassembled skb is tracked (skb owns nf_conn reference)
4. ip6_output refragments
5. refragmented packets also own nf_conn reference (ip6_fragment
   calls ip6_copy_metadata())
6. on input path, nf_defrag_v6_hook skips defragmentation: the
   fragments already have skb->nf_conn attached
7. skbs are reassembled via ipv6_frag_rcv()
8. skb_consume_udp -> skb_attempt_defer_free() -> skb ends up
   in pcpu freelist, but still has nf_conn reference.

Possible solutions:
 1 let defrag engine drop nf_conn entry, OR
 2 export kick_defer_list_purge() and call it from the conntrack
   netns exit callback, OR
 3 add skb_has_frag_list() check to skb_attempt_defer_free()

2 & 3 also solve ip_defrag.sh hang but share same drawback:

Such reassembled skbs, queued to socket, can prevent conntrack module
removal until userspace has consumed the packet. While both tcp and udp
stack do call nf_reset_ct() before placing skb on socket queue, that
function doesn't iterate frag_list skbs.

Therefore drop nf_conn entries when they are placed in defrag queue.
Keep the nf_conn entry of the first (offset 0) skb so that reassembled
skb retains nf_conn entry for sake of TX path.

Note that fixes tag is incorrect; it points to the commit introducing the
'ip_defrag.sh reproducible problem': no need to backport this patch to
every stable kernel.

Reported-by: syzbot+4393c47753b7808dac7d@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/693b0fa7.050a0220.4004e.040d.GAE@google.com/
Fixes: 6471658dc66c ("udp: use skb_attempt_defer_free()")
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260102140030.32367-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/inet_fragment.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 001ee5c4d962..4e6d7467ed44 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -488,6 +488,8 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
 	}
 
 	FRAG_CB(skb)->ip_defrag_offset = offset;
+	if (offset)
+		nf_reset_ct(skb);
 
 	return IPFRAG_OK;
 }

From 1806d210e5a8f431ad4711766ae4a333d407d972 Mon Sep 17 00:00:00 2001
From: Justin Iurman <justin.iurman@gmail.com>
Date: Sat, 3 Jan 2026 17:53:31 +0100
Subject: [PATCH 25/70] MAINTAINERS: Update email address for Justin Iurman

Due to a change of employer, I'll be using a permanent and personal
email address.

Signed-off-by: Justin Iurman <justin.iurman@gmail.com>
Link: https://patch.msgid.link/20260103165331.20120-1-justin.iurman@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .mailmap    | 1 +
 MAINTAINERS | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 7a6110d0e46d..7354436a19c0 100644
--- a/.mailmap
+++ b/.mailmap
@@ -416,6 +416,7 @@ Juha Yrjola <at solidboot.com>
 Juha Yrjola <juha.yrjola@nokia.com>
 Juha Yrjola <juha.yrjola@solidboot.com>
 Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
+Justin Iurman <justin.iurman@gmail.com> <justin.iurman@uliege.be>
 Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com>
 Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
 Kalle Valo <kvalo@kernel.org> <quic_kvalo@quicinc.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 765ad2daa218..410fd1f199f2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18283,7 +18283,7 @@ X:	net/wireless/
 X:	tools/testing/selftests/net/can/
 
 NETWORKING [IOAM]
-M:	Justin Iurman <justin.iurman@uliege.be>
+M:	Justin Iurman <justin.iurman@gmail.com>
 S:	Maintained
 F:	Documentation/networking/ioam6*
 F:	include/linux/ioam6*

From ce5e612dd411de096aa041b9e9325ba1bec5f9f4 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Mon, 29 Dec 2025 20:43:10 +0100
Subject: [PATCH 26/70] vsock: Make accept()ed sockets use custom setsockopt()

SO_ZEROCOPY handling in vsock_connectible_setsockopt() does not get called
on accept()ed sockets due to a missing flag. Flip it.

Fixes: e0718bd82e27 ("vsock: enable setting SO_ZEROCOPY")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Link: https://patch.msgid.link/20251229-vsock-child-sock-custom-sockopt-v2-1-64778d6c4f88@rbox.co
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/vmw_vsock/af_vsock.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index adcba1b7bf74..a3505a4dcee0 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1787,6 +1787,10 @@ static int vsock_accept(struct socket *sock, struct socket *newsock,
 		} else {
 			newsock->state = SS_CONNECTED;
 			sock_graft(connected, newsock);
+
+			set_bit(SOCK_CUSTOM_SOCKOPT,
+				&connected->sk_socket->flags);
+
 			if (vsock_msgzerocopy_allow(vconnected->transport))
 				set_bit(SOCK_SUPPORT_ZC,
 					&connected->sk_socket->flags);

From caa20e9e155b9d2afcf658e2909659c0be45ec12 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Mon, 29 Dec 2025 20:43:11 +0100
Subject: [PATCH 27/70] vsock/test: Test setting SO_ZEROCOPY on accept()ed
 socket

Make sure setsockopt(SOL_SOCKET, SO_ZEROCOPY) on an accept()ed socket is
handled by vsock's implementation.

Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Link: https://patch.msgid.link/20251229-vsock-child-sock-custom-sockopt-v2-2-64778d6c4f88@rbox.co
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/vsock/vsock_test.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 9e1250790f33..bbe3723babdc 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -2192,6 +2192,33 @@ static void test_stream_nolinger_server(const struct test_opts *opts)
 	close(fd);
 }
 
+static void test_stream_accepted_setsockopt_client(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_stream_accepted_setsockopt_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	enable_so_zerocopy_check(fd);
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -2371,6 +2398,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_unread_bytes_client,
 		.run_server = test_seqpacket_unread_bytes_server,
 	},
+	{
+		.name = "SOCK_STREAM accept()ed socket custom setsockopt()",
+		.run_client = test_stream_accepted_setsockopt_client,
+		.run_server = test_stream_accepted_setsockopt_server,
+	},
 	{},
 };
 

From 9892353726ad222219aa18c329e3a3636134dd56 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Thu, 1 Jan 2026 08:56:07 -0500
Subject: [PATCH 28/70] net/sched: act_mirred: Fix leak when redirecting to
 self on egress

Whenever a mirred redirect to self on egress happens, mirred allocates a
new skb (skb_to_send). The loop to self check was done after that
allocation, but was not freeing the newly allocated skb, causing a leak.

Fix this by moving the if-statement to before the allocation of the new
skb.

The issue was found by running the accompanying tdc test in 2/2
with config kmemleak enabled.
After a few minutes the kmemleak thread ran and reported the leak coming from
mirred.

Fixes: 1d856251a009 ("net/sched: act_mirred: fix loop detection")
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260101135608.253079-2-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_mirred.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 91c96cc625bd..05e0b14b5773 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -266,21 +266,9 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
 		goto err_cant_do;
 	}
 
-	/* we could easily avoid the clone only if called by ingress and clsact;
-	 * since we can't easily detect the clsact caller, skip clone only for
-	 * ingress - that covers the TC S/W datapath.
-	 */
-	at_ingress = skb_at_tc_ingress(skb);
-	dont_clone = skb_at_tc_ingress(skb) && is_redirect &&
-		tcf_mirred_can_reinsert(retval);
-	if (!dont_clone) {
-		skb_to_send = skb_clone(skb, GFP_ATOMIC);
-		if (!skb_to_send)
-			goto err_cant_do;
-	}
-
 	want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
 
+	at_ingress = skb_at_tc_ingress(skb);
 	if (dev == skb->dev && want_ingress == at_ingress) {
 		pr_notice_once("tc mirred: Loop (%s:%s --> %s:%s)\n",
 			       netdev_name(skb->dev),
@@ -290,6 +278,18 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
 		goto err_cant_do;
 	}
 
+	/* we could easily avoid the clone only if called by ingress and clsact;
+	 * since we can't easily detect the clsact caller, skip clone only for
+	 * ingress - that covers the TC S/W datapath.
+	 */
+	dont_clone = skb_at_tc_ingress(skb) && is_redirect &&
+		tcf_mirred_can_reinsert(retval);
+	if (!dont_clone) {
+		skb_to_send = skb_clone(skb, GFP_ATOMIC);
+		if (!skb_to_send)
+			goto err_cant_do;
+	}
+
 	/* All mirred/redirected skbs should clear previous ct info */
 	nf_reset_ct(skb_to_send);
 	if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */

From 4bcd49a03b94a5e16717961a7d750d676f044436 Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Thu, 1 Jan 2026 08:56:08 -0500
Subject: [PATCH 29/70] selftests/tc-testing: Add test case redirecting to self
 on egress

Add single mirred test case that attempts to redirect to self on egress
using clsact

Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260101135608.253079-3-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/actions/mirred.json   | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index da156feabcbf..b056eb966871 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -1098,5 +1098,52 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY root"
         ]
+    },
+    {
+        "id": "4ed9",
+        "name": "Try to redirect to self on egress with clsact",
+        "category": [
+            "filter",
+            "mirred"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin"
+            ]
+        },
+        "setup": [
+            "$IP link set dev $DUMMY up || true",
+            "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY clsact",
+            "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1"
+        ],
+        "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC -j -s actions get action mirred index 1",
+        "matchJSON": [
+            {
+                "total acts": 0
+            },
+            {
+                "actions": [
+                    {
+                        "order": 1,
+                        "kind": "mirred",
+                        "mirred_action": "redirect",
+                        "direction": "egress",
+                        "index": 1,
+                        "stats": {
+                            "packets": 1,
+                            "overlimits": 1
+                        },
+                        "not_in_hw": true
+                    }
+                ]
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DUMMY clsact"
+        ]
     }
+
 ]

From 2fa98059fd5a0936d0951bd14f8990ae0aa5272a Mon Sep 17 00:00:00 2001
From: Ankit Khushwaha <ankitkhushwaha.linux@gmail.com>
Date: Thu, 1 Jan 2026 22:58:40 +0530
Subject: [PATCH 30/70] selftests: mptcp: Mark xerror and die_perror __noreturn

Compiler reports potential uses of uninitialized variables in
mptcp_connect.c when xerror() is called from failure paths.

mptcp_connect.c:1262:11: warning: variable 'raw_addr' is used
      uninitialized whenever 'if' condition is false
      [-Wsometimes-uninitialized]

xerror() terminates execution by calling exit(), but it is not visible
to the compiler & assumes control flow may continue past the call.

Annotate xerror() with __noreturn so the compiler can correctly reason
about control flow and avoid false-positive uninitialized variable
warnings.

Signed-off-by: Ankit Khushwaha <ankitkhushwaha.linux@gmail.com>
Link: https://patch.msgid.link/20260101172840.90186-1-ankitkhushwaha.linux@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/Makefile        | 1 +
 tools/testing/selftests/net/mptcp/mptcp_connect.c | 3 ++-
 tools/testing/selftests/net/mptcp/mptcp_diag.c    | 3 ++-
 tools/testing/selftests/net/mptcp/mptcp_inq.c     | 5 +++--
 tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++--
 5 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 15d144a25d82..4dd6278cd3dd 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -3,6 +3,7 @@
 top_srcdir = ../../../../..
 
 CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -I$(top_srcdir)/tools/include
 
 TEST_PROGS := \
 	diag.sh \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 404a77bf366a..10f6f99cfd4e 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -33,6 +33,7 @@
 #include <linux/tcp.h>
 #include <linux/time_types.h>
 #include <linux/sockios.h>
+#include <linux/compiler.h>
 
 extern int optind;
 
@@ -140,7 +141,7 @@ static void die_usage(void)
 	exit(1);
 }
 
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
 {
 	va_list ap;
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index e084796e804d..8e0b1b8d84b6 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -5,6 +5,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/inet_diag.h>
 #include <linux/netlink.h>
+#include <linux/compiler.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <linux/tcp.h>
@@ -87,7 +88,7 @@ enum {
 
 #define rta_getattr(type, value)		(*(type *)RTA_DATA(value))
 
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
 {
 	perror(msg);
 	exit(1);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 8e8f6441ad8b..5716998da192 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -28,6 +28,7 @@
 
 #include <linux/tcp.h>
 #include <linux/sockios.h>
+#include <linux/compiler.h>
 
 #ifndef IPPROTO_MPTCP
 #define IPPROTO_MPTCP 262
@@ -40,7 +41,7 @@ static int pf = AF_INET;
 static int proto_tx = IPPROTO_MPTCP;
 static int proto_rx = IPPROTO_MPTCP;
 
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
 {
 	perror(msg);
 	exit(1);
@@ -52,7 +53,7 @@ static void die_usage(int r)
 	exit(r);
 }
 
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
 {
 	va_list ap;
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 286164f7246e..b6e58d936ebe 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -25,6 +25,7 @@
 #include <netinet/in.h>
 
 #include <linux/tcp.h>
+#include <linux/compiler.h>
 
 static int pf = AF_INET;
 
@@ -127,7 +128,7 @@ struct so_state {
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif
 
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
 {
 	perror(msg);
 	exit(1);
@@ -139,7 +140,7 @@ static void die_usage(int r)
 	exit(r);
 }
 
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
 {
 	va_list ap;
 

From a7fc8c641cab855824c45e5e8877e40fd528b5df Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 2 Jan 2026 12:29:38 +0100
Subject: [PATCH 31/70] net: airoha: Fix npu rx DMA definitions

Fix typos in npu rx DMA descriptor definitions.

Fixes: b3ef7bdec66fb ("net: airoha: Add airoha_offload.h header")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260102-airoha-npu-dma-rx-def-fixes-v1-1-205fc6bf7d94@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/soc/airoha/airoha_offload.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h
index 4d23cbb7d407..ab64ecdf39a0 100644
--- a/include/linux/soc/airoha/airoha_offload.h
+++ b/include/linux/soc/airoha/airoha_offload.h
@@ -71,12 +71,12 @@ static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev,
 #define NPU_RX1_DESC_NUM	512
 
 /* CTRL */
-#define NPU_RX_DMA_DESC_LAST_MASK	BIT(29)
-#define NPU_RX_DMA_DESC_LEN_MASK	GENMASK(28, 15)
-#define NPU_RX_DMA_DESC_CUR_LEN_MASK	GENMASK(14, 1)
+#define NPU_RX_DMA_DESC_LAST_MASK	BIT(27)
+#define NPU_RX_DMA_DESC_LEN_MASK	GENMASK(26, 14)
+#define NPU_RX_DMA_DESC_CUR_LEN_MASK	GENMASK(13, 1)
 #define NPU_RX_DMA_DESC_DONE_MASK	BIT(0)
 /* INFO */
-#define NPU_RX_DMA_PKT_COUNT_MASK	GENMASK(31, 28)
+#define NPU_RX_DMA_PKT_COUNT_MASK	GENMASK(31, 29)
 #define NPU_RX_DMA_PKT_ID_MASK		GENMASK(28, 26)
 #define NPU_RX_DMA_SRC_PORT_MASK	GENMASK(25, 21)
 #define NPU_RX_DMA_CRSN_MASK		GENMASK(20, 16)

From 86c22d475cbca80532da33ed5df73e6814d7a632 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sun, 4 Jan 2026 08:52:32 -0800
Subject: [PATCH 32/70] netlink: specs: netdev: clarify the page pool API a
 little

The phrasing of the page-pool-get doc is very confusing.
It's supposed to highlight that support depends on the driver
doing its part but it sounds like orphaned page pools won't
be visible.

The description of the ifindex is completely wrong.
We move the page pool to loopback and skip the attribute if
ifindex is loopback.

Link: https://lore.kernel.org/20260104084347.5de3a537@kernel.org
Reviewed-by: Donald Hunter <donald.hunter@gmail.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://patch.msgid.link/20260104165232.710460-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/netdev.yaml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 82bf5cb2617d..596c306ce52b 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -142,7 +142,7 @@ attribute-sets:
         name: ifindex
         doc: |
           ifindex of the netdev to which the pool belongs.
-          May be reported as 0 if the page pool was allocated for a netdev
+          May not be reported if the page pool was allocated for a netdev
           which got destroyed already (page pools may outlast their netdevs
           because they wait for all memory to be returned).
         type: u32
@@ -601,7 +601,9 @@ operations:
       name: page-pool-get
       doc: |
         Get / dump information about Page Pools.
-        (Only Page Pools associated with a net_device can be listed.)
+        Only Page Pools associated by the driver with a net_device
+        can be listed. ifindex will not be reported if the net_device
+        no longer exists.
       attribute-set: page-pool
       do:
         request:

From 238e03d0466239410b72294b79494e43d4fabe77 Mon Sep 17 00:00:00 2001
From: Mohammad Heib <mheib@redhat.com>
Date: Sun, 4 Jan 2026 23:31:01 +0200
Subject: [PATCH 33/70] net: fix memory leak in skb_segment_list for GRO
 packets

When skb_segment_list() is called during packet forwarding, it handles
packets that were aggregated by the GRO engine.

Historically, the segmentation logic in skb_segment_list assumes that
individual segments are split from a parent SKB and may need to carry
their own socket memory accounting. Accordingly, the code transfers
truesize from the parent to the newly created segments.

Prior to commit ed4cccef64c1 ("gro: fix ownership transfer"), this
truesize subtraction in skb_segment_list() was valid because fragments
still carry a reference to the original socket.

However, commit ed4cccef64c1 ("gro: fix ownership transfer") changed
this behavior by ensuring that fraglist entries are explicitly
orphaned (skb->sk = NULL) to prevent illegal orphaning later in the
stack. This change meant that the entire socket memory charge remained
with the head SKB, but the corresponding accounting logic in
skb_segment_list() was never updated.

As a result, the current code unconditionally adds each fragment's
truesize to delta_truesize and subtracts it from the parent SKB. Since
the fragments are no longer charged to the socket, this subtraction
results in an effective under-count of memory when the head is freed.
This causes sk_wmem_alloc to remain non-zero, preventing socket
destruction and leading to a persistent memory leak.

The leak can be observed via KMEMLEAK when tearing down the networking
environment:

unreferenced object 0xffff8881e6eb9100 (size 2048):
  comm "ping", pid 6720, jiffies 4295492526
  backtrace:
    kmem_cache_alloc_noprof+0x5c6/0x800
    sk_prot_alloc+0x5b/0x220
    sk_alloc+0x35/0xa00
    inet6_create.part.0+0x303/0x10d0
    __sock_create+0x248/0x640
    __sys_socket+0x11b/0x1d0

Since skb_segment_list() is exclusively used for SKB_GSO_FRAGLIST
packets constructed by GRO, the truesize adjustment is removed.

The call to skb_release_head_state() must be preserved. As documented in
commit cf673ed0e057 ("net: fix fraglist segmentation reference count
leak"), it is still required to correctly drop references to SKB
extensions that may be overwritten during __copy_skb_header().

Fixes: ed4cccef64c1 ("gro: fix ownership transfer")
Signed-off-by: Mohammad Heib <mheib@redhat.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260104213101.352887-1-mheib@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/skbuff.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a00808f7be6a..a56133902c0d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4636,12 +4636,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 {
 	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
 	unsigned int tnl_hlen = skb_tnl_header_len(skb);
-	unsigned int delta_truesize = 0;
 	unsigned int delta_len = 0;
 	struct sk_buff *tail = NULL;
 	struct sk_buff *nskb, *tmp;
 	int len_diff, err;
 
+	/* Only skb_gro_receive_list generated skbs arrive here */
+	DEBUG_NET_WARN_ON_ONCE(!(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST));
+
 	skb_push(skb, -skb_network_offset(skb) + offset);
 
 	/* Ensure the head is writeable before touching the shared info */
@@ -4655,8 +4657,9 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 		nskb = list_skb;
 		list_skb = list_skb->next;
 
+		DEBUG_NET_WARN_ON_ONCE(nskb->sk);
+
 		err = 0;
-		delta_truesize += nskb->truesize;
 		if (skb_shared(nskb)) {
 			tmp = skb_clone(nskb, GFP_ATOMIC);
 			if (tmp) {
@@ -4699,7 +4702,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 			goto err_linearize;
 	}
 
-	skb->truesize = skb->truesize - delta_truesize;
 	skb->data_len = skb->data_len - delta_len;
 	skb->len = skb->len - delta_len;
 

From 083029bd8b445595222a3cd14076b880781c1765 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Thu, 20 Nov 2025 16:12:14 -0800
Subject: [PATCH 34/70] idpf: keep the netdev when a reset fails

During a successful reset the driver would re-allocate vport resources
while keeping the netdevs intact. However, in case of an error in the
init task, the netdev of the failing vport will be unregistered,
effectively removing the network interface:

[  121.211076] idpf 0000:83:00.0: enabling device (0100 -> 0102)
[  121.221976] idpf 0000:83:00.0: Device HW Reset initiated
[  124.161229] idpf 0000:83:00.0 ens801f0: renamed from eth0
[  124.163364] idpf 0000:83:00.0 ens801f0d1: renamed from eth1
[  125.934656] idpf 0000:83:00.0 ens801f0d2: renamed from eth2
[  128.218429] idpf 0000:83:00.0 ens801f0d3: renamed from eth3

ip -br a
ens801f0         UP
ens801f0d1       UP
ens801f0d2       UP
ens801f0d3       UP
echo 1 > /sys/class/net/ens801f0/device/reset

[  145.885537] idpf 0000:83:00.0: resetting
[  145.990280] idpf 0000:83:00.0: reset done
[  146.284766] idpf 0000:83:00.0: HW reset detected
[  146.296610] idpf 0000:83:00.0: Device HW Reset initiated
[  211.556719] idpf 0000:83:00.0: Transaction timed-out (op:526 cookie:7700 vc_op:526 salt:77 timeout:60000ms)
[  272.996705] idpf 0000:83:00.0: Transaction timed-out (op:502 cookie:7800 vc_op:502 salt:78 timeout:60000ms)

ip -br a
ens801f0d1       DOWN
ens801f0d2       DOWN
ens801f0d3       DOWN

Re-shuffle the logic in the error path of the init task to make sure the
netdevs remain intact. This will allow the driver to attempt recovery via
subsequent resets, provided the FW is still functional.

The main change is to make sure that idpf_decfg_netdev() is not called
should the init task fail during a reset. The error handling is
consolidated under unwind_vports, as the removed labels had the same
cleanup logic split depending on the point of failure.

Fixes: ce1b75d0635c ("idpf: add ptypes and MAC filter support")
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_lib.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 7ce4eb71a433..313803c08847 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1579,6 +1579,10 @@ void idpf_init_task(struct work_struct *work)
 		goto unwind_vports;
 	}
 
+	err = idpf_send_get_rx_ptype_msg(vport);
+	if (err)
+		goto unwind_vports;
+
 	index = vport->idx;
 	vport_config = adapter->vport_config[index];
 
@@ -1590,15 +1594,11 @@ void idpf_init_task(struct work_struct *work)
 	err = idpf_check_supported_desc_ids(vport);
 	if (err) {
 		dev_err(&pdev->dev, "failed to get required descriptor ids\n");
-		goto cfg_netdev_err;
+		goto unwind_vports;
 	}
 
 	if (idpf_cfg_netdev(vport))
-		goto cfg_netdev_err;
-
-	err = idpf_send_get_rx_ptype_msg(vport);
-	if (err)
-		goto handle_err;
+		goto unwind_vports;
 
 	/* Once state is put into DOWN, driver is ready for dev_open */
 	np = netdev_priv(vport->netdev);
@@ -1645,11 +1645,6 @@ void idpf_init_task(struct work_struct *work)
 
 	return;
 
-handle_err:
-	idpf_decfg_netdev(vport);
-cfg_netdev_err:
-	idpf_vport_rel(vport);
-	adapter->vports[index] = NULL;
 unwind_vports:
 	if (default_vport) {
 		for (index = 0; index < adapter->max_vports; index++) {

From 2e281e1155fc476c571c0bd2ffbfe28ab829a5c3 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Thu, 20 Nov 2025 16:12:15 -0800
Subject: [PATCH 35/70] idpf: detach and close netdevs while handling a reset

Protect the reset path from callbacks by setting the netdevs to detached
state and close any netdevs in UP state until the reset handling has
completed. During a reset, the driver will de-allocate resources for the
vport, and there is no guarantee that those will recover, which is why the
existing vport_ctrl_lock does not provide sufficient protection.

idpf_detach_and_close() is called right before reset handling. If the
reset handling succeeds, the netdevs state is recovered via call to
idpf_attach_and_open(). If the reset handling fails the netdevs remain
down. The detach/down calls are protected with RTNL lock to avoid racing
with callbacks. On the recovery side the attach can be done without
holding the RTNL lock as there are no callbacks expected at that point,
due to detach/close always being done first in that flow.

The previous logic restoring the netdevs state based on the
IDPF_VPORT_UP_REQUESTED flag in the init task is not needed anymore, hence
the removal of idpf_set_vport_state(). The IDPF_VPORT_UP_REQUESTED is
still being used to restore the state of the netdevs following the reset,
but has no use outside of the reset handling flow.

idpf_init_hard_reset() is converted to void, since it was used as such and
there is no error handling being done based on its return value.

Before this change, invoking hard and soft resets simultaneously will
cause the driver to lose the vport state:
ip -br a
<inf>	UP
echo 1 > /sys/class/net/ens801f0/device/reset& \
ethtool -L ens801f0 combined 8
ip -br a
<inf>	DOWN
ip link set <inf> up
ip -br a
<inf>	DOWN

Also in case of a failure in the reset path, the netdev is left
exposed to external callbacks, while vport resources are not
initialized, leading to a crash on subsequent ifup/down:
[408471.398966] idpf 0000:83:00.0: HW reset detected
[408471.411744] idpf 0000:83:00.0: Device HW Reset initiated
[408472.277901] idpf 0000:83:00.0: The driver was unable to contact the device's firmware. Check that the FW is running. Driver state= 0x2
[408508.125551] BUG: kernel NULL pointer dereference, address: 0000000000000078
[408508.126112] #PF: supervisor read access in kernel mode
[408508.126687] #PF: error_code(0x0000) - not-present page
[408508.127256] PGD 2aae2f067 P4D 0
[408508.127824] Oops: Oops: 0000 [#1] SMP NOPTI
...
[408508.130871] RIP: 0010:idpf_stop+0x39/0x70 [idpf]
...
[408508.139193] Call Trace:
[408508.139637]  <TASK>
[408508.140077]  __dev_close_many+0xbb/0x260
[408508.140533]  __dev_change_flags+0x1cf/0x280
[408508.140987]  netif_change_flags+0x26/0x70
[408508.141434]  dev_change_flags+0x3d/0xb0
[408508.141878]  devinet_ioctl+0x460/0x890
[408508.142321]  inet_ioctl+0x18e/0x1d0
[408508.142762]  ? _copy_to_user+0x22/0x70
[408508.143207]  sock_do_ioctl+0x3d/0xe0
[408508.143652]  sock_ioctl+0x10e/0x330
[408508.144091]  ? find_held_lock+0x2b/0x80
[408508.144537]  __x64_sys_ioctl+0x96/0xe0
[408508.144979]  do_syscall_64+0x79/0x3d0
[408508.145415]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[408508.145860] RIP: 0033:0x7f3e0bb4caff

Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration")
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_lib.c | 121 ++++++++++++---------
 1 file changed, 72 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 313803c08847..a964e0f5891e 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -729,6 +729,65 @@ static int idpf_init_mac_addr(struct idpf_vport *vport,
 	return 0;
 }
 
+static void idpf_detach_and_close(struct idpf_adapter *adapter)
+{
+	int max_vports = adapter->max_vports;
+
+	for (int i = 0; i < max_vports; i++) {
+		struct net_device *netdev = adapter->netdevs[i];
+
+		/* If the interface is in detached state, that means the
+		 * previous reset was not handled successfully for this
+		 * vport.
+		 */
+		if (!netif_device_present(netdev))
+			continue;
+
+		/* Hold RTNL to protect racing with callbacks */
+		rtnl_lock();
+		netif_device_detach(netdev);
+		if (netif_running(netdev)) {
+			set_bit(IDPF_VPORT_UP_REQUESTED,
+				adapter->vport_config[i]->flags);
+			dev_close(netdev);
+		}
+		rtnl_unlock();
+	}
+}
+
+static void idpf_attach_and_open(struct idpf_adapter *adapter)
+{
+	int max_vports = adapter->max_vports;
+
+	for (int i = 0; i < max_vports; i++) {
+		struct idpf_vport *vport = adapter->vports[i];
+		struct idpf_vport_config *vport_config;
+		struct net_device *netdev;
+
+		/* In case of a critical error in the init task, the vport
+		 * will be freed. Only continue to restore the netdevs
+		 * if the vport is allocated.
+		 */
+		if (!vport)
+			continue;
+
+		/* No need for RTNL on attach as this function is called
+		 * following detach and dev_close(). We do take RTNL for
+		 * dev_open() below as it can race with external callbacks
+		 * following the call to netif_device_attach().
+		 */
+		netdev = adapter->netdevs[i];
+		netif_device_attach(netdev);
+		vport_config = adapter->vport_config[vport->idx];
+		if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED,
+				       vport_config->flags)) {
+			rtnl_lock();
+			dev_open(netdev, NULL);
+			rtnl_unlock();
+		}
+	}
+}
+
 /**
  * idpf_cfg_netdev - Allocate, configure and register a netdev
  * @vport: main vport structure
@@ -1041,10 +1100,11 @@ static void idpf_vport_dealloc(struct idpf_vport *vport)
 	idpf_idc_deinit_vport_aux_device(vport->vdev_info);
 
 	idpf_deinit_mac_addr(vport);
-	idpf_vport_stop(vport, true);
 
-	if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
+	if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) {
+		idpf_vport_stop(vport, true);
 		idpf_decfg_netdev(vport);
+	}
 	if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
 		idpf_del_all_mac_filters(vport);
 
@@ -1544,7 +1604,6 @@ void idpf_init_task(struct work_struct *work)
 	struct idpf_vport_config *vport_config;
 	struct idpf_vport_max_q max_q;
 	struct idpf_adapter *adapter;
-	struct idpf_netdev_priv *np;
 	struct idpf_vport *vport;
 	u16 num_default_vports;
 	struct pci_dev *pdev;
@@ -1600,12 +1659,6 @@ void idpf_init_task(struct work_struct *work)
 	if (idpf_cfg_netdev(vport))
 		goto unwind_vports;
 
-	/* Once state is put into DOWN, driver is ready for dev_open */
-	np = netdev_priv(vport->netdev);
-	clear_bit(IDPF_VPORT_UP, np->state);
-	if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags))
-		idpf_vport_open(vport, true);
-
 	/* Spawn and return 'idpf_init_task' work queue until all the
 	 * default vports are created
 	 */
@@ -1781,27 +1834,6 @@ static int idpf_check_reset_complete(struct idpf_hw *hw,
 	return -EBUSY;
 }
 
-/**
- * idpf_set_vport_state - Set the vport state to be after the reset
- * @adapter: Driver specific private structure
- */
-static void idpf_set_vport_state(struct idpf_adapter *adapter)
-{
-	u16 i;
-
-	for (i = 0; i < adapter->max_vports; i++) {
-		struct idpf_netdev_priv *np;
-
-		if (!adapter->netdevs[i])
-			continue;
-
-		np = netdev_priv(adapter->netdevs[i]);
-		if (test_bit(IDPF_VPORT_UP, np->state))
-			set_bit(IDPF_VPORT_UP_REQUESTED,
-				adapter->vport_config[i]->flags);
-	}
-}
-
 /**
  * idpf_init_hard_reset - Initiate a hardware reset
  * @adapter: Driver specific private structure
@@ -1810,28 +1842,17 @@ static void idpf_set_vport_state(struct idpf_adapter *adapter)
  * reallocate. Also reinitialize the mailbox. Return 0 on success,
  * negative on failure.
  */
-static int idpf_init_hard_reset(struct idpf_adapter *adapter)
+static void idpf_init_hard_reset(struct idpf_adapter *adapter)
 {
 	struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops;
 	struct device *dev = &adapter->pdev->dev;
-	struct net_device *netdev;
 	int err;
-	u16 i;
 
+	idpf_detach_and_close(adapter);
 	mutex_lock(&adapter->vport_ctrl_lock);
 
 	dev_info(dev, "Device HW Reset initiated\n");
 
-	/* Avoid TX hangs on reset */
-	for (i = 0; i < adapter->max_vports; i++) {
-		netdev = adapter->netdevs[i];
-		if (!netdev)
-			continue;
-
-		netif_carrier_off(netdev);
-		netif_tx_disable(netdev);
-	}
-
 	/* Prepare for reset */
 	if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
 		reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD);
@@ -1840,7 +1861,6 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter)
 
 		idpf_idc_issue_reset_event(adapter->cdev_info);
 
-		idpf_set_vport_state(adapter);
 		idpf_vc_core_deinit(adapter);
 		if (!is_reset)
 			reg_ops->trigger_reset(adapter, IDPF_HR_FUNC_RESET);
@@ -1887,11 +1907,14 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter)
 unlock_mutex:
 	mutex_unlock(&adapter->vport_ctrl_lock);
 
-	/* Wait until all vports are created to init RDMA CORE AUX */
-	if (!err)
-		err = idpf_idc_init(adapter);
-
-	return err;
+	/* Attempt to restore netdevs and initialize RDMA CORE AUX device,
+	 * provided vc_core_init succeeded. It is still possible that
+	 * vports are not allocated at this point if the init task failed.
+	 */
+	if (!err) {
+		idpf_attach_and_open(adapter);
+		idpf_idc_init(adapter);
+	}
 }
 
 /**

From f6242b354605faff263ca45882b148200915a3f6 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Thu, 20 Nov 2025 16:12:16 -0800
Subject: [PATCH 36/70] idpf: fix memory leak in idpf_vport_rel()

Free vport->rx_ptype_lkup in idpf_vport_rel() to avoid leaking memory
during a reset. Reported by kmemleak:

unreferenced object 0xff450acac838a000 (size 4096):
  comm "kworker/u258:5", pid 7732, jiffies 4296830044
  hex dump (first 32 bytes):
    00 00 00 00 00 10 00 00 00 10 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 10 00 00 00 00 00 00  ................
  backtrace (crc 3da81902):
    __kmalloc_cache_noprof+0x469/0x7a0
    idpf_send_get_rx_ptype_msg+0x90/0x570 [idpf]
    idpf_init_task+0x1ec/0x8d0 [idpf]
    process_one_work+0x226/0x6d0
    worker_thread+0x19e/0x340
    kthread+0x10f/0x250
    ret_from_fork+0x251/0x2b0
    ret_from_fork_asm+0x1a/0x30

Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration")
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_lib.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index a964e0f5891e..04af10cfaa8c 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1082,6 +1082,8 @@ static void idpf_vport_rel(struct idpf_vport *vport)
 		kfree(adapter->vport_config[idx]->req_qs_chunks);
 		adapter->vport_config[idx]->req_qs_chunks = NULL;
 	}
+	kfree(vport->rx_ptype_lkup);
+	vport->rx_ptype_lkup = NULL;
 	kfree(vport);
 	adapter->num_alloc_vports--;
 }

From e111cbc4adf9f9974eed040aeece7e17460f6bff Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Thu, 20 Nov 2025 16:12:17 -0800
Subject: [PATCH 37/70] idpf: fix memory leak in idpf_vc_core_deinit()

Make sure to free hw->lan_regs. Reported by kmemleak during reset:

unreferenced object 0xff1b913d02a936c0 (size 96):
  comm "kworker/u258:14", pid 2174, jiffies 4294958305
  hex dump (first 32 bytes):
    00 00 00 c0 a8 ba 2d ff 00 00 00 00 00 00 00 00  ......-.........
    00 00 40 08 00 00 00 00 00 00 25 b3 a8 ba 2d ff  ..@.......%...-.
  backtrace (crc 36063c4f):
    __kmalloc_noprof+0x48f/0x890
    idpf_vc_core_init+0x6ce/0x9b0 [idpf]
    idpf_vc_event_task+0x1fb/0x350 [idpf]
    process_one_work+0x226/0x6d0
    worker_thread+0x19e/0x340
    kthread+0x10f/0x250
    ret_from_fork+0x251/0x2b0
    ret_from_fork_asm+0x1a/0x30

Fixes: 6aa53e861c1a ("idpf: implement get LAN MMIO memory regions")
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Joshua Hay <joshua.a.hay@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index 5bbe7d9294c1..01bbd12a642a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -3570,6 +3570,7 @@ init_failed:
  */
 void idpf_vc_core_deinit(struct idpf_adapter *adapter)
 {
+	struct idpf_hw *hw = &adapter->hw;
 	bool remove_in_prog;
 
 	if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags))
@@ -3593,6 +3594,9 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter)
 
 	idpf_vport_params_buf_rel(adapter);
 
+	kfree(hw->lan_regs);
+	hw->lan_regs = NULL;
+
 	kfree(adapter->vports);
 	adapter->vports = NULL;
 

From 4d792219fe6f891b5b557a607ac8a0a14eda6e38 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Thu, 20 Nov 2025 16:12:18 -0800
Subject: [PATCH 38/70] idpf: fix error handling in the init_task on load

If the init_task fails during a driver load, we end up without vports and
netdevs, effectively failing the entire process. In that state a
subsequent reset will result in a crash as the service task attempts to
access uninitialized resources. Following trace is from an error in the
init_task where the CREATE_VPORT (op 501) is rejected by the FW:

[40922.763136] idpf 0000:83:00.0: Device HW Reset initiated
[40924.449797] idpf 0000:83:00.0: Transaction failed (op 501)
[40958.148190] idpf 0000:83:00.0: HW reset detected
[40958.161202] BUG: kernel NULL pointer dereference, address: 00000000000000a8
...
[40958.168094] Workqueue: idpf-0000:83:00.0-vc_event idpf_vc_event_task [idpf]
[40958.168865] RIP: 0010:idpf_vc_event_task+0x9b/0x350 [idpf]
...
[40958.177932] Call Trace:
[40958.178491]  <TASK>
[40958.179040]  process_one_work+0x226/0x6d0
[40958.179609]  worker_thread+0x19e/0x340
[40958.180158]  ? __pfx_worker_thread+0x10/0x10
[40958.180702]  kthread+0x10f/0x250
[40958.181238]  ? __pfx_kthread+0x10/0x10
[40958.181774]  ret_from_fork+0x251/0x2b0
[40958.182307]  ? __pfx_kthread+0x10/0x10
[40958.182834]  ret_from_fork_asm+0x1a/0x30
[40958.183370]  </TASK>

Fix the error handling in the init_task to make sure the service and
mailbox tasks are disabled if the error happens during load. These are
started in idpf_vc_core_init(), which spawns the init_task and has no way
of knowing if it failed. If the error happens on reset, following
successful driver load, the tasks can still run, as that will allow the
netdevs to attempt recovery through another reset. Stop the PTP callbacks
either way as those will be restarted by the call to idpf_vc_core_init()
during a successful reset.

Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration")
Reported-by: Vivek Kumar <iamvivekkumar@google.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_lib.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 04af10cfaa8c..e2ee8b137421 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1690,10 +1690,9 @@ void idpf_init_task(struct work_struct *work)
 		set_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags);
 	}
 
-	/* As all the required vports are created, clear the reset flag
-	 * unconditionally here in case we were in reset and the link was down.
-	 */
+	/* Clear the reset and load bits as all vports are created */
 	clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+	clear_bit(IDPF_HR_DRV_LOAD, adapter->flags);
 	/* Start the statistics task now */
 	queue_delayed_work(adapter->stats_wq, &adapter->stats_task,
 			   msecs_to_jiffies(10 * (pdev->devfn & 0x07)));
@@ -1707,6 +1706,15 @@ unwind_vports:
 				idpf_vport_dealloc(adapter->vports[index]);
 		}
 	}
+	/* Cleanup after vc_core_init, which has no way of knowing the
+	 * init task failed on driver load.
+	 */
+	if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
+		cancel_delayed_work_sync(&adapter->serv_task);
+		cancel_delayed_work_sync(&adapter->mbx_task);
+	}
+	idpf_ptp_release(adapter);
+
 	clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
 }
 
@@ -1856,7 +1864,7 @@ static void idpf_init_hard_reset(struct idpf_adapter *adapter)
 	dev_info(dev, "Device HW Reset initiated\n");
 
 	/* Prepare for reset */
-	if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
+	if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
 		reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD);
 	} else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) {
 		bool is_reset = idpf_is_reset_detected(adapter);

From f9841bd28b600526ca4f6713b0ca49bf7bb98452 Mon Sep 17 00:00:00 2001
From: Sreedevi Joshi <sreedevi.joshi@intel.com>
Date: Tue, 30 Sep 2025 16:23:51 -0500
Subject: [PATCH 39/70] idpf: fix memory leak of flow steer list on rmmod

The flow steering list maintains entries that are added and removed as
ethtool creates and deletes flow steering rules. Module removal with active
entries causes memory leak as the list is not properly cleaned up.

Prevent this by iterating through the remaining entries in the list and
freeing the associated memory during module removal. Add a spinlock
(flow_steer_list_lock) to protect the list access from multiple threads.

Fixes: ada3e24b84a0 ("idpf: add flow steering support")
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Mina Almasry <almasrymina@google.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf.h        |  2 ++
 .../net/ethernet/intel/idpf/idpf_ethtool.c    | 15 ++++++++--
 drivers/net/ethernet/intel/idpf/idpf_lib.c    | 28 ++++++++++++++++++-
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index 8cfc68cbfa06..a61821333f5d 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -558,6 +558,7 @@ struct idpf_vector_lifo {
  * @max_q: Maximum possible queues
  * @req_qs_chunks: Queue chunk data for requested queues
  * @mac_filter_list_lock: Lock to protect mac filters
+ * @flow_steer_list_lock: Lock to protect fsteer filters
  * @flags: See enum idpf_vport_config_flags
  */
 struct idpf_vport_config {
@@ -565,6 +566,7 @@ struct idpf_vport_config {
 	struct idpf_vport_max_q max_q;
 	struct virtchnl2_add_queues *req_qs_chunks;
 	spinlock_t mac_filter_list_lock;
+	spinlock_t flow_steer_list_lock;
 	DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS);
 };
 
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
index 2589e124e41c..00481fec8179 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -37,6 +37,7 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 {
 	struct idpf_netdev_priv *np = netdev_priv(netdev);
 	struct idpf_vport_user_config_data *user_config;
+	struct idpf_vport_config *vport_config;
 	struct idpf_fsteer_fltr *f;
 	struct idpf_vport *vport;
 	unsigned int cnt = 0;
@@ -44,7 +45,8 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 
 	idpf_vport_ctrl_lock(netdev);
 	vport = idpf_netdev_to_vport(netdev);
-	user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
+	vport_config = np->adapter->vport_config[np->vport_idx];
+	user_config = &vport_config->user_config;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXCLSRLCNT:
@@ -53,15 +55,18 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		break;
 	case ETHTOOL_GRXCLSRULE:
 		err = -EINVAL;
+		spin_lock_bh(&vport_config->flow_steer_list_lock);
 		list_for_each_entry(f, &user_config->flow_steer_list, list)
 			if (f->loc == cmd->fs.location) {
 				cmd->fs.ring_cookie = f->q_index;
 				err = 0;
 				break;
 			}
+		spin_unlock_bh(&vport_config->flow_steer_list_lock);
 		break;
 	case ETHTOOL_GRXCLSRLALL:
 		cmd->data = idpf_fsteer_max_rules(vport);
+		spin_lock_bh(&vport_config->flow_steer_list_lock);
 		list_for_each_entry(f, &user_config->flow_steer_list, list) {
 			if (cnt == cmd->rule_cnt) {
 				err = -EMSGSIZE;
@@ -72,6 +77,7 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		}
 		if (!err)
 			cmd->rule_cnt = user_config->num_fsteer_fltrs;
+		spin_unlock_bh(&vport_config->flow_steer_list_lock);
 		break;
 	default:
 		break;
@@ -240,6 +246,7 @@ static int idpf_add_flow_steer(struct net_device *netdev,
 
 	fltr->loc = fsp->location;
 	fltr->q_index = q_index;
+	spin_lock_bh(&vport_config->flow_steer_list_lock);
 	list_for_each_entry(f, &user_config->flow_steer_list, list) {
 		if (f->loc >= fltr->loc)
 			break;
@@ -250,6 +257,7 @@ static int idpf_add_flow_steer(struct net_device *netdev,
 		 list_add(&fltr->list, &user_config->flow_steer_list);
 
 	user_config->num_fsteer_fltrs++;
+	spin_unlock_bh(&vport_config->flow_steer_list_lock);
 
 out:
 	kfree(rule);
@@ -302,17 +310,20 @@ static int idpf_del_flow_steer(struct net_device *netdev,
 		goto out;
 	}
 
+	spin_lock_bh(&vport_config->flow_steer_list_lock);
 	list_for_each_entry_safe(f, iter,
 				 &user_config->flow_steer_list, list) {
 		if (f->loc == fsp->location) {
 			list_del(&f->list);
 			kfree(f);
 			user_config->num_fsteer_fltrs--;
-			goto out;
+			goto out_unlock;
 		}
 	}
 	err = -EINVAL;
 
+out_unlock:
+	spin_unlock_bh(&vport_config->flow_steer_list_lock);
 out:
 	kfree(rule);
 	return err;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index e2ee8b137421..d56366e676cf 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -442,6 +442,29 @@ send_dealloc_vecs:
 	return err;
 }
 
+/**
+ * idpf_del_all_flow_steer_filters - Delete all flow steer filters in list
+ * @vport: main vport struct
+ *
+ * Takes flow_steer_list_lock spinlock.  Deletes all filters
+ */
+static void idpf_del_all_flow_steer_filters(struct idpf_vport *vport)
+{
+	struct idpf_vport_config *vport_config;
+	struct idpf_fsteer_fltr *f, *ftmp;
+
+	vport_config = vport->adapter->vport_config[vport->idx];
+
+	spin_lock_bh(&vport_config->flow_steer_list_lock);
+	list_for_each_entry_safe(f, ftmp, &vport_config->user_config.flow_steer_list,
+				 list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+	vport_config->user_config.num_fsteer_fltrs = 0;
+	spin_unlock_bh(&vport_config->flow_steer_list_lock);
+}
+
 /**
  * idpf_find_mac_filter - Search filter list for specific mac filter
  * @vconfig: Vport config structure
@@ -1107,8 +1130,10 @@ static void idpf_vport_dealloc(struct idpf_vport *vport)
 		idpf_vport_stop(vport, true);
 		idpf_decfg_netdev(vport);
 	}
-	if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+	if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) {
 		idpf_del_all_mac_filters(vport);
+		idpf_del_all_flow_steer_filters(vport);
+	}
 
 	if (adapter->netdevs[i]) {
 		struct idpf_netdev_priv *np = netdev_priv(adapter->netdevs[i]);
@@ -1648,6 +1673,7 @@ void idpf_init_task(struct work_struct *work)
 	vport_config = adapter->vport_config[index];
 
 	spin_lock_init(&vport_config->mac_filter_list_lock);
+	spin_lock_init(&vport_config->flow_steer_list_lock);
 
 	INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
 	INIT_LIST_HEAD(&vport_config->user_config.flow_steer_list);

From 36aae2ea6bd76b8246caa50e34a4f4824f0a3be8 Mon Sep 17 00:00:00 2001
From: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
Date: Tue, 30 Sep 2025 16:23:52 -0500
Subject: [PATCH 40/70] idpf: fix issue with ethtool -n command display

When ethtool -n is executed on an interface to display the flow steering
rules, "rxclass: Unknown flow type" error is generated.

The flow steering list maintained in the driver currently stores only the
location and q_index but other fields of the ethtool_rx_flow_spec are not
stored. This may be enough for the virtchnl command to delete the entry.
However, when the ethtool -n command is used to query the flow steering
rules, the ethtool_rx_flow_spec returned is not complete causing the
error below.

Resolve this by storing the flow spec (fsp) when rules are added and
returning the complete flow spec when rules are queried.

Also, change the return value from EINVAL to ENOENT when flow steering
entry is not found during query by location or when deleting an entry.

Add logic to detect and reject duplicate filter entries at the same
location and change logic to perform upfront validation of all error
conditions before adding flow rules through virtchnl. This avoids the
need for additional virtchnl delete messages when subsequent operations
fail, which was missing in the original upstream code.

Example:
Before the fix:
ethtool -n eth1
2 RX rings available
Total 2 rules

rxclass: Unknown flow type
rxclass: Unknown flow type

After the fix:
ethtool -n eth1
2 RX rings available
Total 2 rules

Filter: 0
        Rule Type: TCP over IPv4
        Src IP addr: 10.0.0.1 mask: 0.0.0.0
        Dest IP addr: 0.0.0.0 mask: 255.255.255.255
        TOS: 0x0 mask: 0xff
        Src port: 0 mask: 0xffff
        Dest port: 0 mask: 0xffff
        Action: Direct to queue 0

Filter: 1
        Rule Type: UDP over IPv4
        Src IP addr: 10.0.0.1 mask: 0.0.0.0
        Dest IP addr: 0.0.0.0 mask: 255.255.255.255
        TOS: 0x0 mask: 0xff
        Src port: 0 mask: 0xffff
        Dest port: 0 mask: 0xffff
        Action: Direct to queue 0

Fixes: ada3e24b84a0 ("idpf: add flow steering support")
Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
Co-developed-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Signed-off-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Mina Almasry <almasrymina@google.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf.h        |  3 +-
 .../net/ethernet/intel/idpf/idpf_ethtool.c    | 59 ++++++++++++-------
 2 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index a61821333f5d..dab36c0c3cdc 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -284,8 +284,7 @@ struct idpf_port_stats {
 
 struct idpf_fsteer_fltr {
 	struct list_head list;
-	u32 loc;
-	u32 q_index;
+	struct ethtool_rx_flow_spec fs;
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
index 00481fec8179..7000f6283a33 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -54,11 +54,15 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		cmd->data = idpf_fsteer_max_rules(vport);
 		break;
 	case ETHTOOL_GRXCLSRULE:
-		err = -EINVAL;
+		err = -ENOENT;
 		spin_lock_bh(&vport_config->flow_steer_list_lock);
 		list_for_each_entry(f, &user_config->flow_steer_list, list)
-			if (f->loc == cmd->fs.location) {
-				cmd->fs.ring_cookie = f->q_index;
+			if (f->fs.location == cmd->fs.location) {
+				/* Avoid infoleak from padding: zero first,
+				 * then assign fields
+				 */
+				memset(&cmd->fs, 0, sizeof(cmd->fs));
+				cmd->fs = f->fs;
 				err = 0;
 				break;
 			}
@@ -72,7 +76,7 @@ static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 				err = -EMSGSIZE;
 				break;
 			}
-			rule_locs[cnt] = f->loc;
+			rule_locs[cnt] = f->fs.location;
 			cnt++;
 		}
 		if (!err)
@@ -174,7 +178,7 @@ static int idpf_add_flow_steer(struct net_device *netdev,
 	struct idpf_vport *vport;
 	u32 flow_type, q_index;
 	u16 num_rxq;
-	int err;
+	int err = 0;
 
 	vport = idpf_netdev_to_vport(netdev);
 	vport_config = vport->adapter->vport_config[np->vport_idx];
@@ -200,6 +204,29 @@ static int idpf_add_flow_steer(struct net_device *netdev,
 	if (!rule)
 		return -ENOMEM;
 
+	fltr = kzalloc(sizeof(*fltr), GFP_KERNEL);
+	if (!fltr) {
+		err = -ENOMEM;
+		goto out_free_rule;
+	}
+
+	/* detect duplicate entry and reject before adding rules */
+	spin_lock_bh(&vport_config->flow_steer_list_lock);
+	list_for_each_entry(f, &user_config->flow_steer_list, list) {
+		if (f->fs.location == fsp->location) {
+			err = -EEXIST;
+			break;
+		}
+
+		if (f->fs.location > fsp->location)
+			break;
+		parent = f;
+	}
+	spin_unlock_bh(&vport_config->flow_steer_list_lock);
+
+	if (err)
+		goto out;
+
 	rule->vport_id = cpu_to_le32(vport->vport_id);
 	rule->count = cpu_to_le32(1);
 	info = &rule->rule_info[0];
@@ -238,28 +265,20 @@ static int idpf_add_flow_steer(struct net_device *netdev,
 		goto out;
 	}
 
-	fltr = kzalloc(sizeof(*fltr), GFP_KERNEL);
-	if (!fltr) {
-		err = -ENOMEM;
-		goto out;
-	}
+	/* Save a copy of the user's flow spec so ethtool can later retrieve it */
+	fltr->fs = *fsp;
 
-	fltr->loc = fsp->location;
-	fltr->q_index = q_index;
 	spin_lock_bh(&vport_config->flow_steer_list_lock);
-	list_for_each_entry(f, &user_config->flow_steer_list, list) {
-		if (f->loc >= fltr->loc)
-			break;
-		parent = f;
-	}
-
 	parent ? list_add(&fltr->list, &parent->list) :
 		 list_add(&fltr->list, &user_config->flow_steer_list);
 
 	user_config->num_fsteer_fltrs++;
 	spin_unlock_bh(&vport_config->flow_steer_list_lock);
+	goto out_free_rule;
 
 out:
+	kfree(fltr);
+out_free_rule:
 	kfree(rule);
 	return err;
 }
@@ -313,14 +332,14 @@ static int idpf_del_flow_steer(struct net_device *netdev,
 	spin_lock_bh(&vport_config->flow_steer_list_lock);
 	list_for_each_entry_safe(f, iter,
 				 &user_config->flow_steer_list, list) {
-		if (f->loc == fsp->location) {
+		if (f->fs.location == fsp->location) {
 			list_del(&f->list);
 			kfree(f);
 			user_config->num_fsteer_fltrs--;
 			goto out_unlock;
 		}
 	}
-	err = -EINVAL;
+	err = -ENOENT;
 
 out_unlock:
 	spin_unlock_bh(&vport_config->flow_steer_list_lock);

From 83f38f210b85676f40ba8586b5a8edae19b56995 Mon Sep 17 00:00:00 2001
From: Sreedevi Joshi <sreedevi.joshi@intel.com>
Date: Mon, 24 Nov 2025 12:47:48 -0600
Subject: [PATCH 41/70] idpf: Fix RSS LUT NULL pointer crash on early ethtool
 operations

The RSS LUT is not initialized until the interface comes up, causing
the following NULL pointer crash when ethtool operations like rxhash on/off
are performed before the interface is brought up for the first time.

Move RSS LUT initialization from ndo_open to vport creation to ensure LUT
is always available. This enables RSS configuration via ethtool before
bringing the interface up. Simplify LUT management by maintaining all
changes in the driver's soft copy and programming zeros to the indirection
table when rxhash is disabled. Defer HW programming until the interface
comes up if it is down during rxhash and LUT configuration changes.

Steps to reproduce:
** Load idpf driver; interfaces will be created
	modprobe idpf
** Before bringing the interfaces up, turn rxhash off
	ethtool -K eth2 rxhash off

[89408.371875] BUG: kernel NULL pointer dereference, address: 0000000000000000
[89408.371908] #PF: supervisor read access in kernel mode
[89408.371924] #PF: error_code(0x0000) - not-present page
[89408.371940] PGD 0 P4D 0
[89408.371953] Oops: Oops: 0000 [#1] SMP NOPTI
<snip>
[89408.372052] RIP: 0010:memcpy_orig+0x16/0x130
[89408.372310] Call Trace:
[89408.372317]  <TASK>
[89408.372326]  ? idpf_set_features+0xfc/0x180 [idpf]
[89408.372363]  __netdev_update_features+0x295/0xde0
[89408.372384]  ethnl_set_features+0x15e/0x460
[89408.372406]  genl_family_rcv_msg_doit+0x11f/0x180
[89408.372429]  genl_rcv_msg+0x1ad/0x2b0
[89408.372446]  ? __pfx_ethnl_set_features+0x10/0x10
[89408.372465]  ? __pfx_genl_rcv_msg+0x10/0x10
[89408.372482]  netlink_rcv_skb+0x58/0x100
[89408.372502]  genl_rcv+0x2c/0x50
[89408.372516]  netlink_unicast+0x289/0x3e0
[89408.372533]  netlink_sendmsg+0x215/0x440
[89408.372551]  __sys_sendto+0x234/0x240
[89408.372571]  __x64_sys_sendto+0x28/0x30
[89408.372585]  x64_sys_call+0x1909/0x1da0
[89408.372604]  do_syscall_64+0x7a/0xfa0
[89408.373140]  ? clear_bhb_loop+0x60/0xb0
[89408.373647]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[89408.378887]  </TASK>
<snip>

Fixes: a251eee62133 ("idpf: add SRIOV support and other ndo_ops")
Signed-off-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Reviewed-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf.h        |  2 -
 drivers/net/ethernet/intel/idpf/idpf_lib.c    | 94 +++++++++----------
 drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 36 +++----
 drivers/net/ethernet/intel/idpf/idpf_txrx.h   |  4 +-
 .../net/ethernet/intel/idpf/idpf_virtchnl.c   |  9 +-
 5 files changed, 66 insertions(+), 79 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index dab36c0c3cdc..1bf7934d4e28 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -423,14 +423,12 @@ enum idpf_user_flags {
  * @rss_key: RSS hash key
  * @rss_lut_size: Size of RSS lookup table
  * @rss_lut: RSS lookup table
- * @cached_lut: Used to restore previously init RSS lut
  */
 struct idpf_rss_data {
 	u16 rss_key_size;
 	u8 *rss_key;
 	u16 rss_lut_size;
 	u32 *rss_lut;
-	u32 *cached_lut;
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index d56366e676cf..51716e5a84ef 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1073,7 +1073,7 @@ static void idpf_vport_rel(struct idpf_vport *vport)
 	u16 idx = vport->idx;
 
 	vport_config = adapter->vport_config[vport->idx];
-	idpf_deinit_rss(vport);
+	idpf_deinit_rss_lut(vport);
 	rss_data = &vport_config->user_config.rss_data;
 	kfree(rss_data->rss_key);
 	rss_data->rss_key = NULL;
@@ -1226,6 +1226,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
 	u16 idx = adapter->next_vport;
 	struct idpf_vport *vport;
 	u16 num_max_q;
+	int err;
 
 	if (idx == IDPF_NO_FREE_SLOT)
 		return NULL;
@@ -1276,10 +1277,11 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
 
 	idpf_vport_init(vport, max_q);
 
-	/* This alloc is done separate from the LUT because it's not strictly
-	 * dependent on how many queues we have. If we change number of queues
-	 * and soft reset we'll need a new LUT but the key can remain the same
-	 * for as long as the vport exists.
+	/* LUT and key are both initialized here. Key is not strictly dependent
+	 * on how many queues we have. If we change number of queues and soft
+	 * reset is initiated, LUT will be freed and a new LUT will be allocated
+	 * as per the updated number of queues during vport bringup. However,
+	 * the key remains the same for as long as the vport exists.
 	 */
 	rss_data = &adapter->vport_config[idx]->user_config.rss_data;
 	rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL);
@@ -1289,6 +1291,11 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
 	/* Initialize default rss key */
 	netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size);
 
+	/* Initialize default rss LUT */
+	err = idpf_init_rss_lut(vport);
+	if (err)
+		goto free_rss_key;
+
 	/* fill vport slot in the adapter struct */
 	adapter->vports[idx] = vport;
 	adapter->vport_ids[idx] = idpf_get_vport_id(vport);
@@ -1299,6 +1306,8 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
 
 	return vport;
 
+free_rss_key:
+	kfree(rss_data->rss_key);
 free_vector_idxs:
 	kfree(vport->q_vector_idxs);
 free_vport:
@@ -1476,6 +1485,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
 	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
 	struct idpf_adapter *adapter = vport->adapter;
 	struct idpf_vport_config *vport_config;
+	struct idpf_rss_data *rss_data;
 	int err;
 
 	if (test_bit(IDPF_VPORT_UP, np->state))
@@ -1570,12 +1580,21 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
 	idpf_restore_features(vport);
 
 	vport_config = adapter->vport_config[vport->idx];
-	if (vport_config->user_config.rss_data.rss_lut)
-		err = idpf_config_rss(vport);
-	else
-		err = idpf_init_rss(vport);
+	rss_data = &vport_config->user_config.rss_data;
+
+	if (!rss_data->rss_lut) {
+		err = idpf_init_rss_lut(vport);
+		if (err) {
+			dev_err(&adapter->pdev->dev,
+				"Failed to initialize RSS LUT for vport %u: %d\n",
+				vport->vport_id, err);
+			goto disable_vport;
+		}
+	}
+
+	err = idpf_config_rss(vport);
 	if (err) {
-		dev_err(&adapter->pdev->dev, "Failed to initialize RSS for vport %u: %d\n",
+		dev_err(&adapter->pdev->dev, "Failed to configure RSS for vport %u: %d\n",
 			vport->vport_id, err);
 		goto disable_vport;
 	}
@@ -1584,7 +1603,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
 	if (err) {
 		dev_err(&adapter->pdev->dev, "Failed to complete interface up for vport %u: %d\n",
 			vport->vport_id, err);
-		goto deinit_rss;
+		goto disable_vport;
 	}
 
 	if (rtnl)
@@ -1592,8 +1611,6 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
 
 	return 0;
 
-deinit_rss:
-	idpf_deinit_rss(vport);
 disable_vport:
 	idpf_send_disable_vport_msg(vport);
 disable_queues:
@@ -2051,7 +2068,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
 		idpf_vport_stop(vport, false);
 	}
 
-	idpf_deinit_rss(vport);
+	idpf_deinit_rss_lut(vport);
 	/* We're passing in vport here because we need its wait_queue
 	 * to send a message and it should be getting all the vport
 	 * config data out of the adapter but we need to be careful not
@@ -2219,40 +2236,6 @@ static void idpf_set_rx_mode(struct net_device *netdev)
 		dev_err(dev, "Failed to set promiscuous mode: %d\n", err);
 }
 
-/**
- * idpf_vport_manage_rss_lut - disable/enable RSS
- * @vport: the vport being changed
- *
- * In the event of disable request for RSS, this function will zero out RSS
- * LUT, while in the event of enable request for RSS, it will reconfigure RSS
- * LUT with the default LUT configuration.
- */
-static int idpf_vport_manage_rss_lut(struct idpf_vport *vport)
-{
-	bool ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH);
-	struct idpf_rss_data *rss_data;
-	u16 idx = vport->idx;
-	int lut_size;
-
-	rss_data = &vport->adapter->vport_config[idx]->user_config.rss_data;
-	lut_size = rss_data->rss_lut_size * sizeof(u32);
-
-	if (ena) {
-		/* This will contain the default or user configured LUT */
-		memcpy(rss_data->rss_lut, rss_data->cached_lut, lut_size);
-	} else {
-		/* Save a copy of the current LUT to be restored later if
-		 * requested.
-		 */
-		memcpy(rss_data->cached_lut, rss_data->rss_lut, lut_size);
-
-		/* Zero out the current LUT to disable */
-		memset(rss_data->rss_lut, 0, lut_size);
-	}
-
-	return idpf_config_rss(vport);
-}
-
 /**
  * idpf_set_features - set the netdev feature flags
  * @netdev: ptr to the netdev being adjusted
@@ -2278,10 +2261,19 @@ static int idpf_set_features(struct net_device *netdev,
 	}
 
 	if (changed & NETIF_F_RXHASH) {
+		struct idpf_netdev_priv *np = netdev_priv(netdev);
+
 		netdev->features ^= NETIF_F_RXHASH;
-		err = idpf_vport_manage_rss_lut(vport);
-		if (err)
-			goto unlock_mutex;
+
+		/* If the interface is not up when changing the rxhash, update
+		 * to the HW is skipped. The updated LUT will be committed to
+		 * the HW when the interface is brought up.
+		 */
+		if (test_bit(IDPF_VPORT_UP, np->state)) {
+			err = idpf_config_rss(vport);
+			if (err)
+				goto unlock_mutex;
+		}
 	}
 
 	if (changed & NETIF_F_GRO_HW) {
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 1d91c56f7469..8991a891a440 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -4650,57 +4650,47 @@ static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport)
 
 	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
 
-	for (i = 0; i < rss_data->rss_lut_size; i++) {
+	for (i = 0; i < rss_data->rss_lut_size; i++)
 		rss_data->rss_lut[i] = i % num_active_rxq;
-		rss_data->cached_lut[i] = rss_data->rss_lut[i];
-	}
 }
 
 /**
- * idpf_init_rss - Allocate and initialize RSS resources
+ * idpf_init_rss_lut - Allocate and initialize RSS LUT
  * @vport: virtual port
  *
- * Return 0 on success, negative on failure
+ * Return: 0 on success, negative on failure
  */
-int idpf_init_rss(struct idpf_vport *vport)
+int idpf_init_rss_lut(struct idpf_vport *vport)
 {
 	struct idpf_adapter *adapter = vport->adapter;
 	struct idpf_rss_data *rss_data;
-	u32 lut_size;
 
 	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+	if (!rss_data->rss_lut) {
+		u32 lut_size;
 
-	lut_size = rss_data->rss_lut_size * sizeof(u32);
-	rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL);
-	if (!rss_data->rss_lut)
-		return -ENOMEM;
-
-	rss_data->cached_lut = kzalloc(lut_size, GFP_KERNEL);
-	if (!rss_data->cached_lut) {
-		kfree(rss_data->rss_lut);
-		rss_data->rss_lut = NULL;
-
-		return -ENOMEM;
+		lut_size = rss_data->rss_lut_size * sizeof(u32);
+		rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL);
+		if (!rss_data->rss_lut)
+			return -ENOMEM;
 	}
 
 	/* Fill the default RSS lut values */
 	idpf_fill_dflt_rss_lut(vport);
 
-	return idpf_config_rss(vport);
+	return 0;
 }
 
 /**
- * idpf_deinit_rss - Release RSS resources
+ * idpf_deinit_rss_lut - Release RSS LUT
  * @vport: virtual port
  */
-void idpf_deinit_rss(struct idpf_vport *vport)
+void idpf_deinit_rss_lut(struct idpf_vport *vport)
 {
 	struct idpf_adapter *adapter = vport->adapter;
 	struct idpf_rss_data *rss_data;
 
 	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
-	kfree(rss_data->cached_lut);
-	rss_data->cached_lut = NULL;
 	kfree(rss_data->rss_lut);
 	rss_data->rss_lut = NULL;
 }
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 75b977094741..7d20593bd877 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -1086,8 +1086,8 @@ void idpf_vport_intr_deinit(struct idpf_vport *vport);
 int idpf_vport_intr_init(struct idpf_vport *vport);
 void idpf_vport_intr_ena(struct idpf_vport *vport);
 int idpf_config_rss(struct idpf_vport *vport);
-int idpf_init_rss(struct idpf_vport *vport);
-void idpf_deinit_rss(struct idpf_vport *vport);
+int idpf_init_rss_lut(struct idpf_vport *vport);
+void idpf_deinit_rss_lut(struct idpf_vport *vport);
 int idpf_rx_bufs_init_all(struct idpf_vport *vport);
 
 struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport,
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index 01bbd12a642a..cb702eac86c8 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -2804,6 +2804,10 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport)
  * @vport: virtual port data structure
  * @get: flag to set or get rss look up table
  *
+ * When rxhash is disabled, RSS LUT will be configured with zeros.  If rxhash
+ * is enabled, the LUT values stored in driver's soft copy will be used to setup
+ * the HW.
+ *
  * Returns 0 on success, negative on failure.
  */
 int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get)
@@ -2814,10 +2818,12 @@ int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get)
 	struct idpf_rss_data *rss_data;
 	int buf_size, lut_buf_size;
 	ssize_t reply_sz;
+	bool rxhash_ena;
 	int i;
 
 	rss_data =
 		&vport->adapter->vport_config[vport->idx]->user_config.rss_data;
+	rxhash_ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH);
 	buf_size = struct_size(rl, lut, rss_data->rss_lut_size);
 	rl = kzalloc(buf_size, GFP_KERNEL);
 	if (!rl)
@@ -2839,7 +2845,8 @@ int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get)
 	} else {
 		rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size);
 		for (i = 0; i < rss_data->rss_lut_size; i++)
-			rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]);
+			rl->lut[i] = rxhash_ena ?
+				cpu_to_le32(rss_data->rss_lut[i]) : 0;
 
 		xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_LUT;
 	}

From 445b49d13787da2fe8d51891ee196e5077feef44 Mon Sep 17 00:00:00 2001
From: Sreedevi Joshi <sreedevi.joshi@intel.com>
Date: Mon, 24 Nov 2025 12:47:49 -0600
Subject: [PATCH 42/70] idpf: Fix RSS LUT configuration on down interfaces

RSS LUT provisioning and queries on a down interface currently return
silently without effect. Users should be able to configure RSS settings
even when the interface is down.

Fix by maintaining RSS configuration changes in the driver's soft copy and
deferring HW programming until the interface comes up.

Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks")
Signed-off-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Reviewed-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_ethtool.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
index 7000f6283a33..2efa3c08aba5 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -411,7 +411,10 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev)
  * @netdev: network interface device structure
  * @rxfh: pointer to param struct (indir, key, hfunc)
  *
- * Reads the indirection table directly from the hardware. Always returns 0.
+ * RSS LUT and Key information are read from driver's cached
+ * copy. When rxhash is off, rss lut will be displayed as zeros.
+ *
+ * Return: 0 on success, -errno otherwise.
  */
 static int idpf_get_rxfh(struct net_device *netdev,
 			 struct ethtool_rxfh_param *rxfh)
@@ -419,10 +422,13 @@ static int idpf_get_rxfh(struct net_device *netdev,
 	struct idpf_netdev_priv *np = netdev_priv(netdev);
 	struct idpf_rss_data *rss_data;
 	struct idpf_adapter *adapter;
+	struct idpf_vport *vport;
+	bool rxhash_ena;
 	int err = 0;
 	u16 i;
 
 	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
 
 	adapter = np->adapter;
 
@@ -432,9 +438,8 @@ static int idpf_get_rxfh(struct net_device *netdev,
 	}
 
 	rss_data = &adapter->vport_config[np->vport_idx]->user_config.rss_data;
-	if (!test_bit(IDPF_VPORT_UP, np->state))
-		goto unlock_mutex;
 
+	rxhash_ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH);
 	rxfh->hfunc = ETH_RSS_HASH_TOP;
 
 	if (rxfh->key)
@@ -442,7 +447,7 @@ static int idpf_get_rxfh(struct net_device *netdev,
 
 	if (rxfh->indir) {
 		for (i = 0; i < rss_data->rss_lut_size; i++)
-			rxfh->indir[i] = rss_data->rss_lut[i];
+			rxfh->indir[i] = rxhash_ena ? rss_data->rss_lut[i] : 0;
 	}
 
 unlock_mutex:
@@ -482,8 +487,6 @@ static int idpf_set_rxfh(struct net_device *netdev,
 	}
 
 	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
-	if (!test_bit(IDPF_VPORT_UP, np->state))
-		goto unlock_mutex;
 
 	if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
 	    rxfh->hfunc != ETH_RSS_HASH_TOP) {
@@ -499,7 +502,8 @@ static int idpf_set_rxfh(struct net_device *netdev,
 			rss_data->rss_lut[lut] = rxfh->indir[lut];
 	}
 
-	err = idpf_config_rss(vport);
+	if (test_bit(IDPF_VPORT_UP, np->state))
+		err = idpf_config_rss(vport);
 
 unlock_mutex:
 	idpf_vport_ctrl_unlock(netdev);

From ebecca5b093895da801b3eba1a55b4ec4027d196 Mon Sep 17 00:00:00 2001
From: Sreedevi Joshi <sreedevi.joshi@intel.com>
Date: Mon, 24 Nov 2025 12:47:50 -0600
Subject: [PATCH 43/70] idpf: Fix RSS LUT NULL ptr issue after soft reset

During soft reset, the RSS LUT is freed and not restored unless the
interface is up. If an ethtool command that accesses the rss lut is
attempted immediately after reset, it will result in NULL ptr
dereference. Also, there is no need to reset the rss lut if the soft reset
does not involve queue count change.

After soft reset, set the RSS LUT to default values based on the updated
queue count only if the reset was a result of a queue count change and
the LUT was not configured by the user. In all other cases, don't touch
the LUT.

Steps to reproduce:

** Bring the interface down (if up)
ifconfig eth1 down

** update the queue count (eg., 27->20)
ethtool -L eth1 combined 20

** display the RSS LUT
ethtool -x eth1

[82375.558338] BUG: kernel NULL pointer dereference, address: 0000000000000000
[82375.558373] #PF: supervisor read access in kernel mode
[82375.558391] #PF: error_code(0x0000) - not-present page
[82375.558408] PGD 0 P4D 0
[82375.558421] Oops: Oops: 0000 [#1] SMP NOPTI
<snip>
[82375.558516] RIP: 0010:idpf_get_rxfh+0x108/0x150 [idpf]
[82375.558786] Call Trace:
[82375.558793]  <TASK>
[82375.558804]  rss_prepare.isra.0+0x187/0x2a0
[82375.558827]  rss_prepare_data+0x3a/0x50
[82375.558845]  ethnl_default_doit+0x13d/0x3e0
[82375.558863]  genl_family_rcv_msg_doit+0x11f/0x180
[82375.558886]  genl_rcv_msg+0x1ad/0x2b0
[82375.558902]  ? __pfx_ethnl_default_doit+0x10/0x10
[82375.558920]  ? __pfx_genl_rcv_msg+0x10/0x10
[82375.558937]  netlink_rcv_skb+0x58/0x100
[82375.558957]  genl_rcv+0x2c/0x50
[82375.558971]  netlink_unicast+0x289/0x3e0
[82375.558988]  netlink_sendmsg+0x215/0x440
[82375.559005]  __sys_sendto+0x234/0x240
[82375.559555]  __x64_sys_sendto+0x28/0x30
[82375.560068]  x64_sys_call+0x1909/0x1da0
[82375.560576]  do_syscall_64+0x7a/0xfa0
[82375.561076]  ? clear_bhb_loop+0x60/0xb0
[82375.561567]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
<snip>

Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks")
Signed-off-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Reviewed-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_lib.c  | 20 ++++----------------
 drivers/net/ethernet/intel/idpf/idpf_txrx.c |  2 +-
 drivers/net/ethernet/intel/idpf/idpf_txrx.h |  1 +
 3 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 51716e5a84ef..003bab3ce5ae 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1484,8 +1484,6 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
 {
 	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
 	struct idpf_adapter *adapter = vport->adapter;
-	struct idpf_vport_config *vport_config;
-	struct idpf_rss_data *rss_data;
 	int err;
 
 	if (test_bit(IDPF_VPORT_UP, np->state))
@@ -1579,19 +1577,6 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
 
 	idpf_restore_features(vport);
 
-	vport_config = adapter->vport_config[vport->idx];
-	rss_data = &vport_config->user_config.rss_data;
-
-	if (!rss_data->rss_lut) {
-		err = idpf_init_rss_lut(vport);
-		if (err) {
-			dev_err(&adapter->pdev->dev,
-				"Failed to initialize RSS LUT for vport %u: %d\n",
-				vport->vport_id, err);
-			goto disable_vport;
-		}
-	}
-
 	err = idpf_config_rss(vport);
 	if (err) {
 		dev_err(&adapter->pdev->dev, "Failed to configure RSS for vport %u: %d\n",
@@ -2068,7 +2053,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
 		idpf_vport_stop(vport, false);
 	}
 
-	idpf_deinit_rss_lut(vport);
 	/* We're passing in vport here because we need its wait_queue
 	 * to send a message and it should be getting all the vport
 	 * config data out of the adapter but we need to be careful not
@@ -2094,6 +2078,10 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
 	if (err)
 		goto err_open;
 
+	if (reset_cause == IDPF_SR_Q_CHANGE &&
+	    !netif_is_rxfh_configured(vport->netdev))
+		idpf_fill_dflt_rss_lut(vport);
+
 	if (vport_is_up)
 		err = idpf_vport_open(vport, false);
 
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 8991a891a440..f51d52297e1e 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -4641,7 +4641,7 @@ int idpf_config_rss(struct idpf_vport *vport)
  * idpf_fill_dflt_rss_lut - Fill the indirection table with the default values
  * @vport: virtual port structure
  */
-static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport)
+void idpf_fill_dflt_rss_lut(struct idpf_vport *vport)
 {
 	struct idpf_adapter *adapter = vport->adapter;
 	u16 num_active_rxq = vport->num_rxq;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 7d20593bd877..0472698ca192 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -1085,6 +1085,7 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
 void idpf_vport_intr_deinit(struct idpf_vport *vport);
 int idpf_vport_intr_init(struct idpf_vport *vport);
 void idpf_vport_intr_ena(struct idpf_vport *vport);
+void idpf_fill_dflt_rss_lut(struct idpf_vport *vport);
 int idpf_config_rss(struct idpf_vport *vport);
 int idpf_init_rss_lut(struct idpf_vport *vport);
 void idpf_deinit_rss_lut(struct idpf_vport *vport);

From 87b8ee64685bc096a087af833d4594b2332bfdb1 Mon Sep 17 00:00:00 2001
From: Sreedevi Joshi <sreedevi.joshi@intel.com>
Date: Tue, 2 Dec 2025 17:12:46 -0600
Subject: [PATCH 44/70] idpf: Fix error handling in idpf_vport_open()

Fix error handling to properly cleanup interrupts when
idpf_vport_queue_ids_init() or idpf_rx_bufs_init_all() fail. Jump to
'intr_deinit' instead of 'queues_rel' to ensure interrupts are cleaned up
before releasing other resources.

Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport")
Signed-off-by: Sreedevi Joshi <sreedevi.joshi@intel.com>
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_lib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 003bab3ce5ae..131a8121839b 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1524,14 +1524,14 @@ static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
 	if (err) {
 		dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n",
 			vport->vport_id, err);
-		goto queues_rel;
+		goto intr_deinit;
 	}
 
 	err = idpf_rx_bufs_init_all(vport);
 	if (err) {
 		dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n",
 			vport->vport_id, err);
-		goto queues_rel;
+		goto intr_deinit;
 	}
 
 	idpf_rx_init_buf_tail(vport);

From 086efe0a1ecc36cffe46640ce12649a4cd3ff171 Mon Sep 17 00:00:00 2001
From: Joshua Hay <joshua.a.hay@intel.com>
Date: Mon, 3 Nov 2025 13:20:36 -0800
Subject: [PATCH 45/70] idpf: cap maximum Rx buffer size

The HW only supports a maximum Rx buffer size of 16K-128. On systems
using large pages, the libeth logic can configure the buffer size to be
larger than this. The upper bound is PAGE_SIZE while the lower bound is
MTU rounded up to the nearest power of 2. For example, ARM systems with
a 64K page size and an mtu of 9000 will set the Rx buffer size to 16K,
which will cause the config Rx queues message to fail.

Initialize the bufq/fill queue buf_len field to the maximum supported
size. This will trigger the libeth logic to cap the maximum Rx buffer
size by reducing the upper bound.

Fixes: 74d1412ac8f37 ("idpf: use libeth Rx buffer management for payload buffer")
Signed-off-by: Joshua Hay <joshua.a.hay@intel.com>
Acked-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: David Decotigny <ddecotig@google.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_txrx.c | 8 +++++---
 drivers/net/ethernet/intel/idpf/idpf_txrx.h | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index f51d52297e1e..7f3933ca9edc 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -695,9 +695,10 @@ err:
 static int idpf_rx_bufs_init_singleq(struct idpf_rx_queue *rxq)
 {
 	struct libeth_fq fq = {
-		.count	= rxq->desc_count,
-		.type	= LIBETH_FQE_MTU,
-		.nid	= idpf_q_vector_to_mem(rxq->q_vector),
+		.count		= rxq->desc_count,
+		.type		= LIBETH_FQE_MTU,
+		.buf_len	= IDPF_RX_MAX_BUF_SZ,
+		.nid		= idpf_q_vector_to_mem(rxq->q_vector),
 	};
 	int ret;
 
@@ -754,6 +755,7 @@ static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq,
 		.truesize	= bufq->truesize,
 		.count		= bufq->desc_count,
 		.type		= type,
+		.buf_len	= IDPF_RX_MAX_BUF_SZ,
 		.hsplit		= idpf_queue_has(HSPLIT_EN, bufq),
 		.xdp		= idpf_xdp_enabled(bufq->q_vector->vport),
 		.nid		= idpf_q_vector_to_mem(bufq->q_vector),
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 0472698ca192..423cc9486dce 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -101,6 +101,7 @@ do {								\
 		idx = 0;					\
 } while (0)
 
+#define IDPF_RX_MAX_BUF_SZ			(16384 - 128)
 #define IDPF_RX_BUF_STRIDE			32
 #define IDPF_RX_BUF_POST_STRIDE			16
 #define IDPF_LOW_WATERMARK			64

From 4648fb2f2e7210c53b85220ee07d42d1e4bae3f9 Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Mon, 17 Nov 2025 08:03:49 +0100
Subject: [PATCH 46/70] idpf: fix aux device unplugging when rdma is not
 supported by vport

If vport flags do not contain VIRTCHNL2_VPORT_ENABLE_RDMA, driver does not
allocate vdev_info for this vport. This leads to kernel NULL pointer
dereference in idpf_idc_vport_dev_down(), which references vdev_info for
every vport regardless.

Check, if vdev_info was ever allocated before unplugging aux device.

Fixes: be91128c579c ("idpf: implement RDMA vport auxiliary dev create, init, and destroy")
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_idc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c
index 7e20a07e98e5..6dad0593f7f2 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_idc.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -322,7 +322,7 @@ static void idpf_idc_vport_dev_down(struct idpf_adapter *adapter)
 	for (i = 0; i < adapter->num_alloc_vports; i++) {
 		struct idpf_vport *vport = adapter->vports[i];
 
-		if (!vport)
+		if (!vport || !vport->vdev_info)
 			continue;
 
 		idpf_unplug_aux_dev(vport->vdev_info->adev);

From 44694ffaa4e27dbda1120abfafa00732f2d52760 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Thu, 11 Dec 2025 16:58:31 +0530
Subject: [PATCH 47/70] MAINTAINERS: Add an additional maintainer to the AMD
 XGBE driver

Add Raju Rangoju as an additional maintainer to support the AMD XGBE
network device driver.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Acked-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20251211112831.1781030-1-Shyam-sundar.S-k@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 410fd1f199f2..50d7266e1e62 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1283,6 +1283,7 @@ F:	include/uapi/drm/amdxdna_accel.h
 
 AMD XGBE DRIVER
 M:	"Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
+M:	Raju Rangoju <Raju.Rangoju@amd.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi

From 7801edc9badd972cb62cf11c0427e70b6dca239d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 4 Jan 2026 11:39:52 +0200
Subject: [PATCH 48/70] Revert "dsa: mv88e6xxx: make serdes SGMII/Fiber tx
 amplitude configurable"

This reverts commit 926eae604403acfa27ba5b072af458e87e634a50, which
never could have produced the intended effect:
https://lore.kernel.org/netdev/AM0PR06MB10396BBF8B568D77556FC46F8F7DEA@AM0PR06MB10396.eurprd06.prod.outlook.com/

The reason why it is broken beyond repair in this form is that the
mv88e6xxx driver outsources its "tx-p2p-microvolt" property to the OF
node of an external Ethernet PHY. This:
(a) does not work if there is no external PHY (chip-to-chip connection,
    or SFP module)
(b) pollutes the OF property namespace / bindings of said external PHY
    ("tx-p2p-microvolt" could have meaning for the Ethernet PHY's SerDes
    interface as well)

We can revisit the idea of making SerDes amplitude configurable once we
have proper bindings for the mv88e6xxx SerDes. Until then, remove the
code that leaves us with unnecessary baggage.

Fixes: 926eae604403 ("dsa: mv88e6xxx: make serdes SGMII/Fiber tx amplitude configurable")
Cc: Holger Brunck <holger.brunck@hitachienergy.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20260104093952.486606-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mv88e6xxx/chip.c   | 23 ---------------
 drivers/net/dsa/mv88e6xxx/chip.h   |  4 ---
 drivers/net/dsa/mv88e6xxx/serdes.c | 46 ------------------------------
 drivers/net/dsa/mv88e6xxx/serdes.h |  5 ----
 4 files changed, 78 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index b4d48997bf46..09002c853b78 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3364,13 +3364,10 @@ static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port)
 
 static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 {
-	struct device_node *phy_handle = NULL;
 	struct fwnode_handle *ports_fwnode;
 	struct fwnode_handle *port_fwnode;
 	struct dsa_switch *ds = chip->ds;
 	struct mv88e6xxx_port *p;
-	struct dsa_port *dp;
-	int tx_amp;
 	int err;
 	u16 reg;
 	u32 val;
@@ -3582,23 +3579,6 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 			return err;
 	}
 
-	if (chip->info->ops->serdes_set_tx_amplitude) {
-		dp = dsa_to_port(ds, port);
-		if (dp)
-			phy_handle = of_parse_phandle(dp->dn, "phy-handle", 0);
-
-		if (phy_handle && !of_property_read_u32(phy_handle,
-							"tx-p2p-microvolt",
-							&tx_amp))
-			err = chip->info->ops->serdes_set_tx_amplitude(chip,
-								port, tx_amp);
-		if (phy_handle) {
-			of_node_put(phy_handle);
-			if (err)
-				return err;
-		}
-	}
-
 	/* Port based VLAN map: give each port the same default address
 	 * database, and allow bidirectional communication between the
 	 * CPU and DSA port(s), and the other ports.
@@ -4768,7 +4748,6 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
 	.serdes_irq_mapping = mv88e6352_serdes_irq_mapping,
 	.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
 	.serdes_get_regs = mv88e6352_serdes_get_regs,
-	.serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude,
 	.gpio_ops = &mv88e6352_gpio_ops,
 	.phylink_get_caps = mv88e6352_phylink_get_caps,
 	.pcs_ops = &mv88e6352_pcs_ops,
@@ -5044,7 +5023,6 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
 	.serdes_irq_mapping = mv88e6352_serdes_irq_mapping,
 	.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
 	.serdes_get_regs = mv88e6352_serdes_get_regs,
-	.serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude,
 	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6352_avb_ops,
 	.ptp_ops = &mv88e6352_ptp_ops,
@@ -5481,7 +5459,6 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
 	.serdes_get_stats = mv88e6352_serdes_get_stats,
 	.serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
 	.serdes_get_regs = mv88e6352_serdes_get_regs,
-	.serdes_set_tx_amplitude = mv88e6352_serdes_set_tx_amplitude,
 	.phylink_get_caps = mv88e6352_phylink_get_caps,
 	.pcs_ops = &mv88e6352_pcs_ops,
 };
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 2f211e55cb47..e073446ee7d0 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -642,10 +642,6 @@ struct mv88e6xxx_ops {
 	void (*serdes_get_regs)(struct mv88e6xxx_chip *chip, int port,
 				void *_p);
 
-	/* SERDES SGMII/Fiber Output Amplitude */
-	int (*serdes_set_tx_amplitude)(struct mv88e6xxx_chip *chip, int port,
-				       int val);
-
 	/* Address Translation Unit operations */
 	int (*atu_get_hash)(struct mv88e6xxx_chip *chip, u8 *hash);
 	int (*atu_set_hash)(struct mv88e6xxx_chip *chip, u8 hash);
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index b3330211edbc..a936ee80ce00 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -25,14 +25,6 @@ static int mv88e6352_serdes_read(struct mv88e6xxx_chip *chip, int reg,
 				       reg, val);
 }
 
-static int mv88e6352_serdes_write(struct mv88e6xxx_chip *chip, int reg,
-				  u16 val)
-{
-	return mv88e6xxx_phy_page_write(chip, MV88E6352_ADDR_SERDES,
-					MV88E6352_SERDES_PAGE_FIBER,
-					reg, val);
-}
-
 static int mv88e6390_serdes_read(struct mv88e6xxx_chip *chip,
 				 int lane, int device, int reg, u16 *val)
 {
@@ -506,41 +498,3 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
 			p[i] = reg;
 	}
 }
-
-static const int mv88e6352_serdes_p2p_to_reg[] = {
-	/* Index of value in microvolts corresponds to the register value */
-	14000, 112000, 210000, 308000, 406000, 504000, 602000, 700000,
-};
-
-int mv88e6352_serdes_set_tx_amplitude(struct mv88e6xxx_chip *chip, int port,
-				      int val)
-{
-	bool found = false;
-	u16 ctrl, reg;
-	int err;
-	int i;
-
-	err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
-	if (err <= 0)
-		return err;
-
-	for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_p2p_to_reg); ++i) {
-		if (mv88e6352_serdes_p2p_to_reg[i] == val) {
-			reg = i;
-			found = true;
-			break;
-		}
-	}
-
-	if (!found)
-		return -EINVAL;
-
-	err = mv88e6352_serdes_read(chip, MV88E6352_SERDES_SPEC_CTRL2, &ctrl);
-	if (err)
-		return err;
-
-	ctrl &= ~MV88E6352_SERDES_OUT_AMP_MASK;
-	ctrl |= reg;
-
-	return mv88e6352_serdes_write(chip, MV88E6352_SERDES_SPEC_CTRL2, ctrl);
-}
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index ad887d8601bc..17a3e85fabaa 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -29,8 +29,6 @@ struct phylink_link_state;
 #define MV88E6352_SERDES_INT_FIBRE_ENERGY	BIT(4)
 #define MV88E6352_SERDES_INT_STATUS	0x13
 
-#define MV88E6352_SERDES_SPEC_CTRL2	0x1a
-#define MV88E6352_SERDES_OUT_AMP_MASK		0x0007
 
 #define MV88E6341_PORT5_LANE		0x15
 
@@ -140,9 +138,6 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p);
 int mv88e6390_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port);
 void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p);
 
-int mv88e6352_serdes_set_tx_amplitude(struct mv88e6xxx_chip *chip, int port,
-				      int val);
-
 /* Return the (first) SERDES lane address a port is using, -errno otherwise. */
 static inline int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip,
 					    int port)

From e5c8eda39a9fc1547d1398d707aa06c1d080abdd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 5 Jan 2026 09:36:30 +0000
Subject: [PATCH 49/70] udp: call skb_orphan() before skb_attempt_defer_free()

Standard UDP receive path does not use skb->destructor.

But skmsg layer does use it, since it calls skb_set_owner_sk_safe()
from udp_read_skb().

This then triggers this warning in skb_attempt_defer_free():

    DEBUG_NET_WARN_ON_ONCE(skb->destructor);

We must call skb_orphan() to fix this issue.

Fixes: 6471658dc66c ("udp: use skb_attempt_defer_free()")
Reported-by: syzbot+3e68572cf2286ce5ebe9@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/695b83bd.050a0220.1c9965.002b.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260105093630.1976085-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/udp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ffe074cb5865..ee63af0ef42c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1851,6 +1851,7 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		sk_peek_offset_bwd(sk, len);
 
 	if (!skb_shared(skb)) {
+		skb_orphan(skb);
 		skb_attempt_defer_free(skb);
 		return;
 	}

From 13ff3e724207f579d3c814ee05516fefcb4f32e8 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Mon, 5 Jan 2026 16:18:39 +0100
Subject: [PATCH 50/70] net: sfp: return the number of written bytes for smbus
 single byte access

We expect the SFP write accessors to return the number of written bytes.
We fail to do so for single-byte smbus accesses, which may cause errors
when setting a module's high-power state and for some cotsworks modules.

Let's return the amount of written bytes, as expected.

Fixes: 7662abf4db94 ("net: phy: sfp: Add support for SMBus module access")
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20260105151840.144552-1-maxime.chevallier@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/sfp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 6166e9196364..84bef5099dda 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -765,7 +765,7 @@ static int sfp_smbus_byte_write(struct sfp *sfp, bool a2, u8 dev_addr,
 		dev_addr++;
 	}
 
-	return 0;
+	return data - (u8 *)buf;
 }
 
 static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)

From adb25a46dc0a43173f5ea5f5f58fc8ba28970c7c Mon Sep 17 00:00:00 2001
From: Shivani Gupta <shivani07g@gmail.com>
Date: Mon, 5 Jan 2026 00:59:05 +0000
Subject: [PATCH 51/70] net/sched: act_api: avoid dereferencing ERR_PTR in
 tcf_idrinfo_destroy

syzbot reported a crash in tc_act_in_hw() during netns teardown where
tcf_idrinfo_destroy() passed an ERR_PTR(-EBUSY) value as a tc_action
pointer, leading to an invalid dereference.

Guard against ERR_PTR entries when iterating the action IDR so teardown
does not call tc_act_in_hw() on an error pointer.

Fixes: 84a7d6797e6a ("net/sched: acp_api: no longer acquire RTNL in tc_action_net_exit()")
Link: https://syzkaller.appspot.com/bug?extid=8f1c492ffa4644ff3826
Reported-by: syzbot+8f1c492ffa4644ff3826@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=8f1c492ffa4644ff3826
Signed-off-by: Shivani Gupta <shivani07g@gmail.com>
Link: https://patch.msgid.link/20260105005905.243423-1-shivani07g@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_api.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ff6be5cfe2b0..e1ab0faeb811 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -940,6 +940,8 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
 	int ret;
 
 	idr_for_each_entry_ul(idr, p, tmp, id) {
+		if (IS_ERR(p))
+			continue;
 		if (tc_act_in_hw(p) && !mutex_taken) {
 			rtnl_lock();
 			mutex_taken = true;

From 353cfc0ef3f34ef7fe313ae38dac37f2454a7cf5 Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Mon, 5 Jan 2026 18:33:19 +0200
Subject: [PATCH 52/70] selftests: drv-net: Bring back tool() to driver
 __init__s

The pp_alloc_fail.py test (which doesn't run in NIPA CI?) uses tool, add
back the import.

Resolves:
  ImportError: cannot import name 'tool' from 'lib.py'

Fixes: 68a052239fc4 ("selftests: drv-net: update remaining Python init files")
Reviewed-by: Nimrod Oren <noren@nvidia.com>
Signed-off-by: Gal Pressman <gal@nvidia.com>
Link: https://patch.msgid.link/20260105163319.47619-1-gal@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/hw/lib/py/__init__.py | 4 ++--
 tools/testing/selftests/net/lib/py/__init__.py            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 766bfc4ad842..d5d247eca6b7 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -22,7 +22,7 @@ try:
         NlError, RtnlFamily, DevlinkFamily, PSPFamily
     from net.lib.py import CmdExitFailure
     from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
-        fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+        fd_read_timeout, ip, rand_port, wait_port_listen, wait_file, tool
     from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
     from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
         ksft_setup, ksft_variants, KsftNamedVariant
@@ -37,7 +37,7 @@ try:
                "CmdExitFailure",
                "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
                "fd_read_timeout", "ip", "rand_port",
-               "wait_port_listen", "wait_file",
+               "wait_port_listen", "wait_file", "tool",
                "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
                "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
                "ksft_setup", "ksft_variants", "KsftNamedVariant",
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 40f9ce307dd1..f528b67639de 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -13,7 +13,7 @@ from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
 from .netns import NetNS, NetNSEnter
 from .nsim import NetdevSim, NetdevSimDev
 from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
-    bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file
+    bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file, tool
 from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
 from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily
 
@@ -26,7 +26,7 @@ __all__ = ["KSRC",
            "NetNS", "NetNSEnter",
            "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
            "bpftool", "ip", "ethtool", "bpftrace", "rand_port",
-           "wait_port_listen", "wait_file",
+           "wait_port_listen", "wait_file", "tool",
            "NetdevSim", "NetdevSimDev",
            "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError",
            "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily",

From d83dddffe1904e4a576d11a541878850a8e64cd2 Mon Sep 17 00:00:00 2001
From: Yohei Kojima <yk@y-koj.net>
Date: Tue, 6 Jan 2026 00:17:32 +0900
Subject: [PATCH 53/70] net: netdevsim: fix inconsistent carrier state after
 link/unlink

This patch fixes the edge case behavior on ifup/ifdown and
linking/unlinking two netdevsim interfaces:

1. unlink two interfaces netdevsim1 and netdevsim2
2. ifdown netdevsim1
3. ifup netdevsim1
4. link two interfaces netdevsim1 and netdevsim2
5. (Now two interfaces are linked in terms of netdevsim peer, but
    carrier state of the two interfaces remains DOWN.)

This inconsistent behavior is caused by the current implementation,
which only cares about the "link, then ifup" order, not "ifup, then
link" order. This patch fixes the inconsistency by calling
netif_carrier_on() when two netdevsim interfaces are linked.

This patch fixes buggy behavior on NetworkManager-based systems which
causes the netdevsim test to fail with the following error:

  # timeout set to 600
  # selftests: drivers/net/netdevsim: peer.sh
  # 2025/12/25 00:54:03 socat[9115] W address is opened in read-write mode but only supports read-only
  # 2025/12/25 00:56:17 socat[9115] W connect(7, AF=2 192.168.1.1:1234, 16): Connection timed out
  # 2025/12/25 00:56:17 socat[9115] E TCP:192.168.1.1:1234: Connection timed out
  # expected 3 bytes, got 0
  # 2025/12/25 00:56:17 socat[9109] W exiting on signal 15
  not ok 13 selftests: drivers/net/netdevsim: peer.sh # exit=1

This patch also solves timeout on TCP Fast Open (TFO) test in
NetworkManager-based systems because it also depends on netdevsim's
carrier consistency.

Fixes: 1a8fed52f7be ("netdevsim: set the carrier when the device goes up")
Signed-off-by: Yohei Kojima <yk@y-koj.net>
Reviewed-by: Breno Leitao <leitao@debian.org>
Link: https://patch.msgid.link/602c9e1ba5bb2ee1997bb38b1d866c9c3b807ae9.1767624906.git.yk@y-koj.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/netdevsim/bus.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 70e8c38ddad6..d16b95304aa7 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -332,6 +332,11 @@ static ssize_t link_device_store(const struct bus_type *bus, const char *buf, si
 	rcu_assign_pointer(nsim_a->peer, nsim_b);
 	rcu_assign_pointer(nsim_b->peer, nsim_a);
 
+	if (netif_running(dev_a) && netif_running(dev_b)) {
+		netif_carrier_on(dev_a);
+		netif_carrier_on(dev_b);
+	}
+
 out_err:
 	put_net(ns_b);
 	put_net(ns_a);
@@ -381,6 +386,9 @@ static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf,
 	if (!peer)
 		goto out_put_netns;
 
+	netif_carrier_off(dev);
+	netif_carrier_off(peer->netdev);
+
 	err = 0;
 	RCU_INIT_POINTER(nsim->peer, NULL);
 	RCU_INIT_POINTER(peer->peer, NULL);

From 75df712cddfd6c76e0e255584766385648ad9529 Mon Sep 17 00:00:00 2001
From: Yohei Kojima <yk@y-koj.net>
Date: Tue, 6 Jan 2026 00:17:33 +0900
Subject: [PATCH 54/70] selftests: netdevsim: add carrier state consistency
 test

This commit adds a test case for netdevsim carrier state consistency.
Specifically, the added test verifies the carrier state during the
following operations:

1. Unlink two netdevsims
2. ifdown one netdevsim, then ifup again
3. Link the netdevsims again
4. ifdown one netdevsim, then ifup again

These steps verifies that the carrier is UP iff two netdevsims are
linked and ifuped.

Signed-off-by: Yohei Kojima <yk@y-koj.net>
Link: https://patch.msgid.link/481e2729e53b6074ebfc0ad85764d8feb244de8c.1767624906.git.yk@y-koj.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/netdevsim/peer.sh   | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index 7f32b5600925..f4721f7636dd 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -52,6 +52,39 @@ cleanup_ns()
 	ip netns del nssv
 }
 
+is_carrier_up()
+{
+	local netns="$1"
+	local nsim_dev="$2"
+
+	test "$(ip netns exec "$netns"	\
+		cat /sys/class/net/"$nsim_dev"/carrier 2>/dev/null)" -eq 1
+}
+
+assert_carrier_up()
+{
+	local netns="$1"
+	local nsim_dev="$2"
+
+	if ! is_carrier_up "$netns" "$nsim_dev"; then
+		echo "$nsim_dev's carrier should be UP, but it isn't"
+		cleanup_ns
+		exit 1
+	fi
+}
+
+assert_carrier_down()
+{
+	local netns="$1"
+	local nsim_dev="$2"
+
+	if is_carrier_up "$netns" "$nsim_dev"; then
+		echo "$nsim_dev's carrier should be DOWN, but it isn't"
+		cleanup_ns
+		exit 1
+	fi
+}
+
 ###
 ### Code start
 ###
@@ -113,6 +146,32 @@ if [ $? -eq 0 ]; then
 	exit 1
 fi
 
+# netdevsim carrier state consistency checking
+assert_carrier_up nssv "$NSIM_DEV_1_NAME"
+assert_carrier_up nscl "$NSIM_DEV_2_NAME"
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > "$NSIM_DEV_SYS_UNLINK"
+
+assert_carrier_down nssv "$NSIM_DEV_1_NAME"
+assert_carrier_down nscl "$NSIM_DEV_2_NAME"
+
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up
+
+assert_carrier_down nssv "$NSIM_DEV_1_NAME"
+assert_carrier_down nscl "$NSIM_DEV_2_NAME"
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+
+assert_carrier_up nssv "$NSIM_DEV_1_NAME"
+assert_carrier_up nscl "$NSIM_DEV_2_NAME"
+
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down
+ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up
+
+assert_carrier_up nssv "$NSIM_DEV_1_NAME"
+assert_carrier_up nscl "$NSIM_DEV_2_NAME"
+
 # send/recv packets
 
 tmp_file=$(mktemp)

From 6abcf751bc084804a9e5b3051442e8a2ce67f48a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 5 Jan 2026 09:43:31 +0100
Subject: [PATCH 55/70] net: airoha: Fix schedule while atomic in
 airoha_ppe_deinit()

airoha_ppe_deinit() runs airoha_npu_ppe_deinit() in atomic context.
airoha_npu_ppe_deinit routine allocates ppe_data buffer with GFP_KERNEL
flag. Rely on rcu_replace_pointer in airoha_ppe_deinit routine in order
to fix schedule while atomic issue in airoha_npu_ppe_deinit() since we
do not need atomic context there.

Fixes: 00a7678310fe3 ("net: airoha: Introduce flowtable offload support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20260105-airoha-fw-ethtool-v2-1-3b32b158cc31@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/airoha/airoha_ppe.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 0caabb0c3aa0..2221bafaf7c9 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -1547,13 +1547,16 @@ void airoha_ppe_deinit(struct airoha_eth *eth)
 {
 	struct airoha_npu *npu;
 
-	rcu_read_lock();
-	npu = rcu_dereference(eth->npu);
+	mutex_lock(&flow_offload_mutex);
+
+	npu = rcu_replace_pointer(eth->npu, NULL,
+				  lockdep_is_held(&flow_offload_mutex));
 	if (npu) {
 		npu->ops.ppe_deinit(npu);
 		airoha_npu_put(npu);
 	}
-	rcu_read_unlock();
+
+	mutex_unlock(&flow_offload_mutex);
 
 	rhashtable_destroy(&eth->ppe->l2_flows);
 	rhashtable_destroy(&eth->flow_table);

From 21cbf883d073abbfe09e3924466aa5e0449e7261 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 8 Jan 2026 10:19:27 +0000
Subject: [PATCH 56/70] wifi: avoid kernel-infoleak from struct iw_point

struct iw_point has a 32bit hole on 64bit arches.

struct iw_point {
  void __user   *pointer;       /* Pointer to the data  (in user space) */
  __u16         length;         /* number of fields or size in bytes */
  __u16         flags;          /* Optional params */
};

Make sure to zero the structure to avoid disclosing 32bits of kernel data
to user space.

Fixes: 87de87d5e47f ("wext: Dispatch and handle compat ioctls entirely in net/wireless/wext.c")
Reported-by: syzbot+bfc7323743ca6dbcc3d3@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/695f83f3.050a0220.1c677c.0392.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260108101927.857582-1-edumazet@google.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/wext-core.c | 4 ++++
 net/wireless/wext-priv.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c32a7c6903d5..7b8e94214b07 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1101,6 +1101,10 @@ static int compat_standard_call(struct net_device	*dev,
 		return ioctl_standard_call(dev, iwr, cmd, info, handler);
 
 	iwp_compat = (struct compat_iw_point *) &iwr->u.data;
+
+	/* struct iw_point has a 32bit hole on 64bit arches. */
+	memset(&iwp, 0, sizeof(iwp));
+
 	iwp.pointer = compat_ptr(iwp_compat->pointer);
 	iwp.length = iwp_compat->length;
 	iwp.flags = iwp_compat->flags;
diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c
index 674d426a9d24..37d1147019c2 100644
--- a/net/wireless/wext-priv.c
+++ b/net/wireless/wext-priv.c
@@ -228,6 +228,10 @@ int compat_private_call(struct net_device *dev, struct iwreq *iwr,
 		struct iw_point iwp;
 
 		iwp_compat = (struct compat_iw_point *) &iwr->u.data;
+
+		/* struct iw_point has a 32bit hole on 64bit arches. */
+		memset(&iwp, 0, sizeof(iwp));
+
 		iwp.pointer = compat_ptr(iwp_compat->pointer);
 		iwp.length = iwp_compat->length;
 		iwp.flags = iwp_compat->flags;

From 333418872bfecf4843f1ded7a4151685dfcf07d5 Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Wed, 7 Jan 2026 14:36:51 +0100
Subject: [PATCH 57/70] wifi: mac80211_hwsim: fix typo in frequency
 notification

The NAN notification is for 5745 MHz which corresponds to channel 149
and not 5475 which is not actually a valid channel. This could result in
a NULL pointer dereference in cfg80211_next_nan_dw_notif.

Fixes: a37a6f54439b ("wifi: mac80211_hwsim: Add simulation support for NAN device")
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Reviewed-by: Ilan Peer <ilan.peer@intel.com>
Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260107143652.7dab2035836f.Iacbaf7bb94ed5c14a0928a625827e4137d8bfede@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c
index 551f5eb4e747..92427f527286 100644
--- a/drivers/net/wireless/virtual/mac80211_hwsim.c
+++ b/drivers/net/wireless/virtual/mac80211_hwsim.c
@@ -4040,7 +4040,7 @@ mac80211_hwsim_nan_dw_start(struct hrtimer *timer)
 			ieee80211_vif_to_wdev(data->nan_device_vif);
 
 		if (data->nan_curr_dw_band == NL80211_BAND_5GHZ)
-			ch = ieee80211_get_channel(hw->wiphy, 5475);
+			ch = ieee80211_get_channel(hw->wiphy, 5745);
 		else
 			ch = ieee80211_get_channel(hw->wiphy, 2437);
 

From c0d82ba9612fb65a8394af639f1427dbe87fb788 Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Wed, 7 Jan 2026 14:37:36 +0100
Subject: [PATCH 58/70] wifi: mac80211: don't iterate not running interfaces

for_each_chanctx_user_* was introdcued as a replacement for
for_each_sdata_link, which visits also other chanctx users that are not
link.
for_each_sdata_link skips not running interfaces, do the same for
for_each_chanctx_user_*

Fixes: 1ce954c98b89 ("wifi: mac80211: add and use chanctx usage iteration")
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20260107143736.55c084e2a976.I38b7b904a135dadca339321923b501b2c2c5c8c0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/chan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index d0bfb1216401..d8c5f11afc15 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -90,6 +90,9 @@ next_interface:
 	/* next (or first) interface */
 	iter->sdata = list_prepare_entry(iter->sdata, &local->interfaces, list);
 	list_for_each_entry_continue(iter->sdata, &local->interfaces, list) {
+		if (!ieee80211_sdata_running(iter->sdata))
+			continue;
+
 		/* AP_VLAN has a chanctx pointer but follows AP */
 		if (iter->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 			continue;

From 6f385937160174b31a5e4105e759406f0b128494 Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Wed, 7 Jan 2026 14:38:05 +0100
Subject: [PATCH 59/70] wifi: mac80211_hwsim: disable BHs for hwsim_radio_lock

The hwsim_radio_lock spinlock expects bottom-half to be disabled, fix
the call in mac80211_hwsim_nan_stop to ensure BHs are disabled.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20260107143805.ce7406511608.I688f8b19346e94c1f8de0cdadde072054d4b861c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/virtual/mac80211_hwsim.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c
index 92427f527286..79cc63272134 100644
--- a/drivers/net/wireless/virtual/mac80211_hwsim.c
+++ b/drivers/net/wireless/virtual/mac80211_hwsim.c
@@ -4112,14 +4112,14 @@ static int mac80211_hwsim_stop_nan(struct ieee80211_hw *hw,
 	hrtimer_cancel(&data->nan_timer);
 	data->nan_device_vif = NULL;
 
-	spin_lock(&hwsim_radio_lock);
+	spin_lock_bh(&hwsim_radio_lock);
 	list_for_each_entry(data2, &hwsim_radios, list) {
 		if (data2->nan_device_vif) {
 			nan_cluster_running = true;
 			break;
 		}
 	}
-	spin_unlock(&hwsim_radio_lock);
+	spin_unlock_bh(&hwsim_radio_lock);
 
 	if (!nan_cluster_running)
 		memset(hwsim_nan_cluster_id, 0, ETH_ALEN);

From d594cc6f2c588810888df70c83a9654b6bc7942d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 16 Dec 2025 11:52:42 +0100
Subject: [PATCH 60/70] wifi: mac80211: restore non-chanctx injection behaviour

During the transition to use channel contexts throughout, the
ability to do injection while in monitor mode concurrent with
another interface was lost, since the (virtual) monitor won't
have a chanctx assigned in this scenario.

It's harder to fix drivers that actually transitioned to using
channel contexts themselves, such as mt76, but it's easy to do
those that are (still) just using the emulation. Do that.

Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218763
Reported-and-tested-by: Oscar Alfonso Diaz <oscar.alfonso.diaz@gmail.com>
Fixes: 0a44dfc07074 ("wifi: mac80211: simplify non-chanctx drivers")
Link: https://patch.msgid.link/20251216105242.18366-2-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9d8b0a25f73c..1b55e8340413 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2397,6 +2397,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 
 	if (chanctx_conf)
 		chandef = &chanctx_conf->def;
+	else if (local->emulate_chanctx)
+		chandef = &local->hw.conf.chandef;
 	else
 		goto fail_rcu;
 

From a203dbeeca15a9b924f0d51f510921f4bae96801 Mon Sep 17 00:00:00 2001
From: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Date: Mon, 22 Dec 2025 10:29:07 +0800
Subject: [PATCH 61/70] wifi: mac80211: collect station statistics earlier when
 disconnect

In __sta_info_destroy_part2(), station statistics are requested after the
IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST transition. This is
problematic because the driver may be unable to handle the request due to
the STA being in the NOTEXIST state (i.e. if the driver destroys the
underlying data when transitioning to NOTEXIST).

Move the statistics collection to before the state transition to avoid
this issue.

Signed-off-by: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Link: https://patch.msgid.link/20251222-mac80211-move-station-stats-collection-earlier-v1-1-12cd4e42c633@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f4d3b67fda06..1a995bc301b1 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1533,6 +1533,10 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc)
 		}
 	}
 
+	sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+	if (sinfo)
+		sta_set_sinfo(sta, sinfo, true);
+
 	if (sta->uploaded) {
 		ret = drv_sta_state(local, sdata, sta, IEEE80211_STA_NONE,
 				    IEEE80211_STA_NOTEXIST);
@@ -1541,9 +1545,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc)
 
 	sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
 
-	sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
-	if (sinfo)
-		sta_set_sinfo(sta, sinfo, true);
 	cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
 	kfree(sinfo);
 

From c1d73b1480235731e35c81df70b08f4714a7d095 Mon Sep 17 00:00:00 2001
From: Xiang Mei <xmei5@asu.edu>
Date: Mon, 5 Jan 2026 20:41:00 -0700
Subject: [PATCH 62/70] net/sched: sch_qfq: Fix NULL deref when deactivating
 inactive aggregate in qfq_reset

`qfq_class->leaf_qdisc->q.qlen > 0` does not imply that the class
itself is active.

Two qfq_class objects may point to the same leaf_qdisc. This happens
when:

1. one QFQ qdisc is attached to the dev as the root qdisc, and

2. another QFQ qdisc is temporarily referenced (e.g., via qdisc_get()
/ qdisc_put()) and is pending to be destroyed, as in function
tc_new_tfilter.

When packets are enqueued through the root QFQ qdisc, the shared
leaf_qdisc->q.qlen increases. At the same time, the second QFQ
qdisc triggers qdisc_put and qdisc_destroy: the qdisc enters
qfq_reset() with its own q->q.qlen == 0, but its class's leaf
qdisc->q.qlen > 0. Therefore, the qfq_reset would wrongly deactivate
an inactive aggregate and trigger a null-deref in qfq_deactivate_agg:

[    0.903172] BUG: kernel NULL pointer dereference, address: 0000000000000000
[    0.903571] #PF: supervisor write access in kernel mode
[    0.903860] #PF: error_code(0x0002) - not-present page
[    0.904177] PGD 10299b067 P4D 10299b067 PUD 10299c067 PMD 0
[    0.904502] Oops: Oops: 0002 [#1] SMP NOPTI
[    0.904737] CPU: 0 UID: 0 PID: 135 Comm: exploit Not tainted 6.19.0-rc3+ #2 NONE
[    0.905157] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
[    0.905754] RIP: 0010:qfq_deactivate_agg (include/linux/list.h:992 (discriminator 2) include/linux/list.h:1006 (discriminator 2) net/sched/sch_qfq.c:1367 (discriminator 2) net/sched/sch_qfq.c:1393 (discriminator 2))
[    0.906046] Code: 0f 84 4d 01 00 00 48 89 70 18 8b 4b 10 48 c7 c2 ff ff ff ff 48 8b 78 08 48 d3 e2 48 21 f2 48 2b 13 48 8b 30 48 d3 ea 8b 4b 18 0

Code starting with the faulting instruction
===========================================
   0:	0f 84 4d 01 00 00    	je     0x153
   6:	48 89 70 18          	mov    %rsi,0x18(%rax)
   a:	8b 4b 10             	mov    0x10(%rbx),%ecx
   d:	48 c7 c2 ff ff ff ff 	mov    $0xffffffffffffffff,%rdx
  14:	48 8b 78 08          	mov    0x8(%rax),%rdi
  18:	48 d3 e2             	shl    %cl,%rdx
  1b:	48 21 f2             	and    %rsi,%rdx
  1e:	48 2b 13             	sub    (%rbx),%rdx
  21:	48 8b 30             	mov    (%rax),%rsi
  24:	48 d3 ea             	shr    %cl,%rdx
  27:	8b 4b 18             	mov    0x18(%rbx),%ecx
	...
[    0.907095] RSP: 0018:ffffc900004a39a0 EFLAGS: 00010246
[    0.907368] RAX: ffff8881043a0880 RBX: ffff888102953340 RCX: 0000000000000000
[    0.907723] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[    0.908100] RBP: ffff888102952180 R08: 0000000000000000 R09: 0000000000000000
[    0.908451] R10: ffff8881043a0000 R11: 0000000000000000 R12: ffff888102952000
[    0.908804] R13: ffff888102952180 R14: ffff8881043a0ad8 R15: ffff8881043a0880
[    0.909179] FS:  000000002a1a0380(0000) GS:ffff888196d8d000(0000) knlGS:0000000000000000
[    0.909572] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.909857] CR2: 0000000000000000 CR3: 0000000102993002 CR4: 0000000000772ef0
[    0.910247] PKRU: 55555554
[    0.910391] Call Trace:
[    0.910527]  <TASK>
[    0.910638]  qfq_reset_qdisc (net/sched/sch_qfq.c:357 net/sched/sch_qfq.c:1485)
[    0.910826]  qdisc_reset (include/linux/skbuff.h:2195 include/linux/skbuff.h:2501 include/linux/skbuff.h:3424 include/linux/skbuff.h:3430 net/sched/sch_generic.c:1036)
[    0.911040]  __qdisc_destroy (net/sched/sch_generic.c:1076)
[    0.911236]  tc_new_tfilter (net/sched/cls_api.c:2447)
[    0.911447]  rtnetlink_rcv_msg (net/core/rtnetlink.c:6958)
[    0.911663]  ? __pfx_rtnetlink_rcv_msg (net/core/rtnetlink.c:6861)
[    0.911894]  netlink_rcv_skb (net/netlink/af_netlink.c:2550)
[    0.912100]  netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
[    0.912296]  ? __alloc_skb (net/core/skbuff.c:706)
[    0.912484]  netlink_sendmsg (net/netlink/af_netlink.c:1894)
[    0.912682]  sock_write_iter (net/socket.c:727 (discriminator 1) net/socket.c:742 (discriminator 1) net/socket.c:1195 (discriminator 1))
[    0.912880]  vfs_write (fs/read_write.c:593 fs/read_write.c:686)
[    0.913077]  ksys_write (fs/read_write.c:738)
[    0.913252]  do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1))
[    0.913438]  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:131)
[    0.913687] RIP: 0033:0x424c34
[    0.913844] Code: 89 02 48 c7 c0 ff ff ff ff eb bd 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d 2d 44 09 00 00 74 13 b8 01 00 00 00 0f 05 9

Code starting with the faulting instruction
===========================================
   0:	89 02                	mov    %eax,(%rdx)
   2:	48 c7 c0 ff ff ff ff 	mov    $0xffffffffffffffff,%rax
   9:	eb bd                	jmp    0xffffffffffffffc8
   b:	66 2e 0f 1f 84 00 00 	cs nopw 0x0(%rax,%rax,1)
  12:	00 00 00
  15:	90                   	nop
  16:	f3 0f 1e fa          	endbr64
  1a:	80 3d 2d 44 09 00 00 	cmpb   $0x0,0x9442d(%rip)        # 0x9444e
  21:	74 13                	je     0x36
  23:	b8 01 00 00 00       	mov    $0x1,%eax
  28:	0f 05                	syscall
  2a:	09                   	.byte 0x9
[    0.914807] RSP: 002b:00007ffea1938b78 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
[    0.915197] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000424c34
[    0.915556] RDX: 000000000000003c RSI: 000000002af378c0 RDI: 0000000000000003
[    0.915912] RBP: 00007ffea1938bc0 R08: 00000000004b8820 R09: 0000000000000000
[    0.916297] R10: 0000000000000001 R11: 0000000000000202 R12: 00007ffea1938d28
[    0.916652] R13: 00007ffea1938d38 R14: 00000000004b3828 R15: 0000000000000001
[    0.917039]  </TASK>
[    0.917158] Modules linked in:
[    0.917316] CR2: 0000000000000000
[    0.917484] ---[ end trace 0000000000000000 ]---
[    0.917717] RIP: 0010:qfq_deactivate_agg (include/linux/list.h:992 (discriminator 2) include/linux/list.h:1006 (discriminator 2) net/sched/sch_qfq.c:1367 (discriminator 2) net/sched/sch_qfq.c:1393 (discriminator 2))
[    0.917978] Code: 0f 84 4d 01 00 00 48 89 70 18 8b 4b 10 48 c7 c2 ff ff ff ff 48 8b 78 08 48 d3 e2 48 21 f2 48 2b 13 48 8b 30 48 d3 ea 8b 4b 18 0

Code starting with the faulting instruction
===========================================
   0:	0f 84 4d 01 00 00    	je     0x153
   6:	48 89 70 18          	mov    %rsi,0x18(%rax)
   a:	8b 4b 10             	mov    0x10(%rbx),%ecx
   d:	48 c7 c2 ff ff ff ff 	mov    $0xffffffffffffffff,%rdx
  14:	48 8b 78 08          	mov    0x8(%rax),%rdi
  18:	48 d3 e2             	shl    %cl,%rdx
  1b:	48 21 f2             	and    %rsi,%rdx
  1e:	48 2b 13             	sub    (%rbx),%rdx
  21:	48 8b 30             	mov    (%rax),%rsi
  24:	48 d3 ea             	shr    %cl,%rdx
  27:	8b 4b 18             	mov    0x18(%rbx),%ecx
	...
[    0.918902] RSP: 0018:ffffc900004a39a0 EFLAGS: 00010246
[    0.919198] RAX: ffff8881043a0880 RBX: ffff888102953340 RCX: 0000000000000000
[    0.919559] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[    0.919908] RBP: ffff888102952180 R08: 0000000000000000 R09: 0000000000000000
[    0.920289] R10: ffff8881043a0000 R11: 0000000000000000 R12: ffff888102952000
[    0.920648] R13: ffff888102952180 R14: ffff8881043a0ad8 R15: ffff8881043a0880
[    0.921014] FS:  000000002a1a0380(0000) GS:ffff888196d8d000(0000) knlGS:0000000000000000
[    0.921424] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.921710] CR2: 0000000000000000 CR3: 0000000102993002 CR4: 0000000000772ef0
[    0.922097] PKRU: 55555554
[    0.922240] Kernel panic - not syncing: Fatal exception
[    0.922590] Kernel Offset: disabled

Fixes: 0545a3037773 ("pkt_sched: QFQ - quick fair queue scheduler")
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Link: https://patch.msgid.link/20260106034100.1780779-1-xmei5@asu.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_qfq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d920f57dc6d7..f4013b547438 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1481,7 +1481,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
-			if (cl->qdisc->q.qlen > 0)
+			if (cl_is_active(cl))
 				qfq_deactivate_class(q, cl);
 
 			qdisc_reset(cl->qdisc);

From a4e305ed60f7c41bbf9aabc16dd75267194e0de3 Mon Sep 17 00:00:00 2001
From: Thomas Fourier <fourier.thomas@gmail.com>
Date: Tue, 6 Jan 2026 10:47:21 +0100
Subject: [PATCH 63/70] net: 3com: 3c59x: fix possible null dereference in
 vortex_probe1()

pdev can be null and free_ring: can be called in 1297 with a null
pdev.

Fixes: 55c82617c3e8 ("3c59x: convert to generic DMA API")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Fourier <fourier.thomas@gmail.com>
Link: https://patch.msgid.link/20260106094731.25819-2-fourier.thomas@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/3com/3c59x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 8c9cc97efd4e..4fe4efdb3737 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1473,7 +1473,7 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
 		return 0;
 
 free_ring:
-	dma_free_coherent(&pdev->dev,
+	dma_free_coherent(gendev,
 		sizeof(struct boom_rx_desc) * RX_RING_SIZE +
 		sizeof(struct boom_tx_desc) * TX_RING_SIZE,
 		vp->rx_ring, vp->rx_ring_dma);

From afa27621a28af317523e0836dad430bec551eb54 Mon Sep 17 00:00:00 2001
From: Petko Manolov <petkan@nucleusys.com>
Date: Tue, 6 Jan 2026 10:48:21 +0200
Subject: [PATCH 64/70] net: usb: pegasus: fix memory leak in
 update_eth_regs_async()

When asynchronously writing to the device registers and if usb_submit_urb()
fail, the code fail to release allocated to this point resources.

Fixes: 323b34963d11 ("drivers: net: usb: pegasus: fix control urb submission")
Signed-off-by: Petko Manolov <petkan@nucleusys.com>
Link: https://patch.msgid.link/20260106084821.3746677-1-petko.manolov@konsulko.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/pegasus.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 81ca64debc5b..c514483134f0 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -168,6 +168,8 @@ static int update_eth_regs_async(pegasus_t *pegasus)
 			netif_device_detach(pegasus->net);
 		netif_err(pegasus, drv, pegasus->net,
 			  "%s returned %d\n", __func__, ret);
+		usb_free_urb(async_urb);
+		kfree(req);
 	}
 	return ret;
 }

From 3358995b1a7f9dcb52a56ec8251570d71024dad0 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Tue, 6 Jan 2026 06:31:14 -0800
Subject: [PATCH 65/70] bnxt_en: Fix NULL pointer crash in bnxt_ptp_enable
 during error cleanup

When bnxt_init_one() fails during initialization (e.g.,
bnxt_init_int_mode returns -ENODEV), the error path calls
bnxt_free_hwrm_resources() which destroys the DMA pool and sets
bp->hwrm_dma_pool to NULL. Subsequently, bnxt_ptp_clear() is called,
which invokes ptp_clock_unregister().

Since commit a60fc3294a37 ("ptp: rework ptp_clock_unregister() to
disable events"), ptp_clock_unregister() now calls
ptp_disable_all_events(), which in turn invokes the driver's .enable()
callback (bnxt_ptp_enable()) to disable PTP events before completing the
unregistration.

bnxt_ptp_enable() attempts to send HWRM commands via bnxt_ptp_cfg_pin()
and bnxt_ptp_cfg_event(), both of which call hwrm_req_init(). This
function tries to allocate from bp->hwrm_dma_pool, causing a NULL
pointer dereference:

  bnxt_en 0000:01:00.0 (unnamed net_device) (uninitialized): bnxt_init_int_mode err: ffffffed
  KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
  Call Trace:
   __hwrm_req_init (drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c:72)
   bnxt_ptp_enable (drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:323 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:517)
   ptp_disable_all_events (drivers/ptp/ptp_chardev.c:66)
   ptp_clock_unregister (drivers/ptp/ptp_clock.c:518)
   bnxt_ptp_clear (drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:1134)
   bnxt_init_one (drivers/net/ethernet/broadcom/bnxt/bnxt.c:16889)

Lines are against commit f8f9c1f4d0c7 ("Linux 6.19-rc3")

Fix this by clearing and unregistering ptp (bnxt_ptp_clear()) before
freeing HWRM resources.

Suggested-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
Fixes: a60fc3294a37 ("ptp: rework ptp_clock_unregister() to disable events")
Cc: stable@vger.kernel.org
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Link: https://patch.msgid.link/20260106-bnxt-v3-1-71f37e11446a@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d160e54ac121..8419d1eb4035 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -16891,12 +16891,12 @@ init_err_dl:
 
 init_err_pci_clean:
 	bnxt_hwrm_func_drv_unrgtr(bp);
-	bnxt_free_hwrm_resources(bp);
-	bnxt_hwmon_uninit(bp);
-	bnxt_ethtool_free(bp);
 	bnxt_ptp_clear(bp);
 	kfree(bp->ptp_cfg);
 	bp->ptp_cfg = NULL;
+	bnxt_free_hwrm_resources(bp);
+	bnxt_hwmon_uninit(bp);
+	bnxt_ethtool_free(bp);
 	kfree(bp->fw_health);
 	bp->fw_health = NULL;
 	bnxt_cleanup_pci(bp);

From 7d11e047eda5f98514ae62507065ac961981c025 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 6 Jan 2026 10:05:46 -0500
Subject: [PATCH 66/70] net: do not write to msg_get_inq in callee

NULL pointer dereference fix.

msg_get_inq is an input field from caller to callee. Don't set it in
the callee, as the caller may not clear it on struct reuse.

This is a kernel-internal variant of msghdr only, and the only user
does reinitialize the field. So this is not critical for that reason.
But it is more robust to avoid the write, and slightly simpler code.
And it fixes a bug, see below.

Callers set msg_get_inq to request the input queue length to be
returned in msg_inq. This is equivalent to but independent from the
SO_INQ request to return that same info as a cmsg (tp->recvmsg_inq).
To reduce branching in the hot path the second also sets the msg_inq.
That is WAI.

This is a fix to commit 4d1442979e4a ("af_unix: don't post cmsg for
SO_INQ unless explicitly asked for"), which fixed the inverse.

Also avoid NULL pointer dereference in unix_stream_read_generic if
state->msg is NULL and msg->msg_get_inq is written. A NULL state->msg
can happen when splicing as of commit 2b514574f7e8 ("net: af_unix:
implement splice for stream af_unix sockets").

Also collapse two branches using a bitwise or.

Cc: stable@vger.kernel.org
Fixes: 4d1442979e4a ("af_unix: don't post cmsg for SO_INQ unless explicitly asked for")
Link: https://lore.kernel.org/netdev/willemdebruijn.kernel.24d8030f7a3de@gmail.com/
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260106150626.3944363-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/tcp.c     | 8 +++-----
 net/unix/af_unix.c | 8 +++-----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f035440c475a..d5319ebe2452 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2652,10 +2652,8 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
 	if (sk->sk_state == TCP_LISTEN)
 		goto out;
 
-	if (tp->recvmsg_inq) {
+	if (tp->recvmsg_inq)
 		*cmsg_flags = TCP_CMSG_INQ;
-		msg->msg_get_inq = 1;
-	}
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	/* Urgent data needs to be handled specially. */
@@ -2929,10 +2927,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 	ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
 	release_sock(sk);
 
-	if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
+	if ((cmsg_flags | msg->msg_get_inq) && ret >= 0) {
 		if (cmsg_flags & TCP_CMSG_TS)
 			tcp_recv_timestamp(msg, sk, &tss);
-		if (msg->msg_get_inq) {
+		if ((cmsg_flags & TCP_CMSG_INQ) | msg->msg_get_inq) {
 			msg->msg_inq = tcp_inq_hint(sk);
 			if (cmsg_flags & TCP_CMSG_INQ)
 				put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a7ca74653d94..d0511225799b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2904,7 +2904,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
 	unsigned int last_len;
 	struct unix_sock *u;
 	int copied = 0;
-	bool do_cmsg;
 	int err = 0;
 	long timeo;
 	int target;
@@ -2930,9 +2929,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
 
 	u = unix_sk(sk);
 
-	do_cmsg = READ_ONCE(u->recvmsg_inq);
-	if (do_cmsg)
-		msg->msg_get_inq = 1;
 redo:
 	/* Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
@@ -3090,9 +3086,11 @@ unlock:
 
 	mutex_unlock(&u->iolock);
 	if (msg) {
+		bool do_cmsg = READ_ONCE(u->recvmsg_inq);
+
 		scm_recv_unix(sock, msg, &scm, flags);
 
-		if (msg->msg_get_inq && (copied ?: err) >= 0) {
+		if ((do_cmsg | msg->msg_get_inq) && (copied ?: err) >= 0) {
 			msg->msg_inq = READ_ONCE(u->inq_len);
 			if (do_cmsg)
 				put_cmsg(msg, SOL_SOCKET, SCM_INQ,

From 790792ebc9603a7ccbf6996cb537d89607e3a75b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 6 Jan 2026 08:34:26 -0800
Subject: [PATCH 67/70] tools: ynl: don't install tests

make's install target is meant for installing the production
artifacts, AFAIU. Don't install test_ynl_cli and test_ynl_ethtool
from under the main YNL install target. The install target
under tests/ is retained in case someone wants the tests
to be installed.

Fixes: 308b7dee3e5c ("tools: ynl: add YNL test framework")
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://patch.msgid.link/20260106163426.1468943-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index 7736b492f559..c2f3e8b3f2ac 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -51,7 +51,6 @@ install: libynl.a lib/*.h
 	@echo -e "\tINSTALL pyynl"
 	@pip install --prefix=$(DESTDIR)$(prefix) .
 	@make -C generated install
-	@make -C tests install
 
 run_tests:
 	@$(MAKE) -C tests run_tests

From 4d984b0574ff708e66152763fbfdef24ea40933f Mon Sep 17 00:00:00 2001
From: Thomas Fourier <fourier.thomas@gmail.com>
Date: Wed, 7 Jan 2026 10:01:36 +0100
Subject: [PATCH 68/70] atm: Fix dma_free_coherent() size

The size of the buffer is not the same when alloc'd with
dma_alloc_coherent() in he_init_tpdrq() and freed.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Fourier <fourier.thomas@gmail.com>
Link: https://patch.msgid.link/20260107090141.80900-2-fourier.thomas@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/atm/he.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index ad91cc6a34fc..92a041d5387b 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1587,7 +1587,8 @@ he_stop(struct he_dev *he_dev)
 				  he_dev->tbrq_base, he_dev->tbrq_phys);
 
 	if (he_dev->tpdrq_base)
-		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+		dma_free_coherent(&he_dev->pci_dev->dev,
+				  CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
 				  he_dev->tpdrq_base, he_dev->tpdrq_phys);
 
 	dma_pool_destroy(he_dev->tpd_pool);

From 4b5bdabb5449b652122e43f507f73789041d4abe Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Wed, 7 Jan 2026 17:12:04 +0800
Subject: [PATCH 69/70] net: enetc: fix build warning when PAGE_SIZE is greater
 than 128K

The max buffer size of ENETC RX BD is 0xFFFF bytes, so if the PAGE_SIZE
is greater than 128K, ENETC_RXB_DMA_SIZE and ENETC_RXB_DMA_SIZE_XDP will
be greater than 0xFFFF, thus causing a build warning.

This will not cause any practical issues because ENETC is currently only
used on the ARM64 platform, and the max PAGE_SIZE is 64K. So this patch
is only for fixing the build warning that occurs when compiling ENETC
drivers for other platforms.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202601050637.kHEKKOG7-lkp@intel.com/
Fixes: e59bc32df2e9 ("net: enetc: correct the value of ENETC_RXB_TRUESIZE")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260107091204.1980222-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/enetc/enetc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index dce27bd67a7d..aecd40aeef9c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -79,9 +79,9 @@ struct enetc_lso_t {
 #define ENETC_RXB_TRUESIZE	(PAGE_SIZE >> 1)
 #define ENETC_RXB_PAD		NET_SKB_PAD /* add extra space if needed */
 #define ENETC_RXB_DMA_SIZE	\
-	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD)
+	min(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD, 0xffff)
 #define ENETC_RXB_DMA_SIZE_XDP	\
-	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM)
+	min(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM, 0xffff)
 
 struct enetc_rx_swbd {
 	dma_addr_t dma;

From c92510f5e3f82ba11c95991824a41e59a9c5ed81 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 7 Jan 2026 21:22:50 +0000
Subject: [PATCH 70/70] arp: do not assume dev_hard_header() does not change
 skb->head

arp_create() is the only dev_hard_header() caller
making assumption about skb->head being unchanged.

A recent commit broke this assumption.

Initialize @arp pointer after dev_hard_header() call.

Fixes: db5b4e39c4e6 ("ip6_gre: make ip6gre_header() robust")
Reported-by: syzbot+58b44a770a1585795351@syzkaller.appspotmail.com
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260107212250.384552-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/arp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7f3863daaa40..c8c3e1713c0e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -564,7 +564,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
-	arp = skb_put(skb, arp_hdr_len(dev));
+	skb_put(skb, arp_hdr_len(dev));
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_ARP);
 	if (!src_hw)
@@ -572,12 +572,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 	if (!dest_hw)
 		dest_hw = dev->broadcast;
 
-	/*
-	 *	Fill the device header for the ARP frame
+	/* Fill the device header for the ARP frame.
+	 * Note: skb->head can be changed.
 	 */
 	if (dev_hard_header(skb, dev, ptype, dest_hw, src_hw, skb->len) < 0)
 		goto out;
 
+	arp = arp_hdr(skb);
 	/*
 	 * Fill out the arp protocol part.
 	 *