Merge branch 'team-fix-reference-count-leak-when-changing-port-netns'

Ido Schimmel says:

====================
team: Fix reference count leak when changing port netns

Patch #1 fixes a reference count leak that was reported by syzkaller.
The leak happens when a net device that is a member of a team is changing
netns. The fix is to align the team driver with the bond driver and have
it suppress NETDEV_CHANGEMTU events for a net device that is being
unregistered.

Without this change, the NETDEV_CHANGEMTU event causes inetdev_event()
to recreate an inet device for this net device in its original netns,
after it was previously destroyed upon NETDEV_UNREGISTER. Later on, when
inetdev_event() receives a NETDEV_REGISTER event for this net device in
the new netns, it simply leaks the reference:

case NETDEV_REGISTER:
        pr_debug("%s: bug\n", __func__);
        RCU_INIT_POINTER(dev->ip_ptr, NULL);
        break;

addrconf_notify() handles this differently and reuses the existing inet6
device if one exists when a NETDEV_REGISTER event is received. This
creates a different problem where it is possible for a net device to
reference an inet6 device that was created in a previous netns.

A more generic fix that we can try in net-next is to revert the changes
in the bond and team drivers and instead have IPv4 and IPv6 destroy and
recreate an inet device if one already exists upon NETDEV_REGISTER.

Patch #2 adds a selftest that passes with the fix and hangs without it.
====================

Link: https://patch.msgid.link/20260224125709.317574-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-02-25 19:17:12 -08:00
commit 77da71283c
3 changed files with 39 additions and 5 deletions

View file

@ -1292,7 +1292,7 @@ err_set_mtu:
static void __team_port_change_port_removed(struct team_port *port);
static int team_port_del(struct team *team, struct net_device *port_dev)
static int team_port_del(struct team *team, struct net_device *port_dev, bool unregister)
{
struct net_device *dev = team->dev;
struct team_port *port;
@ -1330,7 +1330,13 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
__team_port_change_port_removed(port);
team_port_set_orig_dev_addr(port);
dev_set_mtu(port_dev, port->orig.mtu);
if (unregister) {
netdev_lock_ops(port_dev);
__netif_set_mtu(port_dev, port->orig.mtu);
netdev_unlock_ops(port_dev);
} else {
dev_set_mtu(port_dev, port->orig.mtu);
}
kfree_rcu(port, rcu);
netdev_info(dev, "Port device %s removed\n", portname);
netdev_compute_master_upper_features(team->dev, true);
@ -1634,7 +1640,7 @@ static void team_uninit(struct net_device *dev)
ASSERT_RTNL();
list_for_each_entry_safe(port, tmp, &team->port_list, list)
team_port_del(team, port->dev);
team_port_del(team, port->dev, false);
__team_change_mode(team, NULL); /* cleanup */
__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
@ -1933,7 +1939,16 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
ASSERT_RTNL();
return team_port_del(team, port_dev);
return team_port_del(team, port_dev, false);
}
/*
 * Remove @port_dev from the team device @dev because the port is being
 * unregistered.
 *
 * Forwards to team_port_del() with its unregister argument set to true.
 * The RTNL lock must be held by the caller (asserted here).
 *
 * Returns whatever team_port_del() returns.
 */
static int team_del_slave_on_unregister(struct net_device *dev, struct net_device *port_dev)
{
	ASSERT_RTNL();

	/* The team private data lives in @dev's netdev private area. */
	return team_port_del(netdev_priv(dev), port_dev, true);
}
static netdev_features_t team_fix_features(struct net_device *dev,
@ -2926,7 +2941,7 @@ static int team_device_event(struct notifier_block *unused,
!!netif_oper_up(port->dev));
break;
case NETDEV_UNREGISTER:
team_del_slave(port->team->dev, dev);
team_del_slave_on_unregister(port->team->dev, dev);
break;
case NETDEV_FEAT_CHANGE:
if (!port->team->notifier_ctx) {
@ -2999,3 +3014,4 @@ MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
MODULE_DESCRIPTION("Ethernet team device driver");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
MODULE_IMPORT_NS("NETDEV_INTERNAL");

View file

@ -5,6 +5,7 @@ TEST_PROGS := \
dev_addr_lists.sh \
options.sh \
propagation.sh \
refleak.sh \
# end of TEST_PROGS
TEST_INCLUDES := \

View file

@ -0,0 +1,17 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# shellcheck disable=SC2154
# NOTE(review): SC2154 is presumably disabled because $ns1 and $ns2 are
# assigned by setup_ns (from lib.sh) rather than in this file -- confirm.
#
# Regression test for a reference count leak in the team driver that is
# triggered when a team port is moved to another network namespace.
# Resolve lib.sh relative to this script's own directory.
lib_dir=$(dirname "$0")
source "$lib_dir"/../../../net/lib.sh
# Run cleanup_all_ns whenever the script exits.
trap cleanup_all_ns EXIT
# Test that there is no reference count leak and that dummy1 can be deleted.
# https://lore.kernel.org/netdev/4d69abe1-ca8d-4f0b-bcf8-13899b211e57@I-love.SAKURA.ne.jp/
setup_ns ns1 ns2
# Create the team device and a dummy device in the first namespace.
ip -n "$ns1" link add name team1 type team
# NOTE(review): the MTU of 1499 is presumably chosen to differ from team1's
# MTU, so that enslaving dummy1 changes its MTU and releasing it restores the
# original value (an MTU change event) -- confirm.
ip -n "$ns1" link add name dummy1 mtu 1499 type dummy
# Enslave dummy1 to team1.
ip -n "$ns1" link set dev dummy1 master team1
# Move the enslaved port to the second namespace while it is still a team
# member; this is the operation that used to leak a reference.
ip -n "$ns1" link set dev dummy1 netns "$ns2"
# Deleting dummy1 must complete. With a leaked reference this deletion is
# expected to hang instead of returning.
ip -n "$ns2" link del dev dummy1