Merge branch 'ipvlan-addrs_lock-made-per-port'

Dmitry Skorodumov says:

====================
ipvlan: addrs_lock made per port

The first patch fixes a rather minor issue: ipvlan addrs are sometimes
modified without the lock held (because an IPv6 address can sometimes
be added without RTNL).
====================

Link: https://patch.msgid.link/20260112142417.4039566-1-skorodumov.dmitry@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-01-19 10:03:33 -08:00
commit 7b8e1a807c
6 changed files with 208 additions and 30 deletions

View file

@ -69,7 +69,6 @@ struct ipvl_dev {
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t sfeatures;
u32 msg_enable;
spinlock_t addrs_lock;
};
struct ipvl_addr {
@ -90,6 +89,7 @@ struct ipvl_port {
struct net_device *dev;
possible_net_t pnet;
struct hlist_head hlhead[IPVLAN_HASH_SIZE];
spinlock_t addrs_lock; /* guards hash-table and addrs */
struct list_head ipvlans;
u16 mode;
u16 flags;

View file

@ -107,17 +107,15 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6)
{
struct ipvl_addr *addr, *ret = NULL;
struct ipvl_addr *addr;
rcu_read_lock();
list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
if (addr_equal(is_v6, addr, iaddr)) {
ret = addr;
break;
}
assert_spin_locked(&ipvlan->port->addrs_lock);
list_for_each_entry(addr, &ipvlan->addrs, anode) {
if (addr_equal(is_v6, addr, iaddr))
return addr;
}
rcu_read_unlock();
return ret;
return NULL;
}
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)

View file

@ -75,6 +75,7 @@ static int ipvlan_port_create(struct net_device *dev)
for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
INIT_HLIST_HEAD(&port->hlhead[idx]);
spin_lock_init(&port->addrs_lock);
skb_queue_head_init(&port->backlog);
INIT_WORK(&port->wq, ipvlan_process_multicast);
ida_init(&port->ida);
@ -181,6 +182,7 @@ static void ipvlan_uninit(struct net_device *dev)
static int ipvlan_open(struct net_device *dev)
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_port *port = ipvlan->port;
struct ipvl_addr *addr;
if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
@ -189,10 +191,10 @@ static int ipvlan_open(struct net_device *dev)
else
dev->flags &= ~IFF_NOARP;
rcu_read_lock();
list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
spin_lock_bh(&port->addrs_lock);
list_for_each_entry(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_add(ipvlan, addr);
rcu_read_unlock();
spin_unlock_bh(&port->addrs_lock);
return 0;
}
@ -206,10 +208,10 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_unsync(phy_dev, dev);
dev_mc_unsync(phy_dev, dev);
rcu_read_lock();
list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
spin_lock_bh(&ipvlan->port->addrs_lock);
list_for_each_entry(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
rcu_read_unlock();
spin_unlock_bh(&ipvlan->port->addrs_lock);
return 0;
}
@ -579,7 +581,6 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
if (!tb[IFLA_MTU])
ipvlan_adjust_mtu(ipvlan, phy_dev);
INIT_LIST_HEAD(&ipvlan->addrs);
spin_lock_init(&ipvlan->addrs_lock);
/* TODO Probably put random address here to be presented to the
* world but keep using the physical-dev address for the outgoing
@ -657,13 +658,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
spin_lock_bh(&ipvlan->addrs_lock);
spin_lock_bh(&ipvlan->port->addrs_lock);
list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
ipvlan_ht_addr_del(addr);
list_del_rcu(&addr->anode);
kfree_rcu(addr, rcu);
}
spin_unlock_bh(&ipvlan->addrs_lock);
spin_unlock_bh(&ipvlan->port->addrs_lock);
ida_free(&ipvlan->port->ida, dev->dev_id);
list_del_rcu(&ipvlan->pnode);
@ -817,6 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
assert_spin_locked(&ipvlan->port->addrs_lock);
addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
if (!addr)
return -ENOMEM;
@ -847,16 +850,16 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
spin_lock_bh(&ipvlan->addrs_lock);
spin_lock_bh(&ipvlan->port->addrs_lock);
addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
if (!addr) {
spin_unlock_bh(&ipvlan->addrs_lock);
spin_unlock_bh(&ipvlan->port->addrs_lock);
return;
}
ipvlan_ht_addr_del(addr);
list_del_rcu(&addr->anode);
spin_unlock_bh(&ipvlan->addrs_lock);
spin_unlock_bh(&ipvlan->port->addrs_lock);
kfree_rcu(addr, rcu);
}
@ -878,14 +881,14 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
{
int ret = -EINVAL;
spin_lock_bh(&ipvlan->addrs_lock);
spin_lock_bh(&ipvlan->port->addrs_lock);
if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv6=%pI6c addr for %s intf\n",
ip6_addr, ipvlan->dev->name);
else
ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
spin_unlock_bh(&ipvlan->addrs_lock);
spin_unlock_bh(&ipvlan->port->addrs_lock);
return ret;
}
@ -924,21 +927,24 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr;
struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
int ret = NOTIFY_OK;
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_UP:
spin_lock_bh(&ipvlan->port->addrs_lock);
if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
NL_SET_ERR_MSG(i6vi->extack,
"Address already assigned to an ipvlan device");
return notifier_from_errno(-EADDRINUSE);
ret = notifier_from_errno(-EADDRINUSE);
}
spin_unlock_bh(&ipvlan->port->addrs_lock);
break;
}
return NOTIFY_OK;
return ret;
}
#endif
@ -946,14 +952,14 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
int ret = -EINVAL;
spin_lock_bh(&ipvlan->addrs_lock);
spin_lock_bh(&ipvlan->port->addrs_lock);
if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv4=%pI4 on %s intf.\n",
ip4_addr, ipvlan->dev->name);
else
ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
spin_unlock_bh(&ipvlan->addrs_lock);
spin_unlock_bh(&ipvlan->port->addrs_lock);
return ret;
}
@ -995,21 +1001,24 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused,
struct in_validator_info *ivi = (struct in_validator_info *)ptr;
struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
int ret = NOTIFY_OK;
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_UP:
spin_lock_bh(&ipvlan->port->addrs_lock);
if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
NL_SET_ERR_MSG(ivi->extack,
"Address already assigned to an ipvlan device");
return notifier_from_errno(-EADDRINUSE);
ret = notifier_from_errno(-EADDRINUSE);
}
spin_unlock_bh(&ipvlan->port->addrs_lock);
break;
}
return NOTIFY_OK;
return ret;
}
static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {

View file

@ -48,6 +48,7 @@ TEST_PROGS := \
ipv6_flowlabel.sh \
ipv6_force_forwarding.sh \
ipv6_route_update_soft_lockup.sh \
ipvtap_test.sh \
l2_tos_ttl_inherit.sh \
l2tp.sh \
link_netns.py \

View file

@ -48,6 +48,7 @@ CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPV6_SIT=y
CONFIG_IPV6_VTI=y
CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_KALLSYMS=y
CONFIG_L2TP=m
CONFIG_L2TP_ETH=m
@ -116,6 +117,7 @@ CONFIG_PROC_SYSCTL=y
CONFIG_PSAMPLE=m
CONFIG_RPS=y
CONFIG_SYSFS=y
CONFIG_TAP=m
CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_TEST_BPF=m

View file

@ -0,0 +1,168 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Simple tests for ipvtap
#
# The testing environment looks this way:
#
# |------HNS-------|     |------PHY-------|
# |      veth<----------------->veth      |
# |------|--|------|     |----------------|
#        |  |
#        |  |            |-----TST0-------|
#        |  |------------|----ipvlan      |
#        |               |----------------|
#        |
#        |               |-----TST1-------|
#        |---------------|----ipvlan      |
#                        |----------------|
#
# Names of the test functions defined in this script.
# NOTE(review): presumably consumed by tests_run from lib.sh - confirm.
ALL_TESTS="
test_ip_set
"
# Helper library; resolved relative to the current working directory.
source lib.sh
# Set to 1 by the -d option; enables the extra echo output in the tests.
DEBUG=0
# Interface names of the veth pair (host side / simulated-phy side).
VETH_HOST=vethtst.h
VETH_PHY=vethtst.p
# Number of ipvlan_ns_<i> namespaces, each running one worker.
NS_COUNT=32
# Number of add/delete rounds performed by each worker.
IP_ITERATIONS=1024
# Duration handed to timeout(1) to bound the run time of each worker.
IPSET_TIMEOUT="60s"
# Run a command with its stdout discarded.
#
# $1    - target namespace; the literal string "global" means "run in the
#         current namespace", anything else is passed to `ip netns exec`.
# $2... - the command and its arguments.
#
# Returns the exit status of the command (or of `ip netns exec`).
ns_run() {
	# Keep the namespace name local: callers use a global "ns" as the
	# indirection name for ${!ns}, and it must not be overwritten here.
	local ns=$1

	shift
	if [[ "$ns" == "global" ]]; then
		"$@" >/dev/null
	else
		ip netns exec "$ns" "$@" >/dev/null
	fi
}
# Build the test topology:
#  - a veth pair linking the host namespace (HST_NS) with a namespace that
#    stands in for the physical network (NS_PHY);
#  - NS_COUNT namespaces ipvlan_ns_<i>, each receiving an ipvtap link named
#    ipvlan0 stacked on the host-side veth.
# Terminates the script through exit_error when an ipvtap link cannot be
# created.
test_ip_setup_env() {
	setup_ns NS_PHY
	setup_ns HST_NS

	# Simulated other-host (phy) on one end, the host itself on the other.
	ns_run "$HST_NS" ip link add "$VETH_HOST" type veth \
		peer name "$VETH_PHY" netns "$NS_PHY"
	ns_run "$HST_NS" ip link set "$VETH_HOST" up
	ns_run "$NS_PHY" ip link set "$VETH_PHY" up

	for ((i = 0; i < NS_COUNT; i++)); do
		setup_ns "ipvlan_ns_$i"
		# "ns" holds the NAME of the variable that stores the real
		# namespace name, hence the ${!ns} indirection below.
		ns="ipvlan_ns_$i"

		[[ "$DEBUG" == "1" ]] && echo "created NS ${!ns}"

		ns_run "$HST_NS" ip link add netns "${!ns}" ipvlan0 \
			link "$VETH_HOST" type ipvtap mode l2 bridge ||
			exit_error "FAIL: Failed to configure ipvlan link."
	done
}
# Undo test_ip_setup_env: delete the host-side veth from HST_NS, then call
# cleanup_all_ns (from lib.sh; presumably removes every namespace created
# through setup_ns - confirm against lib.sh).
test_ip_cleanup_env() {
	ns_run "$HST_NS" ip link del "$VETH_HOST"

	cleanup_all_ns
}
# Print the message given in $1 on stdout and terminate the script with
# $ksft_fail (expected to be defined by lib.sh).
exit_error() {
	local msg=$1

	echo "$msg"
	exit $ksft_fail
}
# Print a pseudo-random integer in the range 16..47 (inclusive); used as the
# last component of the test addresses.
rnd() {
	local host_part=$(( 16 + RANDOM % 32 ))

	echo "$host_part"
}
# Worker body; runs inside one ipvlan_ns_<i> namespace.
#
# The goal is to provoke IP conflicts between namespaces. Adding an address
# and removing the very same address would never be interesting, so every
# round adds a random v4/v6 pair and then removes a (usually different)
# random pair; eventually conflicts start to appear. Errors from ip are
# expected and silenced.
test_ip_set_thread() {
	local addr

	ip link set ipvlan0 up

	for ((i = 0; i < IP_ITERATIONS; i++)); do
		v=$(rnd)
		for addr in "172.25.0.$v/24" "fc00::$v/64"; do
			ip a a "$addr" dev ipvlan0 2>/dev/null
		done

		v=$(rnd)
		for addr in "172.25.0.$v/24" "fc00::$v/64"; do
			ip a d "$addr" dev ipvlan0 2>/dev/null
		done
	done
}
# Stress test: start one address-churning worker (test_ip_set_thread) per
# namespace in parallel, wait for all of them, then verify that no address is
# configured on ipvlan0 in more than one namespace.
#
# Sets RET and reports through log_test. Returns $ksft_fail on failure; in
# that case the EXIT trap stays armed, so the environment is torn down when
# the script exits.
test_ip_set() {
	local i ns rc nsip key
	local start_failed=""
	local -A ns_pids
	local -A all_ips

	RET=0

	trap test_ip_cleanup_env EXIT

	test_ip_setup_env

	for ((i = 0; i < NS_COUNT; i++)); do
		ns="ipvlan_ns_$i"
		# Re-execute this script in worker mode. "$0" is handed to
		# bash as a script path instead of being embedded in a
		# `bash -c` string: that avoids word splitting of the path and
		# a PATH lookup (which fails when the test was started as
		# `bash ipvtap_test.sh`).
		ns_run "${!ns}" timeout "$IPSET_TIMEOUT" \
			bash "$0" test_ip_set_thread &
		ns_pids[$i]=$!
	done

	for ((i = 0; i < NS_COUNT; i++)); do
		wait "${ns_pids[$i]}"
		rc=$?
		# 125..127 mean timeout/bash could not run the worker at all.
		# Other non-zero codes are normal: the worker ends with the
		# status of its last `ip` call, or 124 when the timeout hit.
		if ((rc >= 125 && rc <= 127)); then
			start_failed="ipvlan_ns_$i (rc=$rc)"
		fi
	done

	if [[ -n "$start_failed" ]]; then
		# Without workers the conflict check below passes vacuously.
		RET=$ksft_fail
		log_test "worker failed to start in $start_failed"
		return "$RET"
	fi

	for ((i = 0; i < NS_COUNT; i++)); do
		ns="ipvlan_ns_$i"
		# Second column of every inet/inet6 line is "address/prefix".
		while IFS= read -r nsip; do
			if [[ -z $nsip ]]; then
				continue
			fi
			if [[ -v all_ips[$nsip] ]]; then
				RET=$ksft_fail
				log_test "conflict for $nsip"
				return "$RET"
			fi
			all_ips[$nsip]=$i
		done < <(ip netns exec "${!ns}" ip a l dev ipvlan0 |
			 awk '/inet/ {print $2}')
	done

	if [ "$DEBUG" = "1" ]; then
		for key in "${!all_ips[@]}"; do
			echo "$key: ${all_ips[$key]}"
		done
	fi

	trap - EXIT
	test_ip_cleanup_env

	log_test "test multithreaded ip set"
}
# Command-line handling, evaluated strictly in this order:
#   -d                  enable debug output (only recognised as first argument)
#   -t <tests...>       store the remaining arguments in TESTS
#                       (presumably read by tests_run from lib.sh - confirm)
#   test_ip_set_thread  internal worker mode, used when test_ip_set
#                       re-executes this script inside a namespace
case "$1" in
-d)
	DEBUG=1
	shift
	;;
esac

case "$1" in
-t)
	shift
	TESTS="$*"
	;;
esac

case "$1" in
test_ip_set_thread)
	test_ip_set_thread
	;;
*)
	require_command ip
	tests_run
	;;
esac