Merge branch 'net-ethtool-support-including-flow-label-in-the-flow-hash-for-rss'

Jakub Kicinski says:

====================
net: ethtool: support including Flow Label in the flow hash for RSS

Add support for using IPv6 Flow Label in Rx hash computation
and therefore RSS queue selection.

v3: https://lore.kernel.org/20250724015101.186608-1-kuba@kernel.org
v2:  https://lore.kernel.org/20250722014915.3365370-1-kuba@kernel.org
RFC: https://lore.kernel.org/20250609173442.1745856-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250811234212.580748-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2025-08-14 11:40:21 +02:00
commit 875c541ea6
11 changed files with 233 additions and 18 deletions

View file

@ -204,6 +204,9 @@ definitions:
doc: dst port in case of TCP/UDP/SCTP
-
name: gtp-teid
-
name: ip6-fl
doc: IPv6 Flow Label
-
name: discard
value: 31

View file

@ -6957,6 +6957,8 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
bp->rss_cap |= BNXT_RSS_CAP_ESP_V4_RSS_CAP;
if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP)
bp->rss_cap |= BNXT_RSS_CAP_ESP_V6_RSS_CAP;
if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP)
bp->rss_cap |= BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP;
if (flags & VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP)
bp->fw_cap |= BNXT_FW_CAP_VNIC_RE_FLUSH;
}

View file

@ -2407,6 +2407,7 @@ struct bnxt {
#define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6)
#define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7)
#define BNXT_RSS_CAP_MULTI_RSS_CTX BIT(8)
#define BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP BIT(9)
u8 rss_hash_key[HW_HASH_KEY_SIZE];
u8 rss_hash_key_valid:1;

View file

@ -1584,6 +1584,8 @@ static u64 get_ethtool_ipv6_rss(struct bnxt *bp)
{
/* Translate the IPv6 hash-type bits stored in bp->rss_hash_cfg into the
 * ethtool RXH_* field flags reported to the caller. The plain IPV6 bit is
 * tested first, so it wins if both bits are ever set at the same time.
 */
if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6)
return RXH_IP_SRC | RXH_IP_DST;
/* Flow Label hash type: address pair plus the IPv6 Flow Label field. */
if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL)
return RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL;
/* Neither IPv6 hash type is enabled: no fields are hashed. */
return 0;
}
@ -1662,13 +1664,18 @@ static int bnxt_set_rxfh_fields(struct net_device *dev,
if (cmd->data == RXH_4TUPLE)
tuple = 4;
else if (cmd->data == RXH_2TUPLE)
else if (cmd->data == RXH_2TUPLE ||
cmd->data == (RXH_2TUPLE | RXH_IP6_FL))
tuple = 2;
else if (!cmd->data)
tuple = 0;
else
return -EINVAL;
if (cmd->data & RXH_IP6_FL &&
!(bp->rss_cap & BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP))
return -EINVAL;
if (cmd->flow_type == TCP_V4_FLOW) {
rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4;
if (tuple == 4)
@ -1732,10 +1739,15 @@ static int bnxt_set_rxfh_fields(struct net_device *dev,
case AH_V6_FLOW:
case ESP_V6_FLOW:
case IPV6_FLOW:
if (tuple == 2)
rss_hash_cfg &= ~(VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL);
if (!tuple)
break;
if (cmd->data & RXH_IP6_FL)
rss_hash_cfg |=
VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL;
else if (tuple == 2)
rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
else if (!tuple)
rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
break;
}

View file

@ -1310,7 +1310,7 @@ fbnic_get_rss_hash_opts(struct net_device *netdev,
#define FBNIC_L2_HASH_OPTIONS \
(RXH_L2DA | RXH_DISCARD)
#define FBNIC_L3_HASH_OPTIONS \
(FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST)
(FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL)
#define FBNIC_L4_HASH_OPTIONS \
(FBNIC_L3_HASH_OPTIONS | RXH_L4_B_0_1 | RXH_L4_B_2_3)

View file

@ -71,6 +71,8 @@ u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type)
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP_DST, IP_DST, flow_hash);
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_0_1, L4_SRC, flow_hash);
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_2_3, L4_DST, flow_hash);
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, OV6_FL_LBL, flow_hash);
rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, IV6_FL_LBL, flow_hash);
return rss_en_mask;
}

View file

@ -2380,6 +2380,7 @@ enum {
#define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */
#define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */
#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */
#define RXH_IP6_FL (1 << 9) /* IPv6 flow label */
#define RXH_DISCARD (1 << 31)
#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL

View file

@ -1014,6 +1014,28 @@ static bool flow_type_hashable(u32 flow_type)
return false;
}
/* Report whether @flow_type names one of the IPv6 flow types.
 *
 * The value is compared verbatim against the known IPv6 flow type
 * constants; no bits are masked off first, so a value carrying any
 * additional bits does not match and yields false.
 */
static bool flow_type_v6(u32 flow_type)
{
	bool is_v6;

	switch (flow_type) {
	case IPV6_FLOW:
	case TCP_V6_FLOW:
	case UDP_V6_FLOW:
	case SCTP_V6_FLOW:
	case AH_V6_FLOW:
	case ESP_V6_FLOW:
	case AH_ESP_V6_FLOW:
	case GTPC_V6_FLOW:
	case GTPC_TEID_V6_FLOW:
	case GTPU_V6_FLOW:
	case GTPU_EH_V6_FLOW:
	case GTPU_UL_V6_FLOW:
	case GTPU_DL_V6_FLOW:
		is_v6 = true;
		break;
	default:
		is_v6 = false;
		break;
	}

	return is_v6;
}
/* When adding a new type, update the assert and, if it's hashable, add it to
* the flow_type_hashable switch case.
*/
@ -1077,6 +1099,9 @@ ethtool_set_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr)
if (rc)
return rc;
if (info.data & RXH_IP6_FL && !flow_type_v6(info.flow_type))
return -EINVAL;
if (info.flow_type & FLOW_RSS && info.rss_context &&
!ops->rxfh_per_ctx_fields)
return -EINVAL;

View file

@ -536,35 +536,36 @@ void ethtool_rss_notify(struct net_device *dev, u32 type, u32 rss_context)
#define RFH_MASK (RXH_L2DA | RXH_VLAN | RXH_IP_SRC | RXH_IP_DST | \
RXH_L3_PROTO | RXH_L4_B_0_1 | RXH_L4_B_2_3 | \
RXH_GTP_TEID | RXH_DISCARD)
#define RFH_MASKv6 (RFH_MASK | RXH_IP6_FL)
static const struct nla_policy ethnl_rss_flows_policy[] = {
[ETHTOOL_A_FLOW_ETHER] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_IP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_TCP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_UDP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_SCTP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_AH_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_AH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPC4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPC_TEID4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU_EH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU_UL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
[ETHTOOL_A_FLOW_GTPU_DL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
[ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6),
};
const struct nla_policy ethnl_rss_set_policy[ETHTOOL_A_RSS_FLOW_HASH + 1] = {

View file

@ -18,6 +18,7 @@ TEST_PROGS = \
pp_alloc_fail.py \
rss_api.py \
rss_ctx.py \
rss_flow_label.py \
rss_input_xfrm.py \
tso.py \
xsk_reconfig.py \

View file

@ -0,0 +1,167 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""
Tests for RSS hashing on IPv6 Flow Label.
"""
import glob
import os
import socket
from lib.py import CmdExitFailure
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \
ksft_not_in, ksft_raises, KsftSkipEx
from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port
from lib.py import NetDrvEpEnv
def _check_system(cfg):
    """
    Verify that the local and remote systems are suitable for the test.

    Raises KsftSkipEx when:
      - the Python socket module lacks SO_INCOMING_CPU,
      - the local interface exposes fewer than two Rx queues in sysfs,
      - RPS/RFS is configured on rx-0 of the local interface,
      - the remote's net.ipv6.auto_flowlabels sysctl is not "1".
    """
    if not hasattr(socket, "SO_INCOMING_CPU"):
        raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")

    rx_queues = glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")
    qcnt = len(rx_queues)
    if qcnt < 2:
        raise KsftSkipEx(f"Local has only {qcnt} queues")

    rx0_dir = f"/sys/class/net/{cfg.ifname}/queues/rx-0"
    for knob in ("rps_flow_cnt", "rps_cpus"):
        path = f"{rx0_dir}/{knob}"
        try:
            with open(path, 'r') as fp:
                setting = fp.read().strip()
        except FileNotFoundError:
            # A missing knob cannot be configured, nothing to check
            continue

        # An unconfigured value consists solely of zeros and commas
        # (the CPU mask is comma-separated)
        if setting.replace("0", "").replace(",", ""):
            raise KsftSkipEx(f"RPS/RFS is configured: {path}: {setting}")

    # 1 is the default, if someone changed it we probably shouldn't mess with it
    auto_fl = cmd("cat /proc/sys/net/ipv6/auto_flowlabels",
                  host=cfg.remote).stdout
    if auto_fl.strip() != "1":
        raise KsftSkipEx("Remote does not have auto_flowlabels enabled")
def _ethtool_get_cfg(cfg, fl_type):
    """
    Read the Rx flow hash configuration of @fl_type via "ethtool -n" and
    return it as a string of single-letter field codes (e.g. "sdfn"),
    one letter per reported field, in the order ethtool printed them.

    Raises KeyError if ethtool prints a field description which is not
    in the translation table below.
    """
    field_letters = {
        "IP SA": "s",
        "IP DA": "d",
        "L3 proto": "t",
        "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
        "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
        "IPv6 Flow Label": "l",
    }

    output = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout

    # The first line and the last two elements of the split are not field
    # descriptions (presumably a header and trailing blank lines - verify
    # against the ethtool output format if this starts failing).
    field_lines = output.split("\n")[1:-2]

    # if this raises we probably need to add more keys to field_letters above
    return "".join(field_letters[line] for line in field_lines)
def _traffic(cfg, one_sock, one_cpu):
    """
    Have the remote send 20 UDP datagrams to a local IPv6 socket and check
    on how many distinct CPUs (per SO_INCOMING_CPU) they were received.

    one_sock: if true the remote sends all datagrams through one bash
              /dev/udp socket; otherwise each datagram is sent by a separate
              socat invocation, all using the same source port.
    one_cpu:  if true expect exactly one CPU to have seen the traffic,
              otherwise expect at least two.

    The result is reported via ksft_eq() / ksft_ge(), nothing is returned.
    """
    local_port = rand_port(socket.SOCK_DGRAM)
    remote_port = rand_port(socket.SOCK_DGRAM)

    cpus = set()
    # Use the socket as a context manager so that it is closed on every path,
    # including when a read or the background command below raises.
    with socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) as sock:
        sock.bind(("", local_port))
        sock.connect((cfg.remote_addr_v["6"], 0))

        if one_sock:
            send = (f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; "
                    "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-")
        else:
            send = ("for i in `seq 20`; do echo a | socat -t0.02 - UDP6:"
                    f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done")

        with bkg(send, shell=True, host=cfg.remote, exit_wait=True):
            for _ in range(20):
                fd_read_timeout(sock.fileno(), 1)
                cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
                cpus.add(cpu)

    if one_cpu:
        ksft_eq(len(cpus), 1,
                f"{one_sock=} - expected one CPU, got traffic on: {cpus=}")
    else:
        ksft_ge(len(cpus), 2,
                f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}")
def test_rss_flow_label(cfg):
    """
    Test hashing on IPv6 flow label. Send traffic over a single socket
    and over multiple sockets. Depend on the remote having auto-label
    enabled so that it randomizes the label per socket.

    Skips (KsftSkipEx) if the system is unsuitable or the device rejects
    enabling Flow Label hashing for UDP6. The initial udp6 hash
    configuration is restored when the test case finishes.
    """

    cfg.require_ipver("6")
    cfg.require_cmd("socat", remote=True)
    _check_system(cfg)

    # Enable flow label hashing for UDP6
    initial = _ethtool_get_cfg(cfg, "udp6")
    no_lbl = initial.replace("l", "")
    if "l" not in initial:
        try:
            cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
        except CmdExitFailure as exc:
            raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc

    # Always schedule the restore: even if Flow Label hashing was already
    # enabled when we started, the test disables it further down and that
    # change must be undone as well.
    defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")

    # One remote socket -> one flow label -> one CPU;
    # many remote sockets -> many flow labels -> traffic spreads out
    _traffic(cfg, one_sock=True, one_cpu=True)
    _traffic(cfg, one_sock=False, one_cpu=False)

    # Disable it, we should see no hashing (reset was already defer()ed)
    cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")

    _traffic(cfg, one_sock=False, one_cpu=True)
def _check_v4_flow_types(cfg):
    """
    Check that "Flow Label" does not show up in the reported Rx flow hash
    configuration of any of the IPv4 flow types. Flow types for which the
    ethtool query fails are silently ignored.
    """
    for fl_type in ("tcp4", "udp4", "ah4", "esp4", "sctp4"):
        query = f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}"
        try:
            cur = cmd(query).stdout
        except CmdExitFailure:
            # Probably does not support this flow type
            continue

        ksft_not_in("Flow Label", cur,
                    comment=f"{fl_type=} has Flow Label:" + cur)
def test_rss_flow_label_6only(cfg):
    """
    Test interactions with IPv4 flow types. It should not be possible to set
    IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also
    not appear in the IPv4 "current config".
    """

    # Requesting the Flow Label field for tcp4 must be rejected
    bad_cmd = f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl"
    with ksft_raises(CmdExitFailure) as cm:
        cmd(bad_cmd)
    ksft_in("Invalid argument", cm.exception.cmd.stderr)

    _check_v4_flow_types(cfg)

    # Flip the Flow Label setting for udp6 and check again, in case it
    # leaks thru to the IPv4 flow types
    initial = _ethtool_get_cfg(cfg, "udp6")
    if "l" in initial:
        changed = initial.replace("l", "")
    else:
        changed = initial + "l"

    set_prefix = f"ethtool -N {cfg.ifname} rx-flow-hash udp6 "
    cmd(set_prefix + changed)
    restore = defer(cmd, set_prefix + initial)

    _check_v4_flow_types(cfg)
    # Put the initial config back right away and make sure IPv4 is still clean
    restore.exec()
    _check_v4_flow_types(cfg)
def main() -> None:
    """Run the Flow Label RSS test cases inside a NetDrvEpEnv environment."""
    cases = [
        test_rss_flow_label,
        test_rss_flow_label_6only,
    ]

    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        ksft_run(cases, args=(cfg, ))
    ksft_exit()


if __name__ == "__main__":
    main()