From f947d9e77b26238b821b5227afb4fee8c7ea0d5a Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Mon, 12 Jan 2026 09:39:03 +1000 Subject: [PATCH 001/828] nvme/host: fixup some typos Fix up some minor typos in the nvme host driver and a comment style to conform to the standard kernel style. Signed-off-by: Wilfred Mallawa Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 69cb04406b47..74cbbf48a981 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -25,7 +25,8 @@ struct nvme_tcp_queue; -/* Define the socket priority to use for connections were it is desirable +/* + * Define the socket priority to use for connections where it is desirable * that the NIC consider performing optimized packet processing or filtering. * A non-zero value being sufficient to indicate general consideration of any * possible optimization. Making it a module param allows for alternative @@ -926,7 +927,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, req->curr_bio = req->curr_bio->bi_next; /* - * If we don`t have any bios it means that controller + * If we don't have any bios it means the controller * sent more data than we requested, hence error */ if (!req->curr_bio) { From ddfb8b322bbd8ae996f4ac0192f0190feb0a01ce Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 3 Nov 2025 15:44:06 +0100 Subject: [PATCH 002/828] nvme: expose active quirks in sysfs Currently, there is no straightforward way for a user to inspect which quirks are active for a given device from userspace. Add a new "quirks" sysfs attribute to the nvme controller device. Reading this file will display a human-readable list of all active quirks, with each quirk name on a new line. If no quirks are active, it will display "none". Tested-by: John Meneghini Reviewed-by: John Meneghini Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen Reviewed-by: Chaitanya Kulkarni Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 54 +++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/sysfs.c | 23 +++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9a5f28c5103c..523015ae2add 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -180,6 +180,60 @@ enum nvme_quirks { NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22), }; +static inline char *nvme_quirk_name(enum nvme_quirks q) +{ + switch (q) { + case NVME_QUIRK_STRIPE_SIZE: + return "stripe_size"; + case NVME_QUIRK_IDENTIFY_CNS: + return "identify_cns"; + case NVME_QUIRK_DEALLOCATE_ZEROES: + return "deallocate_zeroes"; + case NVME_QUIRK_DELAY_BEFORE_CHK_RDY: + return "delay_before_chk_rdy"; + case NVME_QUIRK_NO_APST: + return "no_apst"; + case NVME_QUIRK_NO_DEEPEST_PS: + return "no_deepest_ps"; + case NVME_QUIRK_QDEPTH_ONE: + return "qdepth_one"; + case NVME_QUIRK_MEDIUM_PRIO_SQ: + return "medium_prio_sq"; + case NVME_QUIRK_IGNORE_DEV_SUBNQN: + return "ignore_dev_subnqn"; + case NVME_QUIRK_DISABLE_WRITE_ZEROES: + return "disable_write_zeroes"; + case NVME_QUIRK_SIMPLE_SUSPEND: + return "simple_suspend"; + case NVME_QUIRK_SINGLE_VECTOR: + return "single_vector"; + case NVME_QUIRK_128_BYTES_SQES: + return "128_bytes_sqes"; + case NVME_QUIRK_SHARED_TAGS: + return "shared_tags"; + case NVME_QUIRK_NO_TEMP_THRESH_CHANGE: + return "no_temp_thresh_change"; + case NVME_QUIRK_NO_NS_DESC_LIST: + return "no_ns_desc_list"; + case NVME_QUIRK_DMA_ADDRESS_BITS_48: + return "dma_address_bits_48"; + case NVME_QUIRK_SKIP_CID_GEN: + return "skip_cid_gen"; + case NVME_QUIRK_BOGUS_NID: + return "bogus_nid"; + case NVME_QUIRK_NO_SECONDARY_TEMP_THRESH: + return "no_secondary_temp_thresh"; + case NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND: + return "force_no_simple_suspend"; + case NVME_QUIRK_BROKEN_MSI: + return "broken_msi"; + case NVME_QUIRK_DMAPOOL_ALIGN_512: + return "dmapool_align_512"; + } + + return "unknown"; +} + /* * Common request structure for NVMe passthrough. All drivers must have * this structure as the first member of their request-private data. diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 29430949ce2f..16c6fea4b2db 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -601,6 +601,28 @@ static ssize_t dctype_show(struct device *dev, } static DEVICE_ATTR_RO(dctype); +static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int count = 0, i; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + unsigned long quirks = ctrl->quirks; + + if (!quirks) + return sysfs_emit(buf, "none\n"); + + for (i = 0; quirks; ++i) { + if (quirks & 1) { + count += sysfs_emit_at(buf, count, "%s\n", + nvme_quirk_name(BIT(i))); + } + quirks >>= 1; + } + + return count; +} +static DEVICE_ATTR_RO(quirks); + #ifdef CONFIG_NVME_HOST_AUTH static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -742,6 +764,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_kato.attr, &dev_attr_cntrltype.attr, &dev_attr_dctype.attr, + &dev_attr_quirks.attr, #ifdef CONFIG_NVME_HOST_AUTH &dev_attr_dhchap_secret.attr, &dev_attr_dhchap_ctrl_secret.attr, From ac30cd304347f2daeece6998bb5f0ae2db64e397 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 28 Jan 2026 08:26:23 +0000 Subject: [PATCH 003/828] nvme: stop using AWUPF As described at [0], much of the atomic write parts of the specification are lacking. For now, there is nothing which we can do in software about the lack of a dedicated NVMe write atomic command. As for reading the atomic write limits, it is felt that the per-namespace values are mostly properly specified and it is assumed that they are properly implemented. The specification of NAWUPF is quite clear. However the specification of NABSPF is less clear. The lack of clarity in NABSPF comes from deciding whether NABSPF applies when NSABP is 0 - it is assumed that NSABPF does not apply when NSABP is 0. As for the per-controller AWUPF, how this value applies to shared namespaces is missing in the specification. Furthermore, the value is in terms of logical blocks, which is an NS entity. Since AWUPF is so poorly defined, stop using it already together. Hopefully this will force vendors to implement NAWUPF support always. Note that AWUPF not only effects atomic write support, but also the physical block size reported for the device. To help users know this restriction, log an info message per NS. [0] https://lore.kernel.org/linux-nvme/20250707141834.GA30198@lst.de/ Tested-by: Nilay Shroff Reviewed-by: Nilay Shroff Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 14 +++++--------- drivers/nvme/host/nvme.h | 3 ++- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7bf228df6001..83efc88ac0f9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2045,14 +2045,10 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns, if (id->nabspf) boundary = (le16_to_cpu(id->nabspf) + 1) * bs; } else { - /* - * Use the controller wide atomic write unit. This sucks - * because the limit is defined in terms of logical blocks while - * namespaces can have different formats, and because there is - * no clear language in the specification prohibiting different - * values for different controllers in the subsystem. - */ - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + if (ns->ctrl->awupf) + dev_info_once(ns->ctrl->device, + "AWUPF ignored, only NAWUPF accepted\n"); + atomic_bs = bs; } lim->atomic_write_hw_max = atomic_bs; @@ -3221,7 +3217,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->model, id->mn, sizeof(subsys->model)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; - subsys->awupf = le16_to_cpu(id->awupf); /* Versions prior to 1.4 don't necessarily report a valid type */ if (id->cntrltype == NVME_CTRL_DISC || @@ -3654,6 +3649,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); + ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 523015ae2add..9971045dbc05 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -464,6 +464,8 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; + + u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -496,7 +498,6 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; - u16 awupf; /* 0's based value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; From 1799d8abeabc68ec05679292aaf6cba93b343c05 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 27 Jan 2026 19:38:44 +0800 Subject: [PATCH 004/828] xfrm6: fix uninitialized saddr in xfrm6_get_saddr() xfrm6_get_saddr() does not check the return value of ipv6_dev_get_saddr(). When ipv6_dev_get_saddr() fails to find a suitable source address (returns -EADDRNOTAVAIL), saddr->in6 is left uninitialized, but xfrm6_get_saddr() still returns 0 (success). This causes the caller xfrm_tmpl_resolve_one() to use the uninitialized address in xfrm_state_find(), triggering KMSAN warning: ===================================================== BUG: KMSAN: uninit-value in xfrm_state_find+0x2424/0xa940 xfrm_state_find+0x2424/0xa940 xfrm_resolve_and_create_bundle+0x906/0x5a20 xfrm_lookup_with_ifid+0xcc0/0x3770 xfrm_lookup_route+0x63/0x2b0 ip_route_output_flow+0x1ce/0x270 udp_sendmsg+0x2ce1/0x3400 inet_sendmsg+0x1ef/0x2a0 __sock_sendmsg+0x278/0x3d0 __sys_sendto+0x593/0x720 __x64_sys_sendto+0x130/0x200 x64_sys_call+0x332b/0x3e70 do_syscall_64+0xd3/0xf80 entry_SYSCALL_64_after_hwframe+0x77/0x7f Local variable tmp.i.i created at: xfrm_resolve_and_create_bundle+0x3e3/0x5a20 xfrm_lookup_with_ifid+0xcc0/0x3770 ===================================================== Fix by checking the return value of ipv6_dev_get_saddr() and propagating the error. Fixes: a1e59abf8249 ("[XFRM]: Fix wildcard as tunnel source") Reported-by: syzbot+e136d86d34b42399a8b1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68bf1024.a70a0220.7a912.02c2.GAE@google.com/T/ Signed-off-by: Jiayuan Chen Signed-off-by: Jiayuan Chen Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 1f19b6f14484..125ea9a5b8a0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -57,6 +57,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, struct dst_entry *dst; struct net_device *dev; struct inet6_dev *idev; + int err; dst = xfrm6_dst_lookup(params); if (IS_ERR(dst)) @@ -68,9 +69,11 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, return -EHOSTUNREACH; } dev = idev->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, ¶ms->daddr->in6, 0, - &saddr->in6); + err = ipv6_dev_get_saddr(dev_net(dev), dev, ¶ms->daddr->in6, 0, + &saddr->in6); dst_release(dst); + if (err) + return -EHOSTUNREACH; return 0; } From 0a4524bc69882a4ddb235bb6b279597721bda197 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jan 2026 14:49:23 +0200 Subject: [PATCH 005/828] xfrm: skip templates check for packet offload tunnel mode In packet offload, hardware is responsible to check templates. The result of its operation is forwarded through secpath by relevant drivers. That secpath is actually removed in __xfrm_policy_check2(). In case packet is forwarded, this secpath is reset in RX, but pushed again to TX where policy is rechecked again against dummy secpath in xfrm_policy_ok(). Such situation causes to unexpected XfrmInTmplMismatch increase. As a solution, simply skip template mismatch check. Fixes: 600258d555f0 ("xfrm: delete intermediate secpath entry in packet offload mode") Signed-off-by: Leon Romanovsky Reviewed-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 62486f866975..5428185196a1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3801,8 +3801,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; + int i, k = 0; int ti = 0; - int i, k; sp = skb_sec_path(skb); if (!sp) @@ -3828,6 +3828,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp = stp; } + if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET && sp == &dummy) + /* This policy template was already checked by HW + * and secpath was removed in __xfrm_policy_check2. + */ + goto out; + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement @@ -3837,7 +3843,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, * verified to allow them to be skipped in future policy * checks (e.g. nested tunnels). */ - for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + for (i = xfrm_nr - 1; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); if (k < 0) { if (k < -1) @@ -3853,6 +3859,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, goto reject; } +out: xfrm_pols_put(pols, npols); sp->verified_cnt = k; From 0a1fc2f301529ac75aec0ce80d5ab9d9e4dc4b16 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Sat, 31 Jan 2026 19:08:40 -0800 Subject: [PATCH 006/828] nvme-fabrics: use kfree_sensitive() for DHCHAP secrets The DHCHAP secrets (dhchap_secret and dhchap_ctrl_secret) contain authentication key material for NVMe-oF. Use kfree_sensitive() instead of kfree() in nvmf_free_options() to ensure secrets are zeroed before the memory is freed, preventing recovery from freed pages. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Hodges Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 55a8afd2efd5..d37cb140d832 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1290,8 +1290,8 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts) kfree(opts->subsysnqn); kfree(opts->host_traddr); kfree(opts->host_iface); - kfree(opts->dhchap_secret); - kfree(opts->dhchap_ctrl_secret); + kfree_sensitive(opts->dhchap_secret); + kfree_sensitive(opts->dhchap_ctrl_secret); kfree(opts); } EXPORT_SYMBOL_GPL(nvmf_free_options); From b84bb7bd913d8ca2f976ee6faf4a174f91c02b8d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 31 Jan 2026 22:48:08 +0800 Subject: [PATCH 007/828] nvme: fix admin queue leak on controller reset When nvme_alloc_admin_tag_set() is called during a controller reset, a previous admin queue may still exist. Release it properly before allocating a new one to avoid orphaning the old queue. This fixes a regression introduced by commit 03b3bcd319b3 ("nvme: fix admin request_queue lifetime"). Cc: Keith Busch Fixes: 03b3bcd319b3 ("nvme: fix admin request_queue lifetime"). Reported-and-tested-by: Yi Zhang Closes: https://lore.kernel.org/linux-block/CAHj4cs9wv3SdPo+N01Fw2SHBYDs9tj2M_e1-GdQOkRy=DsBB1w@mail.gmail.com/ Signed-off-by: Ming Lei Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 83efc88ac0f9..c12986495e71 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4860,6 +4860,13 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ret) return ret; + /* + * If a previous admin queue exists (e.g., from before a reset), + * put it now before allocating a new one to avoid orphaning it. + */ + if (ctrl->admin_q) + blk_put_queue(ctrl->admin_q); + ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL); if (IS_ERR(ctrl->admin_q)) { ret = PTR_ERR(ctrl->admin_q); From 594c11d0e1d445f580898a2b8c850f2e3f099368 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 27 Jan 2026 07:22:35 -0600 Subject: [PATCH 008/828] ipmi: Fix use-after-free and list corruption on sender error The analysis from Breno: When the SMI sender returns an error, smi_work() delivers an error response but then jumps back to restart without cleaning up properly: 1. intf->curr_msg is not cleared, so no new message is pulled 2. newmsg still points to the message, causing sender() to be called again with the same message 3. If sender() fails again, deliver_err_response() is called with the same recv_msg that was already queued for delivery This causes list_add corruption ("list_add double add") because the recv_msg is added to the user_msgs list twice. Subsequently, the corrupted list leads to use-after-free when the memory is freed and reused, and eventually a NULL pointer dereference when accessing recv_msg->done. The buggy sequence: sender() fails -> deliver_err_response(recv_msg) // recv_msg queued for delivery -> goto restart // curr_msg not cleared! sender() fails again (same message!) -> deliver_err_response(recv_msg) // tries to queue same recv_msg -> LIST CORRUPTION Fix this by freeing the message and setting it to NULL on a send error. Also, always free the newmsg on a send error, otherwise it will leak. Reported-by: Breno Leitao Closes: https://lore.kernel.org/lkml/20260127-ipmi-v1-0-ba5cc90f516f@debian.org/ Fixes: 9cf93a8fa9513 ("ipmi: Allow an SMI sender to return an error") Cc: stable@vger.kernel.org # 4.18 Reviewed-by: Breno Leitao Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 3f48fc6ab596..a590a67294e2 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4852,8 +4852,15 @@ restart: if (newmsg->recv_msg) deliver_err_response(intf, newmsg->recv_msg, cc); - else - ipmi_free_smi_msg(newmsg); + if (!run_to_completion) + spin_lock_irqsave(&intf->xmit_msgs_lock, + flags); + intf->curr_msg = NULL; + if (!run_to_completion) + spin_unlock_irqrestore(&intf->xmit_msgs_lock, + flags); + ipmi_free_smi_msg(newmsg); + newmsg = NULL; goto restart; } } From 1d90e6c1a56f6ab83e5c9d30ded19e7ac8155713 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 27 Jan 2026 07:35:02 -0600 Subject: [PATCH 009/828] ipmi: Consolidate the run to completion checking for xmit msgs lock It made things hard to read, move the check to a function. Signed-off-by: Corey Minyard Reviewed-by: Breno Leitao --- drivers/char/ipmi/ipmi_msghandler.c | 42 ++++++++++++++++------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a590a67294e2..a042b1596933 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -602,6 +602,22 @@ static int __ipmi_bmc_register(struct ipmi_smi *intf, static int __scan_channels(struct ipmi_smi *intf, struct ipmi_device_id *id, bool rescan); +static void ipmi_lock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion, + unsigned long *flags) +{ + if (run_to_completion) + return; + spin_lock_irqsave(&intf->xmit_msgs_lock, *flags); +} + +static void ipmi_unlock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion, + unsigned long *flags) +{ + if (run_to_completion) + return; + spin_unlock_irqrestore(&intf->xmit_msgs_lock, *flags); +} + static void free_ipmi_user(struct kref *ref) { struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); @@ -1878,11 +1894,9 @@ static void smi_send(struct ipmi_smi *intf, int run_to_completion = READ_ONCE(intf->run_to_completion); unsigned long flags = 0; - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); smi_msg = smi_add_send_msg(intf, smi_msg, priority); - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (smi_msg) handlers->sender(intf->send_info, smi_msg); @@ -4826,8 +4840,7 @@ static void smi_work(struct work_struct *t) * message delivery. */ restart: - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); if (intf->curr_msg == NULL && !intf->in_shutdown) { struct list_head *entry = NULL; @@ -4843,8 +4856,7 @@ restart: intf->curr_msg = newmsg; } } - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (newmsg) { cc = intf->handlers->sender(intf->send_info, newmsg); @@ -4852,13 +4864,9 @@ restart: if (newmsg->recv_msg) deliver_err_response(intf, newmsg->recv_msg, cc); - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, - flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); intf->curr_msg = NULL; - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, - flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); ipmi_free_smi_msg(newmsg); newmsg = NULL; goto restart; @@ -4928,16 +4936,14 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf, spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock, flags); - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); /* * We can get an asynchronous event or receive message in addition * to commands we send. */ if (msg == intf->curr_msg) intf->curr_msg = NULL; - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (run_to_completion) smi_work(&intf->smi_work); From 9f235ccecd03c436cb1683eac16b12f119e54aa9 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 13 Jan 2026 17:41:34 +0800 Subject: [PATCH 010/828] ipmi: ipmb: initialise event handler read bytes IPMB doesn't use i2c reads, but the handler needs to set a value. Otherwise an i2c read will return an uninitialised value from the bus driver. Fixes: 63c4eb347164 ("ipmi:ipmb: Add initial support for IPMI over IPMB") Signed-off-by: Matt Johnston Message-ID: <20260113-ipmb-read-init-v1-1-a9cbce7b94e3@codeconstruct.com.au> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ipmb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/ipmi/ipmi_ipmb.c b/drivers/char/ipmi/ipmi_ipmb.c index 3a51e58b2487..28818952a7a4 100644 --- a/drivers/char/ipmi/ipmi_ipmb.c +++ b/drivers/char/ipmi/ipmi_ipmb.c @@ -202,11 +202,16 @@ static int ipmi_ipmb_slave_cb(struct i2c_client *client, break; case I2C_SLAVE_READ_REQUESTED: + *val = 0xff; + ipmi_ipmb_check_msg_done(iidev); + break; + case I2C_SLAVE_STOP: ipmi_ipmb_check_msg_done(iidev); break; case I2C_SLAVE_READ_PROCESSED: + *val = 0xff; break; } From 6b157b408d0c7d125e4d7c62e11e7d9376a5d150 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 16 Jan 2026 17:22:01 -0600 Subject: [PATCH 011/828] ipmi:ls2k: Make ipmi_ls2k_platform_driver static No need for it to be global. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601170753.3zDBerGP-lkp@intel.com/ Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_ls2k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_si_ls2k.c b/drivers/char/ipmi/ipmi_si_ls2k.c index 45442c257efd..4c1da80f256c 100644 --- a/drivers/char/ipmi/ipmi_si_ls2k.c +++ b/drivers/char/ipmi/ipmi_si_ls2k.c @@ -168,7 +168,7 @@ static void ipmi_ls2k_remove(struct platform_device *pdev) ipmi_si_remove_by_dev(&pdev->dev); } -struct platform_driver ipmi_ls2k_platform_driver = { +static struct platform_driver ipmi_ls2k_platform_driver = { .driver = { .name = "ls2k-ipmi-si", }, From 7bb8c40f5ad88392bbabb719ebfd5e3354ce0428 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 4 Feb 2026 11:55:55 +0100 Subject: [PATCH 012/828] nvme: add support for dynamic quirk configuration via module parameter Introduce support for enabling or disabling specific NVMe quirks at module load time through the `quirks` module parameter. This mechanism allows users to apply known quirks dynamically based on the device's PCI vendor and device IDs, without requiring to add hardcoded entries in the driver and recompiling the kernel. While the generic PCI new_id sysfs interface exists for dynamic configuration, it is insufficient for scenarios where the system fails to boot (for example, this has been reported to happen because of the bogus_nid quirk). The new_id attribute is writable only after the system has booted and sysfs is mounted. The `quirks` parameter accepts a list of quirk specifications separated by a '-' character in the following format: ::[-::-..] Each quirk is represented by its name and can be prefixed with `^` to indicate that the quirk should be disabled; quirk names are separated by a ',' character. Example: enable BOGUS_NID and BROKEN_MSI, disable DEALLOCATE_ZEROES: $ modprobe nvme quirks=7170:2210:bogus_nid,broken_msi,^deallocate_zeroes Tested-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Maurizio Lombardi Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- .../admin-guide/kernel-parameters.txt | 13 ++ drivers/nvme/host/pci.c | 162 ++++++++++++++++++ 2 files changed, 175 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a8d0afde7f85..f0b286c2dfc1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -74,6 +74,7 @@ TPM TPM drivers are enabled. UMS USB Mass Storage support is enabled. USB USB support is enabled. + NVME NVMe support is enabled USBHID USB Human Interface Device support is enabled. V4L Video For Linux support is enabled. VGA The VGA console has been enabled. @@ -4671,6 +4672,18 @@ Kernel parameters This can be set from sysctl after boot. See Documentation/admin-guide/sysctl/vm.rst for details. + nvme.quirks= [NVME] A list of quirk entries to augment the built-in + nvme quirk list. List entries are separated by a + '-' character. + Each entry has the form VendorID:ProductID:quirk_names. + The IDs are 4-digits hex numbers and quirk_names is a + list of quirk names separated by commas. A quirk name + can be prefixed by '^', meaning that the specified + quirk must be disabled. + + Example: + nvme.quirks=7710:2267:bogus_nid,^identify_cns-9900:7711:broken_msi + ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver. See Documentation/core-api/debugging-via-ohci1394.rst for more info. diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9fc4a60280a0..bd884e294600 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -72,6 +72,13 @@ static_assert(MAX_PRP_RANGE / NVME_CTRL_PAGE_SIZE <= (1 /* prp1 */ + NVME_MAX_NR_DESCRIPTORS * PRPS_PER_PAGE)); +struct quirk_entry { + u16 vendor_id; + u16 dev_id; + u32 enabled_quirks; + u32 disabled_quirks; +}; + static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0444); @@ -102,6 +109,142 @@ static unsigned int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096"); +static struct quirk_entry *nvme_pci_quirk_list; +static unsigned int nvme_pci_quirk_count; + +/* Helper to parse individual quirk names */ +static int nvme_parse_quirk_names(char *quirk_str, struct quirk_entry *entry) +{ + int i; + size_t field_len; + bool disabled, found; + char *p = quirk_str, *field; + + while ((field = strsep(&p, ",")) && *field) { + disabled = false; + found = false; + + if (*field == '^') { + /* Skip the '^' character */ + disabled = true; + field++; + } + + field_len = strlen(field); + for (i = 0; i < 32; i++) { + unsigned int bit = 1U << i; + char *q_name = nvme_quirk_name(bit); + size_t q_len = strlen(q_name); + + if (!strcmp(q_name, "unknown")) + break; + + if (!strcmp(q_name, field) && + q_len == field_len) { + if (disabled) + entry->disabled_quirks |= bit; + else + entry->enabled_quirks |= bit; + found = true; + break; + } + } + + if (!found) { + pr_err("nvme: unrecognized quirk %s\n", field); + return -EINVAL; + } + } + return 0; +} + +/* Helper to parse a single VID:DID:quirk_names entry */ +static int nvme_parse_quirk_entry(char *s, struct quirk_entry *entry) +{ + char *field; + + field = strsep(&s, ":"); + if (!field || kstrtou16(field, 16, &entry->vendor_id)) + return -EINVAL; + + field = strsep(&s, ":"); + if (!field || kstrtou16(field, 16, &entry->dev_id)) + return -EINVAL; + + field = strsep(&s, ":"); + if (!field) + return -EINVAL; + + return nvme_parse_quirk_names(field, entry); +} + +static int quirks_param_set(const char *value, const struct kernel_param *kp) +{ + int count, err, i; + struct quirk_entry *qlist; + char *field, *val, *sep_ptr; + + err = param_set_copystring(value, kp); + if (err) + return err; + + val = kstrdup(value, GFP_KERNEL); + if (!val) + return -ENOMEM; + + if (!*val) + goto out_free_val; + + count = 1; + for (i = 0; val[i]; i++) { + if (val[i] == '-') + count++; + } + + qlist = kcalloc(count, sizeof(*qlist), GFP_KERNEL); + if (!qlist) { + err = -ENOMEM; + goto out_free_val; + } + + i = 0; + sep_ptr = val; + while ((field = strsep(&sep_ptr, "-"))) { + if (nvme_parse_quirk_entry(field, &qlist[i])) { + pr_err("nvme: failed to parse quirk string %s\n", + value); + goto out_free_qlist; + } + + i++; + } + + nvme_pci_quirk_count = count; + nvme_pci_quirk_list = qlist; + goto out_free_val; + +out_free_qlist: + kfree(qlist); +out_free_val: + kfree(val); + return err; +} + +static char quirks_param[128]; +static const struct kernel_param_ops quirks_param_ops = { + .set = quirks_param_set, + .get = param_get_string, +}; + +static struct kparam_string quirks_param_string = { + .maxlen = sizeof(quirks_param), + .string = quirks_param, +}; + +module_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0444); +MODULE_PARM_DESC(quirks, "Enable/disable NVMe quirks by specifying " + "quirks=VID:DID:quirk_names"); + static int io_queue_count_set(const char *val, const struct kernel_param *kp) { unsigned int n; @@ -3439,12 +3582,25 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return 0; } +static struct quirk_entry *detect_dynamic_quirks(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < nvme_pci_quirk_count; i++) + if (pdev->vendor == nvme_pci_quirk_list[i].vendor_id && + pdev->device == nvme_pci_quirk_list[i].dev_id) + return &nvme_pci_quirk_list[i]; + + return NULL; +} + static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, const struct pci_device_id *id) { unsigned long quirks = id->driver_data; int node = dev_to_node(&pdev->dev); struct nvme_dev *dev; + struct quirk_entry *qentry; int ret = -ENOMEM; dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids), @@ -3476,6 +3632,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, "platform quirk: setting simple suspend\n"); quirks |= NVME_QUIRK_SIMPLE_SUSPEND; } + qentry = detect_dynamic_quirks(pdev); + if (qentry) { + quirks |= qentry->enabled_quirks; + quirks &= ~qentry->disabled_quirks; + } ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); if (ret) @@ -4074,6 +4235,7 @@ static int __init nvme_init(void) static void __exit nvme_exit(void) { + kfree(nvme_pci_quirk_list); pci_unregister_driver(&nvme_driver); flush_workqueue(nvme_wq); } From 211ecfaaef186ee5230a77d054cdec7fbfc6724a Mon Sep 17 00:00:00 2001 From: Brad Spengler Date: Wed, 7 Jan 2026 12:12:36 -0500 Subject: [PATCH 013/828] drm/vmwgfx: Fix invalid kref_put callback in vmw_bo_dirty_release The kref_put() call uses (void *)kvfree as the release callback, which is incorrect. kref_put() expects a function with signature void (*release)(struct kref *), but kvfree has signature void (*)(const void *). Calling through an incompatible function pointer is undefined behavior. The code only worked by accident because ref_count is the first member of vmw_bo_dirty, making the kref pointer equal to the struct pointer. Fix this by adding a proper release callback that uses container_of() to retrieve the containing structure before freeing. Fixes: c1962742ffff ("drm/vmwgfx: Use kref in vmw_bo_dirty") Signed-off-by: Brad Spengler Signed-off-by: Zack Rusin Cc: Ian Forbes Link: https://patch.msgid.link/20260107171236.3573118-1-zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index fd4e76486f2d..45561bc1c9ef 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -260,6 +260,13 @@ out_no_dirty: return ret; } +static void vmw_bo_dirty_free(struct kref *kref) +{ + struct vmw_bo_dirty *dirty = container_of(kref, struct vmw_bo_dirty, ref_count); + + kvfree(dirty); +} + /** * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object * @vbo: The buffer object @@ -274,7 +281,7 @@ void vmw_bo_dirty_release(struct vmw_bo *vbo) { struct vmw_bo_dirty *dirty = vbo->dirty; - if (dirty && kref_put(&dirty->ref_count, (void *)kvfree)) + if (dirty && kref_put(&dirty->ref_count, vmw_bo_dirty_free)) vbo->dirty = NULL; } From 922f9dec5d19df4cfbb7070275e5c131d10c80f3 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Fri, 9 Jan 2026 09:51:39 -0600 Subject: [PATCH 014/828] drm/vmwgfx: Set a unique ID for each submitted command buffer These IDs are logged by the Hypervisor when debug logging is enabled. Having the IDs in the log makes it much easier to see when command buffers start and finish. They can also be used by logging/tracing in the Guest to help correlate between Guest and Hypervisor logs. Signed-off-by: Ian Forbes Signed-off-by: Zack Rusin Link: https://patch.msgid.link/20260109155139.3259493-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 94e8982f5616..1ee37690b940 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -105,6 +105,7 @@ struct vmw_cmdbuf_context { * @handle: DMA address handle for the command buffer space if @using_mob is * false. Immutable. * @size: The size of the command buffer space. Immutable. + * @id: Monotonically increasing ID of the last cmdbuf submitted. * @num_contexts: Number of contexts actually enabled. */ struct vmw_cmdbuf_man { @@ -132,6 +133,7 @@ struct vmw_cmdbuf_man { bool has_pool; dma_addr_t handle; size_t size; + u64 id; u32 num_contexts; }; @@ -303,6 +305,8 @@ static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header) struct vmw_cmdbuf_man *man = header->man; u32 val; + header->cb_header->id = man->id++; + val = upper_32_bits(header->handle); vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val); From 5023ca80f9589295cb60735016e39fc5cc714243 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Tue, 13 Jan 2026 11:53:57 -0600 Subject: [PATCH 015/828] drm/vmwgfx: Return the correct value in vmw_translate_ptr functions Before the referenced fixes these functions used a lookup function that returned a pointer. This was changed to another lookup function that returned an error code with the pointer becoming an out parameter. The error path when the lookup failed was not changed to reflect this change and the code continued to return the PTR_ERR of the now uninitialized pointer. This could cause the vmw_translate_ptr functions to return success when they actually failed causing further uninitialized and OOB accesses. Reported-by: Kuzey Arda Bulut Fixes: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Ian Forbes Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patch.msgid.link/20260113175357.129285-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 3057f8baa7d2..e1f18020170a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1143,7 +1143,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); if (ret != 0) { drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n"); - return PTR_ERR(vmw_bo); + return ret; } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB); ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); @@ -1199,7 +1199,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); if (ret != 0) { drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n"); - return PTR_ERR(vmw_bo); + return ret; } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM); From 63059500b124254c2630b2f8c46cb3555e726f52 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 5 Feb 2026 17:11:15 +0000 Subject: [PATCH 016/828] nvme: stop setting namespace gendisk device driver data Since commit 1f4137e882c6 ("nvme: move passthrough logging attribute to head"), we stopped using the namespace to hold the passthrough logging enabled attribute. There is now nowhere now which looks up the gendisk dev driver data, so stop setting it. Incidentally, it would have been better to set this before adding the disk. Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c12986495e71..3a2126584a23 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4181,13 +4181,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) nvme_mpath_add_disk(ns, info->anagrpid); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); - /* - * Set ns->disk->device->driver_data to ns so we can access - * ns->head->passthru_err_log_enabled in - * nvme_io_passthru_err_log_enabled_[store | show](). - */ - dev_set_drvdata(disk_to_dev(ns->disk), ns); - return; out_cleanup_ns_from_list: From 3ddfbfbc78ac1d3d9e95098fb6a32b57b8a0dcae Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 5 Feb 2026 17:11:14 +0000 Subject: [PATCH 017/828] nvme: correct comment about nvme_ns_remove() The comment in nvme_mpath_remove_disk() references nvme_remove_ns(), which should be nvme_ns_remove(). Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 174027d1cc19..bfcc5904e6a2 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -1300,7 +1300,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) mutex_lock(&head->subsys->lock); /* * We are called when all paths have been removed, and at that point - * head->list is expected to be empty. However, nvme_remove_ns() and + * head->list is expected to be empty. However, nvme_ns_remove() and * nvme_init_ns_head() can run concurrently and so if head->delayed_ * removal_secs is configured, it is possible that by the time we reach * this point, head->list may no longer be empty. Therefore, we recheck From 52c9ee202edd21d0599ac3b5a6fe1da2a2f053e5 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 6 Feb 2026 09:59:32 -0600 Subject: [PATCH 018/828] ipmi:si: Handle waiting messages when BMC failure detected If a BMC failure is detected, the current message is returned with an error. However, if there was a waiting message, it would not be handled. Add a check for the waiting message after handling the current message. Suggested-by: Guenter Roeck Reported-by: Rafael J. Wysocki Closes: https://lore.kernel.org/linux-acpi/CAK8fFZ58fidGUCHi5WFX0uoTPzveUUDzT=k=AAm4yWo3bAuCFg@mail.gmail.com/ Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 5459ffdde8dc..ff159b1162b9 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -809,6 +809,12 @@ restart: */ return_hosed_msg(smi_info, IPMI_BUS_ERR); } + if (smi_info->waiting_msg != NULL) { + /* Also handle if there was a message waiting. */ + smi_info->curr_msg = smi_info->waiting_msg; + smi_info->waiting_msg = NULL; + return_hosed_msg(smi_info, IPMI_BUS_ERR); + } smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_HOSED); goto out; } From c3bb3295637cc9bf514f690941ca9a385bf30113 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 6 Feb 2026 10:33:52 -0600 Subject: [PATCH 019/828] ipmi:si: Use a long timeout when the BMC is misbehaving If the driver goes into HOSED state, don't reset the timeout to the short timeout in the timeout handler. Reported-by: Igor Raits Closes: https://lore.kernel.org/linux-acpi/CAK8fFZ58fidGUCHi5WFX0uoTPzveUUDzT=k=AAm4yWo3bAuCFg@mail.gmail.com/ Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index ff159b1162b9..0049e3792ba1 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1119,7 +1119,9 @@ static void smi_timeout(struct timer_list *t) * SI_USEC_PER_JIFFY); smi_result = smi_event_handler(smi_info, time_diff); - if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { + if (smi_info->si_state == SI_HOSED) { + timeout = jiffies + SI_TIMEOUT_HOSED; + } else if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { /* Running with interrupts, only do long timeouts. */ timeout = jiffies + SI_TIMEOUT_JIFFIES; smi_inc_stat(smi_info, long_timeouts); From 4efa91a28576054aae0e6dad9cba8fed8293aef8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 30 Jan 2026 19:42:47 +0900 Subject: [PATCH 020/828] xfrm: always flush state and policy upon NETDEV_UNREGISTER event syzbot is reporting that "struct xfrm_state" refcount is leaking. unregister_netdevice: waiting for netdevsim0 to become free. Usage count = 2 ref_tracker: netdev@ffff888052f24618 has 1/1 users at __netdev_tracker_alloc include/linux/netdevice.h:4400 [inline] netdev_tracker_alloc include/linux/netdevice.h:4412 [inline] xfrm_dev_state_add+0x3a5/0x1080 net/xfrm/xfrm_device.c:316 xfrm_state_construct net/xfrm/xfrm_user.c:986 [inline] xfrm_add_sa+0x34ff/0x5fa0 net/xfrm/xfrm_user.c:1022 xfrm_user_rcv_msg+0x58e/0xc00 net/xfrm/xfrm_user.c:3507 netlink_rcv_skb+0x158/0x420 net/netlink/af_netlink.c:2550 xfrm_netlink_rcv+0x71/0x90 net/xfrm/xfrm_user.c:3529 netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] netlink_unicast+0x5aa/0x870 net/netlink/af_netlink.c:1344 netlink_sendmsg+0x8c8/0xdd0 net/netlink/af_netlink.c:1894 sock_sendmsg_nosec net/socket.c:727 [inline] __sock_sendmsg net/socket.c:742 [inline] ____sys_sendmsg+0xa5d/0xc30 net/socket.c:2592 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2646 __sys_sendmsg+0x16d/0x220 net/socket.c:2678 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f This is because commit d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") implemented xfrm_dev_unregister() as no-op despite xfrm_dev_state_add() from xfrm_state_construct() acquires a reference to "struct net_device". I guess that that commit expected that NETDEV_DOWN event is fired before NETDEV_UNREGISTER event fires, and also assumed that xfrm_dev_state_add() is called only if (dev->features & NETIF_F_HW_ESP) != 0. Sabrina Dubroca identified steps to reproduce the same symptoms as below. echo 0 > /sys/bus/netdevsim/new_device dev=$(ls -1 /sys/bus/netdevsim/devices/netdevsim0/net/) ip xfrm state add src 192.168.13.1 dst 192.168.13.2 proto esp \ spi 0x1000 mode tunnel aead 'rfc4106(gcm(aes))' $key 128 \ offload crypto dev $dev dir out ethtool -K $dev esp-hw-offload off echo 0 > /sys/bus/netdevsim/del_device Like these steps indicate, the NETIF_F_HW_ESP bit can be cleared after xfrm_dev_state_add() acquired a reference to "struct net_device". Also, xfrm_dev_state_add() does not check for the NETIF_F_HW_ESP bit when acquiring a reference to "struct net_device". Commit 03891f820c21 ("xfrm: handle NETDEV_UNREGISTER for xfrm device") re-introduced the NETDEV_UNREGISTER event to xfrm_dev_event(), but that commit for unknown reason chose to share xfrm_dev_down() between the NETDEV_DOWN event and the NETDEV_UNREGISTER event. I guess that that commit missed the behavior in the previous paragraph. Therefore, we need to re-introduce xfrm_dev_unregister() in order to release the reference to "struct net_device" by unconditionally flushing state and policy. Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Cc: Sabrina Dubroca Signed-off-by: Tetsuo Handa Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 52ae0e034d29..550457e4c4f0 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -544,6 +544,14 @@ static int xfrm_dev_down(struct net_device *dev) return NOTIFY_DONE; } +static int xfrm_dev_unregister(struct net_device *dev) +{ + xfrm_dev_state_flush(dev_net(dev), dev, true); + xfrm_dev_policy_flush(dev_net(dev), dev, true); + + return NOTIFY_DONE; +} + static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -556,8 +564,10 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void return xfrm_api_check(dev); case NETDEV_DOWN: - case NETDEV_UNREGISTER: return xfrm_dev_down(dev); + + case NETDEV_UNREGISTER: + return xfrm_dev_unregister(dev); } return NOTIFY_DONE; } From fd3634312a04f336dcbfb481060219f0cd320738 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 7 Feb 2026 14:27:05 +0100 Subject: [PATCH 021/828] debugobject: Make it work with deferred page initialization - again debugobjects uses __GFP_HIGH for allocations as it might be invoked within locked regions. That worked perfectly fine until v6.18. It still works correctly when deferred page initialization is disabled and works by chance when no page allocation is required before deferred page initialization has completed. Since v6.18 allocations w/o a reclaim flag cause new_slab() to end up in alloc_frozen_pages_nolock_noprof(), which returns early when deferred page initialization has not yet completed. As the deferred page initialization takes quite a while the debugobject pool is depleted and debugobjects are disabled. This can be worked around when PREEMPT_COUNT is enabled as that allows debugobjects to add __GFP_KSWAPD_RECLAIM to the GFP flags when the context is preemtible. When PREEMPT_COUNT is disabled the context is unknown and the reclaim bit can't be set because the caller might hold locks which might deadlock in the allocator. In preemptible context the reclaim bit is harmless and not a performance issue as that's usually invoked from slow path initialization context. That makes debugobjects depend on PREEMPT_COUNT || !DEFERRED_STRUCT_PAGE_INIT. Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Acked-by: Alexei Starovoitov Acked-by: Vlastimil Babka Link: https://patch.msgid.link/87pl6gznti.ffs@tglx --- lib/Kconfig.debug | 1 + lib/debugobjects.c | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba36939fda79..a874146ad828 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -723,6 +723,7 @@ source "mm/Kconfig.debug" config DEBUG_OBJECTS bool "Debug object operations" + depends on PREEMPT_COUNT || !DEFERRED_STRUCT_PAGE_INIT depends on DEBUG_KERNEL help If you say Y here, additional code will be inserted into the diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 89a1d6745dc2..12f50de85b62 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -398,9 +398,26 @@ static void fill_pool(void) atomic_inc(&cpus_allocating); while (pool_should_refill(&pool_global)) { + gfp_t gfp = __GFP_HIGH | __GFP_NOWARN; HLIST_HEAD(head); - if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN)) + /* + * Allow reclaim only in preemptible context and during + * early boot. If not preemptible, the caller might hold + * locks causing a deadlock in the allocator. + * + * If the reclaim flag is not set during early boot then + * allocations, which happen before deferred page + * initialization has completed, will fail. + * + * In preemptible context the flag is harmless and not a + * performance issue as that's usually invoked from slow + * path initialization context. + */ + if (preemptible() || system_state < SYSTEM_SCHEDULING) + gfp |= __GFP_KSWAPD_RECLAIM; + + if (!kmem_alloc_batch(&head, obj_cache, gfp)) break; guard(raw_spinlock_irqsave)(&pool_lock); From fef0e649f8b42bdffe4a916dd46e1b1e9ad2f207 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Fri, 30 Jan 2026 00:21:19 +0800 Subject: [PATCH 022/828] drm/logicvc: Fix device node reference leak in logicvc_drm_config_parse() The logicvc_drm_config_parse() function calls of_get_child_by_name() to find the "layers" node but fails to release the reference, leading to a device node reference leak. Fix this by using the __free(device_node) cleanup attribute to automatic release the reference when the variable goes out of scope. Fixes: efeeaefe9be5 ("drm: Add support for the LogiCVC display controller") Signed-off-by: Felix Gu Reviewed-by: Luca Ceresoli Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260130-logicvc_drm-v1-1-04366463750c@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/logicvc/logicvc_drm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/logicvc/logicvc_drm.c b/drivers/gpu/drm/logicvc/logicvc_drm.c index 204b0fee55d0..bbebf4fc7f51 100644 --- a/drivers/gpu/drm/logicvc/logicvc_drm.c +++ b/drivers/gpu/drm/logicvc/logicvc_drm.c @@ -92,7 +92,6 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc) struct device *dev = drm_dev->dev; struct device_node *of_node = dev->of_node; struct logicvc_drm_config *config = &logicvc->config; - struct device_node *layers_node; int ret; logicvc_of_property_parse_bool(of_node, LOGICVC_OF_PROPERTY_DITHERING, @@ -128,7 +127,8 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc) if (ret) return ret; - layers_node = of_get_child_by_name(of_node, "layers"); + struct device_node *layers_node __free(device_node) = + of_get_child_by_name(of_node, "layers"); if (!layers_node) { drm_err(drm_dev, "Missing non-optional layers node\n"); return -EINVAL; From b777b5e09eabeefc6ba80f4296366a4742701103 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Feb 2026 17:02:25 +0000 Subject: [PATCH 023/828] time/jiffies: Inline jiffies_to_msecs() and jiffies_to_usecs() For common cases (HZ=100, 250 or 1000), these helpers are at most one multiply, so there is no point calling a tiny function. Keep them out of line for HZ=300 and others. This saves cycles in TCP fast path, among other things. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/8 grow/shrink: 25/89 up/down: 530/-3474 (-2944) ... nla_put_msecs 193 - -193 message_stats_print 2131 920 -1211 Total: Before=25365208, After=25362264, chg -0.01% Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260210170226.57209-1-edumazet@google.com --- include/linux/jiffies.h | 40 ++++++++++++++++++++++++++++++++++++++-- kernel/time/time.c | 19 +++++++------------ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fdef2c155c27..d1c3d4941854 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -434,8 +434,44 @@ extern unsigned long preset_lpj; /* * Convert various time units to each other: */ -extern unsigned int jiffies_to_msecs(const unsigned long j); -extern unsigned int jiffies_to_usecs(const unsigned long j); + +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) +/** + * jiffies_to_msecs - Convert jiffies to milliseconds + * @j: jiffies value + * + * This inline version takes care of HZ in {100,250,1000}. + * + * Return: milliseconds value + */ +static inline unsigned int jiffies_to_msecs(const unsigned long j) +{ + return (MSEC_PER_SEC / HZ) * j; +} +#else +unsigned int jiffies_to_msecs(const unsigned long j); +#endif + +#if !(USEC_PER_SEC % HZ) +/** + * jiffies_to_usecs - Convert jiffies to microseconds + * @j: jiffies value + * + * Return: microseconds value + */ +static inline unsigned int jiffies_to_usecs(const unsigned long j) +{ + /* + * Hz usually doesn't go much further MSEC_PER_SEC. + * jiffies_to_usecs() and usecs_to_jiffies() depend on that. + */ + BUILD_BUG_ON(HZ > USEC_PER_SEC); + + return (USEC_PER_SEC / HZ) * j; +} +#else +unsigned int jiffies_to_usecs(const unsigned long j); +#endif /** * jiffies_to_nsecs - Convert jiffies to nanoseconds diff --git a/kernel/time/time.c b/kernel/time/time.c index 0ba8e3c50d62..36fd2313ae7e 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -365,20 +365,16 @@ SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp) } #endif +#if HZ > MSEC_PER_SEC || (MSEC_PER_SEC % HZ) /** * jiffies_to_msecs - Convert jiffies to milliseconds * @j: jiffies value * - * Avoid unnecessary multiplications/divisions in the - * two most common HZ cases. - * * Return: milliseconds value */ unsigned int jiffies_to_msecs(const unsigned long j) { -#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) - return (MSEC_PER_SEC / HZ) * j; -#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) +#if HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); #else # if BITS_PER_LONG == 32 @@ -390,7 +386,9 @@ unsigned int jiffies_to_msecs(const unsigned long j) #endif } EXPORT_SYMBOL(jiffies_to_msecs); +#endif +#if (USEC_PER_SEC % HZ) /** * jiffies_to_usecs - Convert jiffies to microseconds * @j: jiffies value @@ -405,17 +403,14 @@ unsigned int jiffies_to_usecs(const unsigned long j) */ BUILD_BUG_ON(HZ > USEC_PER_SEC); -#if !(USEC_PER_SEC % HZ) - return (USEC_PER_SEC / HZ) * j; -#else -# if BITS_PER_LONG == 32 +#if BITS_PER_LONG == 32 return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; -# else +#else return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN; -# endif #endif } EXPORT_SYMBOL(jiffies_to_usecs); +#endif /** * mktime64 - Converts date to seconds. From bbdaa8c17cae18b977f4509911d7a390aa8a6597 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 11 Feb 2026 17:30:28 +0100 Subject: [PATCH 024/828] nvme: fix memory leak in quirks_param_set() When loading the nvme module, if the 'quirks' parameter is specified via both the kernel command line (e.g., nvme.quirks=...) and the modprobe command line (e.g., modprobe nvme quirks=...), the quirks_param_set() callback is invoked twice. Currently, in the double-invocation scenario, the second call overwrites the nvme_pci_quirk_list pointer, causing the memory allocated in the first call to leak. Fix this by freeing the existing list before assigning the new one. Fixes: b4247c8317c5 ("nvme: add support for dynamic quirk configuration via module parameter") Reviewed-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bd884e294600..c0f2104326ab 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -219,6 +219,7 @@ static int quirks_param_set(const char *value, const struct kernel_param *kp) i++; } + kfree(nvme_pci_quirk_list); nvme_pci_quirk_count = count; nvme_pci_quirk_list = qlist; goto out_free_val; From 5ee01f1a7343d6a3547b6802ca2d4cdce0edacb1 Mon Sep 17 00:00:00 2001 From: Qingye Zhao Date: Wed, 11 Feb 2026 09:24:04 +0000 Subject: [PATCH 025/828] cgroup: fix race between task migration and iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a task is migrated out of a css_set, cgroup_migrate_add_task() first moves it from cset->tasks to cset->mg_tasks via: list_move_tail(&task->cg_list, &cset->mg_tasks); If a css_task_iter currently has it->task_pos pointing to this task, css_set_move_task() calls css_task_iter_skip() to keep the iterator valid. However, since the task has already been moved to ->mg_tasks, the iterator is advanced relative to the mg_tasks list instead of the original tasks list. As a result, remaining tasks on cset->tasks, as well as tasks queued on cset->mg_tasks, can be skipped by iteration. Fix this by calling css_set_skip_task_iters() before unlinking task->cg_list from cset->tasks. This advances all active iterators to the next task on cset->tasks, so iteration continues correctly even when a task is concurrently being migrated. This race is hard to hit in practice without instrumentation, but it can be reproduced by artificially slowing down cgroup_procs_show(). For example, on an Android device a temporary /sys/kernel/cgroup/cgroup_test knob can be added to inject a delay into cgroup_procs_show(), and then: 1) Spawn three long-running tasks (PIDs 101, 102, 103). 2) Create a test cgroup and move the tasks into it. 3) Enable a large delay via /sys/kernel/cgroup/cgroup_test. 4) In one shell, read cgroup.procs from the test cgroup. 5) Within the delay window, in another shell migrate PID 102 by writing it to a different cgroup.procs file. Under this setup, cgroup.procs can intermittently show only PID 101 while skipping PID 103. Once the migration completes, reading the file again shows all tasks as expected. Note that this change does not allow removing the existing css_set_skip_task_iters() call in css_set_move_task(). The new call in cgroup_migrate_add_task() only handles iterators that are racing with migration while the task is still on cset->tasks. Iterators may also start after the task has been moved to cset->mg_tasks. If we dropped css_set_skip_task_iters() from css_set_move_task(), such iterators could keep task_pos pointing to a migrating task, causing css_task_iter_advance() to malfunction on the destination css_set, up to and including crashes or infinite loops. The race window between migration and iteration is very small, and css_task_iter is not on a hot path. In the worst case, when an iterator is positioned on the first thread of the migrating process, cgroup_migrate_add_task() may have to skip multiple tasks via css_set_skip_task_iters(). However, this only happens when migration and iteration actually race, so the performance impact is negligible compared to the correctness fix provided here. Fixes: b636fd38dc40 ("cgroup: Implement css_task_iter_skip()") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Qingye Zhao Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8af4351536cf..49da0874a023 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2608,6 +2608,7 @@ static void cgroup_migrate_add_task(struct task_struct *task, mgctx->tset.nr_tasks++; + css_set_skip_task_iters(cset, task); list_move_tail(&task->cg_list, &cset->mg_tasks); if (list_empty(&cset->mg_node)) list_add_tail(&cset->mg_node, From 166e31d7dbf6aa44829b98aa446bda5c9580f12a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Feb 2026 09:26:54 -0800 Subject: [PATCH 026/828] nvme-pci: ensure we're polling a polled queue A user can change the polled queue count at run time. There's a brief window during a reset where a hipri task may try to poll that queue before the block layer has updated the queue maps, which would race with the now interrupt driven queue and may cause double completions. Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c0f2104326ab..4ee4d7ead5a9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1627,7 +1627,8 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) struct nvme_queue *nvmeq = hctx->driver_data; bool found; - if (!nvme_cqe_pending(nvmeq)) + if (!test_bit(NVMEQ_POLLED, &nvmeq->flags) || + !nvme_cqe_pending(nvmeq)) return 0; spin_lock(&nvmeq->cq_poll_lock); From 4735b510a00fb2d4ac9e8d21a8c9552cb281f585 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Feb 2026 11:00:12 -0800 Subject: [PATCH 027/828] nvme-pci: cap queue creation to used queues If the user reduces the special queue count at runtime and resets the controller, we need to reduce the number of queues and interrupts requested accordingly rather than start with the pre-allocated queue count. Tested-by: Kanchan Joshi Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4ee4d7ead5a9..c63efa49132f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2902,7 +2902,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->nr_write_queues = write_queues; dev->nr_poll_queues = poll_queues; - nr_io_queues = dev->nr_allocated_queues - 1; + /* + * The initial number of allocated queue slots may be too large if the + * user reduced the special queue parameters. Cap the value to the + * number we need for this round. + */ + nr_io_queues = min(nvme_max_io_queues(dev), + dev->nr_allocated_queues - 1); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; From baa47c4f89eb2e7614e6a06dd5957632e38c4e74 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Feb 2026 10:38:09 -0800 Subject: [PATCH 028/828] nvme-pci: do not try to add queue maps at runtime The block layer allocates the set's maps once. We can't add special purpose queues at runtime if they weren't allocated at initialization time. Tested-by: Kanchan Joshi Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c63efa49132f..0c1a8d7aa1c0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2902,6 +2902,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->nr_write_queues = write_queues; dev->nr_poll_queues = poll_queues; + if (dev->ctrl.tagset) { + /* + * The set's maps are allocated only once at initialization + * time. We can't add special queues later if their mq_map + * wasn't preallocated. + */ + if (dev->ctrl.tagset->nr_maps < 3) + dev->nr_poll_queues = 0; + if (dev->ctrl.tagset->nr_maps < 2) + dev->nr_write_queues = 0; + } + /* * The initial number of allocated queue slots may be too large if the * user reduced the special queue parameters. Cap the value to the From f66857bafd4f151c5cc6856e47be2e12c1721e43 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 13 Feb 2026 14:38:12 +0000 Subject: [PATCH 029/828] KVM: arm64: Hide S1POE from guests when not supported by the host When CONFIG_ARM64_POE is disabled, KVM does not save/restore POR_EL1. However, ID_AA64MMFR3_EL1 sanitisation currently exposes the feature to guests whenever the hardware supports it, ignoring the host kernel configuration. If a guest detects this feature and attempts to use it, the host will fail to context-switch POR_EL1, potentially leading to state corruption. Fix this by masking ID_AA64MMFR3_EL1.S1POE in the sanitised system registers, preventing KVM from advertising the feature when the host does not support it (i.e. system_supports_poe() is false). Fixes: 70ed7238297f ("KVM: arm64: Sanitise ID_AA64MMFR3_EL1") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260213143815.1732675-2-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a7cd0badc20c..1b4cacb6e918 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1816,6 +1816,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, ID_AA64MMFR3_EL1_SCTLRX | ID_AA64MMFR3_EL1_S1POE | ID_AA64MMFR3_EL1_S1PIE; + + if (!system_supports_poe()) + val &= ~ID_AA64MMFR3_EL1_S1POE; break; case SYS_ID_MMFR4_EL1: val &= ~ID_MMFR4_EL1_CCIDX; From 9cb0468d0b335ccf769bd8e161cc96195e82d8b1 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 13 Feb 2026 14:38:13 +0000 Subject: [PATCH 030/828] KVM: arm64: Optimise away S1POE handling when not supported by host Although ID register sanitisation prevents guests from seeing the feature, adding this check to the helper allows the compiler to entirely eliminate S1POE-specific code paths (such as context switching POR_EL1) when the host kernel is compiled without support (CONFIG_ARM64_POE is disabled). This aligns with the pattern used for other optional features like SVE (kvm_has_sve()) and FPMR (kvm_has_fpmr()), ensuring no POE logic if the host lacks support, regardless of the guest configuration state. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260213143815.1732675-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5d5a3bbdb95e..2ca264b3db5f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1616,7 +1616,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP)) #define kvm_has_s1poe(k) \ - (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) + (system_supports_poe() && \ + kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) #define kvm_has_ras(k) \ (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP)) From 7e7c2cf0024d89443a7af52e09e47b1fe634ab17 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 13 Feb 2026 14:38:14 +0000 Subject: [PATCH 031/828] KVM: arm64: Fix ID register initialization for non-protected pKVM guests In protected mode, the hypervisor maintains a separate instance of the `kvm` structure for each VM. For non-protected VMs, this structure is initialized from the host's `kvm` state. Currently, `pkvm_init_features_from_host()` copies the `KVM_ARCH_FLAG_ID_REGS_INITIALIZED` flag from the host without the underlying `id_regs` data being initialized. This results in the hypervisor seeing the flag as set while the ID registers remain zeroed. Consequently, `kvm_has_feat()` checks at EL2 fail (return 0) for non-protected VMs. This breaks logic that relies on feature detection, such as `ctxt_has_tcrx()` for TCR2_EL1 support. As a result, certain system registers (e.g., TCR2_EL1, PIR_EL1, POR_EL1) are not saved/restored during the world switch, which could lead to state corruption. Fix this by explicitly copying the ID registers from the host `kvm` to the hypervisor `kvm` for non-protected VMs during initialization, since we trust the host with its non-protected guests' features. Also ensure `KVM_ARCH_FLAG_ID_REGS_INITIALIZED` is cleared initially in `pkvm_init_features_from_host` so that `vm_copy_id_regs` can properly initialize them and set the flag once done. Fixes: 41d6028e28bd ("KVM: arm64: Convert the SVE guest vcpu flag to a vm flag") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260213143815.1732675-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 35 ++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 8e29d7734a15..f3c1c6955163 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -342,6 +342,7 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc /* No restrictions for non-protected VMs. */ if (!kvm_vm_is_protected(kvm)) { hyp_vm->kvm.arch.flags = host_arch_flags; + hyp_vm->kvm.arch.flags &= ~BIT_ULL(KVM_ARCH_FLAG_ID_REGS_INITIALIZED); bitmap_copy(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features, @@ -471,6 +472,35 @@ err: return ret; } +static int vm_copy_id_regs(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); + const struct kvm *host_kvm = hyp_vm->host_kvm; + struct kvm *kvm = &hyp_vm->kvm; + + if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &host_kvm->arch.flags)) + return -EINVAL; + + if (test_and_set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) + return 0; + + memcpy(kvm->arch.id_regs, host_kvm->arch.id_regs, sizeof(kvm->arch.id_regs)); + + return 0; +} + +static int pkvm_vcpu_init_sysregs(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + int ret = 0; + + if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) + kvm_init_pvm_id_regs(&hyp_vcpu->vcpu); + else + ret = vm_copy_id_regs(hyp_vcpu); + + return ret; +} + static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, struct pkvm_hyp_vm *hyp_vm, struct kvm_vcpu *host_vcpu) @@ -490,8 +520,9 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; - if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) - kvm_init_pvm_id_regs(&hyp_vcpu->vcpu); + ret = pkvm_vcpu_init_sysregs(hyp_vcpu); + if (ret) + goto done; ret = pkvm_vcpu_init_traps(hyp_vcpu); if (ret) From 02471a78a052b631204aed051ab718e4d14ae687 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 13 Feb 2026 14:38:15 +0000 Subject: [PATCH 032/828] KVM: arm64: Remove redundant kern_hyp_va() in unpin_host_sve_state() The `sve_state` pointer in `hyp_vcpu->vcpu.arch` is initialized as a hypervisor virtual address during vCPU initialization in `pkvm_vcpu_init_sve()`. `unpin_host_sve_state()` calls `kern_hyp_va()` on this address. Since `kern_hyp_va()` is idempotent, it's not a bug. However, it is unnecessary and potentially confusing. Remove the redundant conversion. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260213143815.1732675-5-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index f3c1c6955163..2f029bfe4755 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -392,7 +392,7 @@ static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu) if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE)) return; - sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state); + sve_state = hyp_vcpu->vcpu.arch.sve_state; hyp_unpin_shared_mem(sve_state, sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu)); } From ee5c38a8d31e5dea52299c43c2ec3213351ab6e1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 Feb 2026 14:30:23 -0800 Subject: [PATCH 033/828] KVM: arm64: vgic: Handle const qualifier from gic_kvm_info allocation type In preparation for making the kmalloc family of allocators type aware, we need to make sure that the returned type from the allocation matches the type of the variable being assigned. (Before, the allocator would always return "void *", which can be implicitly cast to any pointer type.) The assigned type is "struct gic_kvm_info", but the returned type, while matching, is const qualified. To get them exactly matching, just use the dereferenced pointer for the sizeof(). Signed-off-by: Kees Cook Link: https://patch.msgid.link/20260206223022.it.052-kees@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 86c149537493..a53f93546aa0 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -654,7 +654,7 @@ static struct gic_kvm_info *gic_kvm_info; void __init vgic_set_kvm_info(const struct gic_kvm_info *info) { BUG_ON(gic_kvm_info != NULL); - gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL); + gic_kvm_info = kmalloc(sizeof(*gic_kvm_info), GFP_KERNEL); if (gic_kvm_info) *gic_kvm_info = *info; } From 0b87d51690dd5131cbe9fbd23746b037aab89815 Mon Sep 17 00:00:00 2001 From: Franz Schnyder Date: Fri, 6 Feb 2026 13:37:36 +0100 Subject: [PATCH 034/828] drm/bridge: ti-sn65dsi86: Enable HPD polling if IRQ is not used Fallback to polling to detect hotplug events on systems without interrupts. On systems where the interrupt line of the bridge is not connected, the bridge cannot notify hotplug events. Only add the DRM_BRIDGE_OP_HPD flag if an interrupt has been registered otherwise remain in polling mode. Fixes: 55e8ff842051 ("drm/bridge: ti-sn65dsi86: Add HPD for DisplayPort connector type") Cc: stable@vger.kernel.org # 6.16: 9133bc3f0564: drm/bridge: ti-sn65dsi86: Add Signed-off-by: Franz Schnyder Reviewed-by: Douglas Anderson [dianders: Adjusted Fixes/stable line based on discussion] Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20260206123758.374555-1-fra.schnyder@gmail.com --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 276d05d25ad8..98d64ad791d0 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1415,6 +1415,7 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, { struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); struct device_node *np = pdata->dev->of_node; + const struct i2c_client *client = to_i2c_client(pdata->dev); int ret; pdata->next_bridge = devm_drm_of_get_bridge(&adev->dev, np, 1, 0); @@ -1433,8 +1434,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | - DRM_BRIDGE_OP_HPD; + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (client->irq) + pdata->bridge.ops |= DRM_BRIDGE_OP_HPD; /* * If comms were already enabled they would have been enabled * with the wrong value of HPD_DISABLE. Update it now. Comms From e9e0b48cd15b46dcb2bbc165f6b0fee698b855d6 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Sun, 8 Feb 2026 22:47:26 +0000 Subject: [PATCH 035/828] drm/fourcc: fix plane order for 10/12/16-bit YCbCr formats The short comments had the correct order, but the long comments had the planes reversed. Fixes: 2271e0a20ef7 ("drm: drm_fourcc: add 10/12/16bit software decoder YCbCr formats") Signed-off-by: Simon Ser Reviewed-by: Daniel Stone Reviewed-by: Robert Mader Link: https://patch.msgid.link/20260208224718.57199-1-contact@emersion.fr --- include/uapi/drm/drm_fourcc.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index e527b24bd824..c89aede3cb12 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -401,8 +401,8 @@ extern "C" { * implementation can multiply the values by 2^6=64. For that reason the padding * must only contain zeros. * index 0 = Y plane, [15:0] z:Y [6:10] little endian - * index 1 = Cr plane, [15:0] z:Cr [6:10] little endian - * index 2 = Cb plane, [15:0] z:Cb [6:10] little endian + * index 1 = Cb plane, [15:0] z:Cb [6:10] little endian + * index 2 = Cr plane, [15:0] z:Cr [6:10] little endian */ #define DRM_FORMAT_S010 fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ #define DRM_FORMAT_S210 fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ @@ -414,8 +414,8 @@ extern "C" { * implementation can multiply the values by 2^4=16. For that reason the padding * must only contain zeros. * index 0 = Y plane, [15:0] z:Y [4:12] little endian - * index 1 = Cr plane, [15:0] z:Cr [4:12] little endian - * index 2 = Cb plane, [15:0] z:Cb [4:12] little endian + * index 1 = Cb plane, [15:0] z:Cb [4:12] little endian + * index 2 = Cr plane, [15:0] z:Cr [4:12] little endian */ #define DRM_FORMAT_S012 fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ #define DRM_FORMAT_S212 fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ @@ -424,8 +424,8 @@ extern "C" { /* * 3 plane YCbCr * index 0 = Y plane, [15:0] Y little endian - * index 1 = Cr plane, [15:0] Cr little endian - * index 2 = Cb plane, [15:0] Cb little endian + * index 1 = Cb plane, [15:0] Cb little endian + * index 2 = Cr plane, [15:0] Cr little endian */ #define DRM_FORMAT_S016 fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ #define DRM_FORMAT_S216 fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ From cb184dd19154fc486fa3d9e02afe70a97e54e055 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 6 Feb 2026 14:20:28 +0800 Subject: [PATCH 036/828] fs: init flags_valid before calling vfs_fileattr_get syzbot reported a uninit-value bug in [1]. Similar to the "*get" context where the kernel's internal file_kattr structure is initialized before calling vfs_fileattr_get(), we should use the same mechanism when using fa. [1] BUG: KMSAN: uninit-value in fuse_fileattr_get+0xeb4/0x1450 fs/fuse/ioctl.c:517 fuse_fileattr_get+0xeb4/0x1450 fs/fuse/ioctl.c:517 vfs_fileattr_get fs/file_attr.c:94 [inline] __do_sys_file_getattr fs/file_attr.c:416 [inline] Local variable fa.i created at: __do_sys_file_getattr fs/file_attr.c:380 [inline] __se_sys_file_getattr+0x8c/0xbd0 fs/file_attr.c:372 Reported-by: syzbot+7c31755f2cea07838b0c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7c31755f2cea07838b0c Tested-by: syzbot+7c31755f2cea07838b0c@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Link: https://patch.msgid.link/tencent_B6C4583771D76766D71362A368696EC3B605@qq.com Signed-off-by: Christian Brauner --- fs/file_attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/file_attr.c b/fs/file_attr.c index 42721427245a..0e9b4f737546 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -376,7 +376,7 @@ SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename, struct path filepath __free(path_put) = {}; unsigned int lookup_flags = 0; struct file_attr fattr; - struct file_kattr fa; + struct file_kattr fa = { .flags_valid = true }; /* hint only */ int error; BUILD_BUG_ON(sizeof(struct file_attr) < FILE_ATTR_SIZE_VER0); From 9eed043d10f17301c1b5141e16bb98a85a8fd07e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 3 Feb 2026 17:40:14 +0800 Subject: [PATCH 037/828] writeback: Fix wakeup and logging timeouts for !DETECT_HUNG_TASK Recent changes of fs-writeback cause such warnings if DETECT_HUNG_TASK is not enabled: INFO: The task sync:1342 has been waiting for writeback completion for more than 1 seconds. The reason is sysctl_hung_task_timeout_secs is 0 when DETECT_HUNG_TASK is not enabled, then it causes the warning message even if the writeback lasts for only one second. Guard the wakeup and logging with "#ifdef CONFIG_DETECT_HUNG_TASK" can eliminate the warning messages. But on the other hand, it is possible that sysctl_hung_task_timeout_secs be also 0 when DETECT_HUNG_TASK is enabled. So let's just check the value of sysctl_hung_task_timeout_secs to decide whether do wakeup and logging. Fixes: 1888635532fb ("writeback: Wake up waiting tasks when finishing the writeback of a chunk.") Fixes: d6e621590764 ("writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs)") Signed-off-by: Huacai Chen Link: https://patch.msgid.link/20260203094014.2273240-1-chenhuacai@loongson.cn Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fs-writeback.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 68228bf89b82..eb1ca86bf30a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -198,10 +198,11 @@ static void wb_queue_work(struct bdi_writeback *wb, static bool wb_wait_for_completion_cb(struct wb_completion *done) { + unsigned long timeout = sysctl_hung_task_timeout_secs; unsigned long waited_secs = (jiffies - done->wait_start) / HZ; done->progress_stamp = jiffies; - if (waited_secs > sysctl_hung_task_timeout_secs) + if (timeout && (waited_secs > timeout)) pr_info("INFO: The task %s:%d has been waiting for writeback " "completion for more than %lu seconds.", current->comm, current->pid, waited_secs); @@ -1955,6 +1956,7 @@ static long writeback_sb_inodes(struct super_block *sb, .range_end = LLONG_MAX, }; unsigned long start_time = jiffies; + unsigned long timeout = sysctl_hung_task_timeout_secs; long write_chunk; long total_wrote = 0; /* count both pages and inodes */ unsigned long dirtied_before = jiffies; @@ -2041,9 +2043,8 @@ static long writeback_sb_inodes(struct super_block *sb, __writeback_single_inode(inode, &wbc); /* Report progress to inform the hung task detector of the progress. */ - if (work->done && work->done->progress_stamp && - (jiffies - work->done->progress_stamp) > HZ * - sysctl_hung_task_timeout_secs / 2) + if (work->done && work->done->progress_stamp && timeout && + (jiffies - work->done->progress_stamp) > HZ * timeout / 2) wake_up_all(work->done->waitq); wbc_detach_inode(&wbc); From 81f16c9778d730f573d0d565706bb7227e2405f4 Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Fri, 13 Feb 2026 18:30:06 +0800 Subject: [PATCH 038/828] statmount: Fix the null-ptr-deref in do_statmount() If the mount is internal, it's mnt_ns will be MNT_NS_INTERNAL, which is defined as ERR_PTR(-EINVAL). So, in the do_statmount(), need to check ns of mount by IS_ERR() and return. Fixes: 0e5032237ee5 ("statmount: accept fd as a parameter") Reported-by: syzbot+9e03a9535ea65f687a44@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/698e287a.a70a0220.2c38d7.009e.GAE@google.com/ Signed-off-by: Qing Wang Link: https://patch.msgid.link/20260213103006.2472569-1-wangqing7171@gmail.com Reviewed-by: Bhavik Sachdev Signed-off-by: Christian Brauner --- fs/namespace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index a67cbe42746d..90700df65f0d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5678,6 +5678,8 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, s->mnt = mnt_file->f_path.mnt; ns = real_mount(s->mnt)->mnt_ns; + if (IS_ERR(ns)) + return PTR_ERR(ns); if (!ns) /* * We can't set mount point and mnt_ns_id since we don't have a From ac83896172798cf82ebc643cf555aa4cdd3a07da Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Fri, 13 Feb 2026 02:28:12 +0000 Subject: [PATCH 039/828] iomap: Describe @private in iomap_readahead() The kernel test rebot reports the kernel-doc warning: ``` Warning: fs/iomap/buffered-io.c:624 function parameter 'private' not described in 'iomap_readahead' ``` The former commit in "iomap: stash iomap read ctx in the private field of iomap_iter" has added a new parameter @private to iomap_readahead(), so let's describe the parameter. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601261111.vIL9rhgD-lkp@intel.com/ Fixes: 8806f279244b ("iomap: stash iomap read ctx in the private field of iomap_iter") Signed-off-by: Hongbo Li Link: https://patch.msgid.link/20260213022812.766187-1-lihongbo22@huawei.com Reviewed-by: Gao Xiang Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 1fe19b4ee2f4..58887513b894 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -625,6 +625,7 @@ static int iomap_readahead_iter(struct iomap_iter *iter, * iomap_readahead - Attempt to read pages from a file. * @ops: The operations vector for the filesystem. * @ctx: The ctx used for issuing readahead. + * @private: The filesystem-specific information for issuing iomap_iter. * * This function is for filesystems to call to implement their readahead * address_space operation. From 1cb968a2013ffa8112d52ebe605009ea1c6a582c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 24 Jan 2026 04:18:40 +0000 Subject: [PATCH 040/828] nfsd: Fix cred ref leak in nfsd_nl_threads_set_doit(). syzbot reported memory leak of struct cred. [0] nfsd_nl_threads_set_doit() passes get_current_cred() to nfsd_svc(), but put_cred() is not called after that. The cred is finally passed down to _svc_xprt_create(), which calls get_cred() with the cred for struct svc_xprt. The ownership of the refcount by get_current_cred() is not transferred to anywhere and is just leaked. nfsd_svc() is also called from write_threads(), but it does not bump file->f_cred there. nfsd_nl_threads_set_doit() is called from sendmsg() and current->cred does not go away. Let's use current_cred() in nfsd_nl_threads_set_doit(). [0]: BUG: memory leak unreferenced object 0xffff888108b89480 (size 184): comm "syz-executor", pid 5994, jiffies 4294943386 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 369454a7): kmemleak_alloc_recursive include/linux/kmemleak.h:44 [inline] slab_post_alloc_hook mm/slub.c:4958 [inline] slab_alloc_node mm/slub.c:5263 [inline] kmem_cache_alloc_noprof+0x412/0x580 mm/slub.c:5270 prepare_creds+0x22/0x600 kernel/cred.c:185 copy_creds+0x44/0x290 kernel/cred.c:286 copy_process+0x7a7/0x2870 kernel/fork.c:2086 kernel_clone+0xac/0x6e0 kernel/fork.c:2651 __do_sys_clone+0x7f/0xb0 kernel/fork.c:2792 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xa4/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 924f4fb003ba ("NFSD: convert write_threads to netlink command") Cc: stable@vger.kernel.org Reported-by: syzbot+dd3b43aa0204089217ee@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69744674.a00a0220.33ccc7.0000.GAE@google.com/ Tested-by: syzbot+dd3b43aa0204089217ee@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 89fe2c0e8d44..43ced6b0e197 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1647,7 +1647,7 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) if (attr) nn->min_threads = nla_get_u32(attr); - ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope); + ret = nfsd_svc(nrpools, nthreads, net, current_cred(), scope); if (ret > 0) ret = 0; out_unlock: From 92978c83bb4eef55d02a6c990c01c423131eefa7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 24 Jan 2026 04:18:41 +0000 Subject: [PATCH 041/828] nfsd: Fix cred ref leak in nfsd_nl_listener_set_doit(). nfsd_nl_listener_set_doit() uses get_current_cred() without put_cred(). As we can see from other callers, svc_xprt_create_from_sa() does not require the extra refcount. nfsd_nl_listener_set_doit() is always in the process context, sendmsg(), and current->cred does not go away. Let's use current_cred() in nfsd_nl_listener_set_doit(). Fixes: 16a471177496 ("NFSD: add listener-{set,get} netlink command") Cc: stable@vger.kernel.org Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 43ced6b0e197..b06adb5d5a2e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2000,7 +2000,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) } ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, - get_current_cred()); + current_cred()); /* always save the latest error */ if (ret < 0) err = ret; From 9478c166c46934160135e197b049b5a05753f2ad Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Nov 2024 11:46:01 +1000 Subject: [PATCH 042/828] nouveau/gsp: drop WARN_ON in ACPI probes These WARN_ONs seem to trigger a lot, and we don't seem to have a plan to fix them, so just drop them, as they are most likely harmless. Cc: stable@vger.kernel.org Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20241121014601.229391-1-airlied@gmail.com Signed-off-by: Danilo Krummrich --- .../gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index 7fb13434c051..a575a8dbf727 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c @@ -737,8 +737,8 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!obj) goto done; - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || - WARN_ON(obj->buffer.length != 4)) + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length != 4) goto done; caps->status = 0; @@ -773,8 +773,8 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) if (!obj) goto done; - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || - WARN_ON(obj->buffer.length != 4)) + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length != 4) goto done; jt->status = 0; @@ -861,8 +861,8 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod) _DOD = output.pointer; - if (WARN_ON(_DOD->type != ACPI_TYPE_PACKAGE) || - WARN_ON(_DOD->package.count > ARRAY_SIZE(dod->acpiIdList))) + if (_DOD->type != ACPI_TYPE_PACKAGE || + _DOD->package.count > ARRAY_SIZE(dod->acpiIdList)) return; for (int i = 0; i < _DOD->package.count; i++) { From 46120745bb4e7e1f09959624716b4c5d6e2c2e9e Mon Sep 17 00:00:00 2001 From: Ethan Tidmore Date: Sun, 15 Feb 2026 22:04:38 -0600 Subject: [PATCH 043/828] drm/tiny: sharp-memory: fix pointer error dereference The function devm_drm_dev_alloc() returns a pointer error upon failure not NULL. Change null check to pointer error check. Detected by Smatch: drivers/gpu/drm/tiny/sharp-memory.c:549 sharp_memory_probe() error: 'smd' dereferencing possible ERR_PTR() Fixes: b8f9f21716fec ("drm/tiny: Add driver for Sharp Memory LCD") Signed-off-by: Ethan Tidmore Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260216040438.43702-1-ethantidmore06@gmail.com --- drivers/gpu/drm/tiny/sharp-memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tiny/sharp-memory.c b/drivers/gpu/drm/tiny/sharp-memory.c index 64272cd0f6e2..cbf69460ebf3 100644 --- a/drivers/gpu/drm/tiny/sharp-memory.c +++ b/drivers/gpu/drm/tiny/sharp-memory.c @@ -541,8 +541,8 @@ static int sharp_memory_probe(struct spi_device *spi) smd = devm_drm_dev_alloc(dev, &sharp_memory_drm_driver, struct sharp_memory_device, drm); - if (!smd) - return -ENOMEM; + if (IS_ERR(smd)) + return PTR_ERR(smd); spi_set_drvdata(spi, smd); From 1072020685f4b81f6efad3b412cdae0bd62bb043 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 12 Feb 2026 12:41:25 +0100 Subject: [PATCH 044/828] irqchip/sifive-plic: Fix frozen interrupt due to affinity setting PLIC ignores interrupt completion message for disabled interrupt, explained by the specification: The PLIC signals it has completed executing an interrupt handler by writing the interrupt ID it received from the claim to the claim/complete register. The PLIC does not check whether the completion ID is the same as the last claim ID for that target. If the completion ID does not match an interrupt source that is currently enabled for the target, the completion is silently ignored. This caused problems in the past, because an interrupt can be disabled while still being handled and plic_irq_eoi() had no effect. That was fixed by checking if the interrupt is disabled, and if so enable it, before sending the completion message. That check is done with irqd_irq_disabled(). However, that is not sufficient because the enable bit for the handling hart can be zero despite irqd_irq_disabled(d) being false. This can happen when affinity setting is changed while a hart is still handling the interrupt. This problem is easily reproducible by dumping a large file to uart (which generates lots of interrupts) and at the same time keep changing the uart interrupt's affinity setting. The uart port becomes frozen almost instantaneously. Fix this by checking PLIC's enable bit instead of irqd_irq_disabled(). Fixes: cc9f04f9a84f ("irqchip/sifive-plic: Implement irq_set_affinity() for SMP host") Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260212114125.3148067-1-namcao@linutronix.de --- drivers/irqchip/irq-sifive-plic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 60fd8f91762b..70058871d2fb 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -172,8 +172,13 @@ static void plic_irq_disable(struct irq_data *d) static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); + u32 __iomem *reg; + bool enabled; - if (unlikely(irqd_irq_disabled(d))) { + reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32); + enabled = readl(reg) & BIT(d->hwirq % 32); + + if (unlikely(!enabled)) { plic_toggle(handler, d->hwirq, 1); writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); plic_toggle(handler, d->hwirq, 0); From ce9e40a9a5e5cff0b1b0d2fa582b3d71a8ce68e8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Feb 2026 15:48:16 +0000 Subject: [PATCH 045/828] irqchip/gic-v3-its: Limit number of per-device MSIs to the range the ITS supports The ITS driver blindly assumes that EventIDs are in abundant supply, to the point where it never checks how many the hardware actually supports. It turns out that some pretty esoteric integrations make it so that only a few bits are available, all the way down to a single bit. Enforce the advertised limitation at the point of allocating the device structure, and hope that the endpoint driver can deal with such limitation. Fixes: 84a6a2e7fc18d ("irqchip: GICv3: ITS: device allocation and configuration") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Robin Murphy Reviewed-by: Zenghui Yu Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260206154816.3582887-1-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 4 ++++ include/linux/irqchip/arm-gic-v3.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 2988def30972..a51e8e6a8181 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3475,6 +3475,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, int lpi_base; int nr_lpis; int nr_ites; + int id_bits; int sz; if (!its_alloc_device_table(its, dev_id)) @@ -3486,7 +3487,10 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, /* * Even if the device wants a single LPI, the ITT must be * sized as a power of two (and you need at least one bit...). + * Also honor the ITS's own EID limit. */ + id_bits = FIELD_GET(GITS_TYPER_IDBITS, its->typer) + 1; + nvecs = min_t(unsigned int, nvecs, BIT(id_bits)); nr_ites = max(2, nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); sz = max(sz, ITS_ITT_ALIGN); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 70c0948f978e..0225121f3013 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -394,6 +394,7 @@ #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 #define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS GENMASK_ULL(12, 8) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) From f0617176be5e497b67b3c87bac35b26ebccac499 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 16 Feb 2026 12:04:50 +0100 Subject: [PATCH 046/828] irqchip/mmp: Make icu_irq_chip variable static const File-scope 'icu_irq_chip' is not used outside of this unit and is not modified anywhere, so make it static const to silence sparse warning: irq-mmp.c:139:17: warning: symbol 'icu_irq_chip' was not declared. Should it be static? Signed-off-by: Krzysztof Kozlowski Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260216110449.160277-2-krzysztof.kozlowski@oss.qualcomm.com --- drivers/irqchip/irq-mmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 09e640430208..8e210cb451be 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -136,7 +136,7 @@ static void icu_unmask_irq(struct irq_data *d) } } -struct irq_chip icu_irq_chip = { +static const struct irq_chip icu_irq_chip = { .name = "icu_irq", .irq_mask = icu_mask_irq, .irq_mask_ack = icu_mask_ack_irq, From bffda93a51b40afd67c11bf558dc5aae83ca0943 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 12 Feb 2026 11:23:27 -0800 Subject: [PATCH 047/828] scsi: lpfc: Properly set WC for DPP mapping Using set_memory_wc() to enable write-combining for the DPP portion of the MMIO mapping is wrong as set_memory_*() is meant to operate on RAM only, not MMIO mappings. In fact, as used currently triggers a BUG_ON() with enabled CONFIG_DEBUG_VIRTUAL. Simply map the DPP region separately and in addition to the already existing mappings, avoiding any possible negative side effects for these. Fixes: 1351e69fc6db ("scsi: lpfc: Add push-to-adapter support to sli4") Signed-off-by: Mathias Krause Signed-off-by: Justin Tee Reviewed-by: Mathias Krause Link: https://patch.msgid.link/20260212192327.141104-1-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 2 ++ drivers/scsi/lpfc/lpfc_sli.c | 36 +++++++++++++++++++++++++++++------ drivers/scsi/lpfc/lpfc_sli4.h | 3 +++ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a116a16c4a6f..b5e53c7d33e7 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -12039,6 +12039,8 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) iounmap(phba->sli4_hba.conf_regs_memmap_p); if (phba->sli4_hba.dpp_regs_memmap_p) iounmap(phba->sli4_hba.dpp_regs_memmap_p); + if (phba->sli4_hba.dpp_regs_memmap_wc_p) + iounmap(phba->sli4_hba.dpp_regs_memmap_wc_p); break; case LPFC_SLI_INTF_IF_TYPE_1: break; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 734af3d039f8..690763e0aa55 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -15981,6 +15981,32 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) return NULL; } +static __maybe_unused void __iomem * +lpfc_dpp_wc_map(struct lpfc_hba *phba, uint8_t dpp_barset) +{ + + /* DPP region is supposed to cover 64-bit BAR2 */ + if (dpp_barset != WQ_PCI_BAR_4_AND_5) { + lpfc_log_msg(phba, KERN_WARNING, LOG_INIT, + "3273 dpp_barset x%x != WQ_PCI_BAR_4_AND_5\n", + dpp_barset); + return NULL; + } + + if (!phba->sli4_hba.dpp_regs_memmap_wc_p) { + void __iomem *dpp_map; + + dpp_map = ioremap_wc(phba->pci_bar2_map, + pci_resource_len(phba->pcidev, + PCI_64BIT_BAR4)); + + if (dpp_map) + phba->sli4_hba.dpp_regs_memmap_wc_p = dpp_map; + } + + return phba->sli4_hba.dpp_regs_memmap_wc_p; +} + /** * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs * @phba: HBA structure that EQs are on. @@ -16944,9 +16970,6 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, uint8_t dpp_barset; uint32_t dpp_offset; uint8_t wq_create_version; -#ifdef CONFIG_X86 - unsigned long pg_addr; -#endif /* sanity check on queue memory */ if (!wq || !cq) @@ -17132,14 +17155,15 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, #ifdef CONFIG_X86 /* Enable combined writes for DPP aperture */ - pg_addr = (unsigned long)(wq->dpp_regaddr) & PAGE_MASK; - rc = set_memory_wc(pg_addr, 1); - if (rc) { + bar_memmap_p = lpfc_dpp_wc_map(phba, dpp_barset); + if (!bar_memmap_p) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3272 Cannot setup Combined " "Write on WQ[%d] - disable DPP\n", wq->queue_id); phba->cfg_enable_dpp = 0; + } else { + wq->dpp_regaddr = bar_memmap_p + dpp_offset; } #else phba->cfg_enable_dpp = 0; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index ee58383492b2..b6d90604bb61 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -785,6 +785,9 @@ struct lpfc_sli4_hba { void __iomem *dpp_regs_memmap_p; /* Kernel memory mapped address for * dpp registers */ + void __iomem *dpp_regs_memmap_wc_p;/* Kernel memory mapped address for + * dpp registers with write combining + */ union { struct { /* IF Type 0, BAR 0 PCI cfg space reg mem map */ From 57297736c08233987e5d29ce6584c6ca2a831b12 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 29 Jan 2026 15:30:39 +0100 Subject: [PATCH 048/828] scsi: storvsc: Fix scheduling while atomic on PREEMPT_RT This resolves the follow splat and lock-up when running with PREEMPT_RT enabled on Hyper-V: [ 415.140818] BUG: scheduling while atomic: stress-ng-iomix/1048/0x00000002 [ 415.140822] INFO: lockdep is turned off. [ 415.140823] Modules linked in: intel_rapl_msr intel_rapl_common intel_uncore_frequency_common intel_pmc_core pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry intel_vsec ghash_clmulni_intel aesni_intel rapl binfmt_misc nls_ascii nls_cp437 vfat fat snd_pcm hyperv_drm snd_timer drm_client_lib drm_shmem_helper snd sg soundcore drm_kms_helper pcspkr hv_balloon hv_utils evdev joydev drm configfs efi_pstore nfnetlink vsock_loopback vmw_vsock_virtio_transport_common hv_sock vmw_vsock_vmci_transport vsock vmw_vmci efivarfs autofs4 ext4 crc16 mbcache jbd2 sr_mod sd_mod cdrom hv_storvsc serio_raw hid_generic scsi_transport_fc hid_hyperv scsi_mod hid hv_netvsc hyperv_keyboard scsi_common [ 415.140846] Preemption disabled at: [ 415.140847] [] storvsc_queuecommand+0x2e1/0xbe0 [hv_storvsc] [ 415.140854] CPU: 8 UID: 0 PID: 1048 Comm: stress-ng-iomix Not tainted 6.19.0-rc7 #30 PREEMPT_{RT,(full)} [ 415.140856] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/04/2024 [ 415.140857] Call Trace: [ 415.140861] [ 415.140861] ? storvsc_queuecommand+0x2e1/0xbe0 [hv_storvsc] [ 415.140863] dump_stack_lvl+0x91/0xb0 [ 415.140870] __schedule_bug+0x9c/0xc0 [ 415.140875] __schedule+0xdf6/0x1300 [ 415.140877] ? rtlock_slowlock_locked+0x56c/0x1980 [ 415.140879] ? rcu_is_watching+0x12/0x60 [ 415.140883] schedule_rtlock+0x21/0x40 [ 415.140885] rtlock_slowlock_locked+0x502/0x1980 [ 415.140891] rt_spin_lock+0x89/0x1e0 [ 415.140893] hv_ringbuffer_write+0x87/0x2a0 [ 415.140899] vmbus_sendpacket_mpb_desc+0xb6/0xe0 [ 415.140900] ? rcu_is_watching+0x12/0x60 [ 415.140902] storvsc_queuecommand+0x669/0xbe0 [hv_storvsc] [ 415.140904] ? HARDIRQ_verbose+0x10/0x10 [ 415.140908] ? __rq_qos_issue+0x28/0x40 [ 415.140911] scsi_queue_rq+0x760/0xd80 [scsi_mod] [ 415.140926] __blk_mq_issue_directly+0x4a/0xc0 [ 415.140928] blk_mq_issue_direct+0x87/0x2b0 [ 415.140931] blk_mq_dispatch_queue_requests+0x120/0x440 [ 415.140933] blk_mq_flush_plug_list+0x7a/0x1a0 [ 415.140935] __blk_flush_plug+0xf4/0x150 [ 415.140940] __submit_bio+0x2b2/0x5c0 [ 415.140944] ? submit_bio_noacct_nocheck+0x272/0x360 [ 415.140946] submit_bio_noacct_nocheck+0x272/0x360 [ 415.140951] ext4_read_bh_lock+0x3e/0x60 [ext4] [ 415.140995] ext4_block_write_begin+0x396/0x650 [ext4] [ 415.141018] ? __pfx_ext4_da_get_block_prep+0x10/0x10 [ext4] [ 415.141038] ext4_da_write_begin+0x1c4/0x350 [ext4] [ 415.141060] generic_perform_write+0x14e/0x2c0 [ 415.141065] ext4_buffered_write_iter+0x6b/0x120 [ext4] [ 415.141083] vfs_write+0x2ca/0x570 [ 415.141087] ksys_write+0x76/0xf0 [ 415.141089] do_syscall_64+0x99/0x1490 [ 415.141093] ? rcu_is_watching+0x12/0x60 [ 415.141095] ? finish_task_switch.isra.0+0xdf/0x3d0 [ 415.141097] ? rcu_is_watching+0x12/0x60 [ 415.141098] ? lock_release+0x1f0/0x2a0 [ 415.141100] ? rcu_is_watching+0x12/0x60 [ 415.141101] ? finish_task_switch.isra.0+0xe4/0x3d0 [ 415.141103] ? rcu_is_watching+0x12/0x60 [ 415.141104] ? __schedule+0xb34/0x1300 [ 415.141106] ? hrtimer_try_to_cancel+0x1d/0x170 [ 415.141109] ? do_nanosleep+0x8b/0x160 [ 415.141111] ? hrtimer_nanosleep+0x89/0x100 [ 415.141114] ? __pfx_hrtimer_wakeup+0x10/0x10 [ 415.141116] ? xfd_validate_state+0x26/0x90 [ 415.141118] ? rcu_is_watching+0x12/0x60 [ 415.141120] ? do_syscall_64+0x1e0/0x1490 [ 415.141121] ? do_syscall_64+0x1e0/0x1490 [ 415.141123] ? rcu_is_watching+0x12/0x60 [ 415.141124] ? do_syscall_64+0x1e0/0x1490 [ 415.141125] ? do_syscall_64+0x1e0/0x1490 [ 415.141127] ? irqentry_exit+0x140/0x7e0 [ 415.141129] entry_SYSCALL_64_after_hwframe+0x76/0x7e get_cpu() disables preemption while the spinlock hv_ringbuffer_write is using is converted to an rt-mutex under PREEMPT_RT. Signed-off-by: Jan Kiszka Tested-by: Florian Bezdeka Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://patch.msgid.link/0c7fb5cd-fb21-4760-8593-e04bade84744@siemens.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 19e18939d3a5..19ff0004e259 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1855,8 +1855,9 @@ static enum scsi_qc_status storvsc_queuecommand(struct Scsi_Host *host, cmd_request->payload_sz = payload_sz; /* Invokes the vsc to start an IO */ - ret = storvsc_do_io(dev, cmd_request, get_cpu()); - put_cpu(); + migrate_disable(); + ret = storvsc_do_io(dev, cmd_request, smp_processor_id()); + migrate_enable(); if (ret) scsi_dma_unmap(scmnd); From 2e6b5cd6a4b37a95b78cf8c39a979b58c915c8ed Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Mon, 9 Feb 2026 19:17:34 +0400 Subject: [PATCH 049/828] scsi: ufs: core: Fix RPMB region size detection for UFS 2.2 Older UFS spec devices (2.2 and earlier) do not expose per-region RPMB sizes, as only one RPMB region is supported. In such cases, the size of the single RPMB region can be deduced from the Logical Block Count and Logical Block Size fields in the RPMB Unit Descriptor. Add a fallback mechanism to calculate the RPMB region size from these fields if the device implements an older spec, so that the RPMB driver can work with such devices - otherwise it silently skips the whole RPMB. Section 14.1.4.6 (RPMB Unit Descriptor) Link: https://www.jedec.org/system/files/docs/JESD220C-2_2.pdf Cc: stable@vger.kernel.org Fixes: b06b8c421485 ("scsi: ufs: core: Add OP-TEE based RPMB driver for UFS devices") Reviewed-by: Bean Huo Signed-off-by: Alexey Charkov Link: https://patch.msgid.link/20260209-ufs-rpmb-v3-1-b1804e71bd38@flipper.net Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8349fe2090db..12f1bdc70587 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -5248,6 +5249,25 @@ static void ufshcd_lu_init(struct ufs_hba *hba, struct scsi_device *sdev) hba->dev_info.rpmb_region_size[1] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION1_SIZE]; hba->dev_info.rpmb_region_size[2] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION2_SIZE]; hba->dev_info.rpmb_region_size[3] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION3_SIZE]; + + if (hba->dev_info.wspecversion <= 0x0220) { + /* + * These older spec chips have only one RPMB region, + * sized between 128 kB minimum and 16 MB maximum. + * No per region size fields are provided (respective + * REGIONX_SIZE fields always contain zeros), so get + * it from the logical block count and size fields for + * compatibility + * + * (See JESD220C-2_2 Section 14.1.4.6 + * RPMB Unit Descriptor,* offset 13h, 4 bytes) + */ + hba->dev_info.rpmb_region_size[0] = + (get_unaligned_be64(desc_buf + + RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT) + << desc_buf[RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE]) + / SZ_128K; + } } From 70ca8caa96ce473647054f5c7b9dab5423902402 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 10 Feb 2026 20:18:50 +0100 Subject: [PATCH 050/828] scsi: ses: Fix devices attaching to different hosts On a multipath SAS system some devices don't end up with correct symlinks from the SCSI device to its enclosure. Some devices even have enclosure links pointing to enclosures attached to different SCSI hosts. ses_match_to_enclosure() calls enclosure_for_each_device() which iterates over all enclosures on the system, not just enclosures attached to the current SCSI host. Replace the iteration with a direct call to ses_enclosure_find_by_addr(). Reviewed-by: David Jeffery Signed-off-by: Tomas Henzl Link: https://patch.msgid.link/20260210191850.36784-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 789b170da652..98a7367d2f20 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -528,9 +528,8 @@ struct efd { }; static int ses_enclosure_find_by_addr(struct enclosure_device *edev, - void *data) + struct efd *efd) { - struct efd *efd = data; int i; struct ses_component *scomp; @@ -683,7 +682,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev, if (efd.addr) { efd.dev = &sdev->sdev_gendev; - enclosure_for_each_device(ses_enclosure_find_by_addr, &efd); + ses_enclosure_find_by_addr(edev, &efd); } } From 5b313760059c9df7d60aba7832279bcb81b4aec0 Mon Sep 17 00:00:00 2001 From: Won Jung Date: Wed, 11 Feb 2026 15:01:05 +0900 Subject: [PATCH 051/828] scsi: ufs: core: Reset urgent_bkops_lvl to allow runtime PM power mode Ensures that UFS Runtime PM can achieve power saving after System PM suspend by resetting hba->urgent_bkops_lvl. Also modify the ufshcd_bkops_exception_event_handler to avoid setting urgent_bkops_lvl when status is 0, which helps maintain optimal power management. On UFS devices supporting UFSHCD_CAP_AUTO_BKOPS_SUSPEND, a BKOPS exception event can lead to a situation where UFS Runtime PM can't enter low-power mode states even after the BKOPS exception has been resolved. BKOPS exception with bkops status 0 occurs, the driver logs: "ufshcd_bkops_exception_event_handler: device raised urgent BKOPS exception for bkops status 0" When a BKOPS exception occurs, ufshcd_bkops_exception_event_handler() reads the BKOPS status and sets hba->urgent_bkops_lvl to BKOPS_STATUS_NO_OP(0). This allows the device to perform Runtime PM without changing the UFS power mode. (__ufshcd_wl_suspend(hba, UFS_RUNTIME_PM)) During system PM suspend, ufshcd_disable_auto_bkops() is called, disabling auto bkops. After UFS System PM Resume, when runtime PM attempts to suspend again, ufshcd_urgent_bkops() is invoked. Since hba->urgent_bkops_lvl remains at BKOPS_STATUS_NO_OP(0), ufshcd_enable_auto_bkops() is triggered. However, in ufshcd_bkops_ctrl(), the driver compares the current BKOPS status with hba->urgent_bkops_lvl, and only enables auto bkops if curr_status >= hba->urgent_bkops_lvl. Since both values are 0, the condition is met As a result, __ufshcd_wl_suspend(hba, UFS_RUNTIME_PM) skips power mode transitions and remains in an active state, preventing power saving even though no urgent BKOPS condition exists. Signed-off-by: Won Jung Reviewed-by: Peter Wang Link: https://patch.msgid.link/1891546521.01770806581968.JavaMail.epsvc@epcpadp2new Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 12f1bdc70587..3bbc2b51c87b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5982,6 +5982,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) hba->auto_bkops_enabled = false; trace_ufshcd_auto_bkops_state(hba, "Disabled"); + hba->urgent_bkops_lvl = BKOPS_STATUS_PERF_IMPACT; hba->is_urgent_bkops_lvl_checked = false; out: return err; @@ -6085,7 +6086,7 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) * impacted or critical. Handle these device by determining their urgent * bkops status at runtime. */ - if (curr_status < BKOPS_STATUS_PERF_IMPACT) { + if ((curr_status > BKOPS_STATUS_NO_OP) && (curr_status < BKOPS_STATUS_PERF_IMPACT)) { dev_err(hba->dev, "%s: device raised urgent BKOPS exception for bkops status %d\n", __func__, curr_status); /* update the current status as the urgent bkops level */ From fa96392ebebc8fade2b878acb14cce0f71016503 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Thu, 12 Feb 2026 12:30:26 +0530 Subject: [PATCH 052/828] scsi: mpi3mr: Add NULL checks when resetting request and reply queues The driver encountered a crash during resource cleanup when the reply and request queues were NULL due to freed memory. This issue occurred when the creation of reply or request queues failed, and the driver freed the memory first, but attempted to mem set the content of the freed memory, leading to a system crash. Add NULL pointer checks for reply and request queues before accessing the reply/request memory during cleanup Signed-off-by: Ranjan Kumar Link: https://patch.msgid.link/20260212070026.30263-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 1cfbdb773353..04d4a2aea7d7 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -4806,21 +4806,25 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) } for (i = 0; i < mrioc->num_queues; i++) { - mrioc->op_reply_qinfo[i].qid = 0; - mrioc->op_reply_qinfo[i].ci = 0; - mrioc->op_reply_qinfo[i].num_replies = 0; - mrioc->op_reply_qinfo[i].ephase = 0; - atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0); - atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0); - mpi3mr_memset_op_reply_q_buffers(mrioc, i); + if (mrioc->op_reply_qinfo) { + mrioc->op_reply_qinfo[i].qid = 0; + mrioc->op_reply_qinfo[i].ci = 0; + mrioc->op_reply_qinfo[i].num_replies = 0; + mrioc->op_reply_qinfo[i].ephase = 0; + atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0); + atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0); + mpi3mr_memset_op_reply_q_buffers(mrioc, i); + } - mrioc->req_qinfo[i].ci = 0; - mrioc->req_qinfo[i].pi = 0; - mrioc->req_qinfo[i].num_requests = 0; - mrioc->req_qinfo[i].qid = 0; - mrioc->req_qinfo[i].reply_qid = 0; - spin_lock_init(&mrioc->req_qinfo[i].q_lock); - mpi3mr_memset_op_req_q_buffers(mrioc, i); + if (mrioc->req_qinfo) { + mrioc->req_qinfo[i].ci = 0; + mrioc->req_qinfo[i].pi = 0; + mrioc->req_qinfo[i].num_requests = 0; + mrioc->req_qinfo[i].qid = 0; + mrioc->req_qinfo[i].reply_qid = 0; + spin_lock_init(&mrioc->req_qinfo[i].q_lock); + mpi3mr_memset_op_req_q_buffers(mrioc, i); + } } atomic_set(&mrioc->pend_large_data_sz, 0); From 38353c26db28efd984f51d426eac2396d299cca7 Mon Sep 17 00:00:00 2001 From: Salomon Dushimirimana Date: Fri, 13 Feb 2026 19:28:06 +0000 Subject: [PATCH 053/828] scsi: pm8001: Fix use-after-free in pm8001_queue_command() Commit e29c47fe8946 ("scsi: pm8001: Simplify pm8001_task_exec()") refactors pm8001_queue_command(), however it introduces a potential cause of a double free scenario when it changes the function to return -ENODEV in case of phy down/device gone state. In this path, pm8001_queue_command() updates task status and calls task_done to indicate to upper layer that the task has been handled. However, this also frees the underlying SAS task. A -ENODEV is then returned to the caller. When libsas sas_ata_qc_issue() receives this error value, it assumes the task wasn't handled/queued by LLDD and proceeds to clean up and free the task again, resulting in a double free. Since pm8001_queue_command() handles the SAS task in this case, it should return 0 to the caller indicating that the task has been handled. Fixes: e29c47fe8946 ("scsi: pm8001: Simplify pm8001_task_exec()") Signed-off-by: Salomon Dushimirimana Reviewed-by: Damien Le Moal Link: https://patch.msgid.link/20260213192806.439432-1-salomondush@google.com Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 6a8d35aea93a..645524f3fe2d 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -525,8 +525,9 @@ int pm8001_queue_command(struct sas_task *task, gfp_t gfp_flags) } else { task->task_done(task); } - rc = -ENODEV; - goto err_out; + spin_unlock_irqrestore(&pm8001_ha->lock, flags); + pm8001_dbg(pm8001_ha, IO, "pm8001_task_exec device gone\n"); + return 0; } ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_dev, task); From af3973e7b4fd91e6884b312526168869fb013d68 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Mon, 16 Feb 2026 15:10:55 +0100 Subject: [PATCH 054/828] scsi: snic: Remove unused linkstatus The (struct vnic_dev).linkstatus buffer is freed in svnic_dev_unregister() and referenced in svnic_dev_link_status() but never alloc'd. This means (struct vnic_dev).linkstatus is always null and the dealloc the reference in svnic_dev_link_status() is dead code. Signed-off-by: Thomas Fourier Acked-by: Karan Tilak Kumar Link: https://patch.msgid.link/20260216141056.59429-2-fourier.thomas@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/snic/vnic_dev.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/scsi/snic/vnic_dev.c b/drivers/scsi/snic/vnic_dev.c index 760f3f22095c..c4df0b17c86c 100644 --- a/drivers/scsi/snic/vnic_dev.c +++ b/drivers/scsi/snic/vnic_dev.c @@ -42,8 +42,6 @@ struct vnic_dev { struct vnic_devcmd_notify *notify; struct vnic_devcmd_notify notify_copy; dma_addr_t notify_pa; - u32 *linkstatus; - dma_addr_t linkstatus_pa; struct vnic_stats *stats; dma_addr_t stats_pa; struct vnic_devcmd_fw_info *fw_info; @@ -650,8 +648,6 @@ int svnic_dev_init(struct vnic_dev *vdev, int arg) int svnic_dev_link_status(struct vnic_dev *vdev) { - if (vdev->linkstatus) - return *vdev->linkstatus; if (!vnic_dev_notify_ready(vdev)) return 0; @@ -686,11 +682,6 @@ void svnic_dev_unregister(struct vnic_dev *vdev) sizeof(struct vnic_devcmd_notify), vdev->notify, vdev->notify_pa); - if (vdev->linkstatus) - dma_free_coherent(&vdev->pdev->dev, - sizeof(u32), - vdev->linkstatus, - vdev->linkstatus_pa); if (vdev->stats) dma_free_coherent(&vdev->pdev->dev, sizeof(struct vnic_stats), From 97af85787c1964a7e7b146c5d4e021f089af47ea Mon Sep 17 00:00:00 2001 From: Karan Tilak Kumar Date: Tue, 17 Feb 2026 12:46:58 -0800 Subject: [PATCH 055/828] scsi: snic: MAINTAINERS: Update snic maintainers Update snic maintainers. Signed-off-by: Karan Tilak Kumar Link: https://patch.msgid.link/20260217204658.5465-1-kartilak@cisco.com Signed-off-by: Martin K. Petersen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 70292bd3ed71..0900af786cce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6115,6 +6115,7 @@ F: drivers/scsi/fnic/ CISCO SCSI HBA DRIVER M: Karan Tilak Kumar +M: Narsimhulu Musini M: Sesidhar Baddela L: linux-scsi@vger.kernel.org S: Supported From a41dbf5e004edbe1260883c43a8bd134d9cb0c1c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 14 Feb 2026 16:22:13 +0100 Subject: [PATCH 056/828] mount: hold namespace_sem across copy in create_new_namespace() Fix an oversight when creating a new mount namespace. If someone had the bright idea to make the real rootfs a shared or dependent mount and it is later copied the copy will become a peer of the old real rootfs mount or a dependent mount of it. The namespace semaphore is dropped and we use mount lock exact to lock the new real root mount. If that fails or the subsequent do_loopback() fails we rely on the copy of the real root mount to be cleaned up by path_put(). The problem is that this doesn't deal with mount propagation and will leave the mounts linked in the propagation lists. When creating a new mount namespace create_new_namespace() first acquires namespace_sem to clone the nullfs root, drops it, then reacquires it via LOCK_MOUNT_EXACT which takes inode_lock first to respect the inode_lock -> namespace_sem lock ordering. This drop-and-reacquire pattern is fragile and was the source of the propagation cleanup bug fixed in the preceding commit. Extend lock_mount_exact() with a copy_mount mode that clones the mount under the locks atomically. When copy_mount is true, path_overmounted() is skipped since we're copying the mount, not mounting on top of it - the nullfs root always has rootfs mounted on top so the check would always fail. If clone_mnt() fails after get_mountpoint() has pinned the mountpoint, __unlock_mount() is used to properly unpin the mountpoint and release both locks. This allows create_new_namespace() to use LOCK_MOUNT_EXACT_COPY which takes inode_lock and namespace_sem once and holds them throughout the clone and subsequent mount operations, eliminating the drop-and-reacquire pattern entirely. Reported-by: syzbot+a89f9434fb5a001ccd58@syzkaller.appspotmail.com Fixes: 9b8a0ba68246 ("mount: add OPEN_TREE_NAMESPACE") # mainline only Link: https://lore.kernel.org/699047f6.050a0220.2757fb.0024.GAE@google.com Signed-off-by: Christian Brauner --- fs/namespace.c | 117 +++++++++++++++++++++++++------------------------ 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 90700df65f0d..022e59afcb5e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2791,7 +2791,8 @@ static inline void unlock_mount(struct pinned_mountpoint *m) } static void lock_mount_exact(const struct path *path, - struct pinned_mountpoint *mp); + struct pinned_mountpoint *mp, bool copy_mount, + unsigned int copy_flags); #define LOCK_MOUNT_MAYBE_BENEATH(mp, path, beneath) \ struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ @@ -2799,7 +2800,10 @@ static void lock_mount_exact(const struct path *path, #define LOCK_MOUNT(mp, path) LOCK_MOUNT_MAYBE_BENEATH(mp, (path), false) #define LOCK_MOUNT_EXACT(mp, path) \ struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ - lock_mount_exact((path), &mp) + lock_mount_exact((path), &mp, false, 0) +#define LOCK_MOUNT_EXACT_COPY(mp, path, copy_flags) \ + struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ + lock_mount_exact((path), &mp, true, (copy_flags)) static int graft_tree(struct mount *mnt, const struct pinned_mountpoint *mp) { @@ -3073,16 +3077,13 @@ static struct file *open_detached_copy(struct path *path, unsigned int flags) return file; } -DEFINE_FREE(put_empty_mnt_ns, struct mnt_namespace *, - if (!IS_ERR_OR_NULL(_T)) free_mnt_ns(_T)) - static struct mnt_namespace *create_new_namespace(struct path *path, unsigned int flags) { - struct mnt_namespace *new_ns __free(put_empty_mnt_ns) = NULL; - struct path to_path __free(path_put) = {}; struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct user_namespace *user_ns = current_user_ns(); - struct mount *new_ns_root; + struct mnt_namespace *new_ns; + struct mount *new_ns_root, *old_ns_root; + struct path to_path; struct mount *mnt; unsigned int copy_flags = 0; bool locked = false; @@ -3094,71 +3095,63 @@ static struct mnt_namespace *create_new_namespace(struct path *path, unsigned in if (IS_ERR(new_ns)) return ERR_CAST(new_ns); - scoped_guard(namespace_excl) { - new_ns_root = clone_mnt(ns->root, ns->root->mnt.mnt_root, copy_flags); - if (IS_ERR(new_ns_root)) - return ERR_CAST(new_ns_root); + old_ns_root = ns->root; + to_path.mnt = &old_ns_root->mnt; + to_path.dentry = old_ns_root->mnt.mnt_root; - /* - * If the real rootfs had a locked mount on top of it somewhere - * in the stack, lock the new mount tree as well so it can't be - * exposed. - */ - mnt = ns->root; - while (mnt->overmount) { - mnt = mnt->overmount; - if (mnt->mnt.mnt_flags & MNT_LOCKED) - locked = true; - } + VFS_WARN_ON_ONCE(old_ns_root->mnt.mnt_sb->s_type != &nullfs_fs_type); + + LOCK_MOUNT_EXACT_COPY(mp, &to_path, copy_flags); + if (IS_ERR(mp.parent)) { + free_mnt_ns(new_ns); + return ERR_CAST(mp.parent); + } + new_ns_root = mp.parent; + + /* + * If the real rootfs had a locked mount on top of it somewhere + * in the stack, lock the new mount tree as well so it can't be + * exposed. + */ + mnt = old_ns_root; + while (mnt->overmount) { + mnt = mnt->overmount; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + locked = true; } /* - * We dropped the namespace semaphore so we can actually lock - * the copy for mounting. The copied mount isn't attached to any - * mount namespace and it is thus excluded from any propagation. - * So realistically we're isolated and the mount can't be - * overmounted. - */ - - /* Borrow the reference from clone_mnt(). */ - to_path.mnt = &new_ns_root->mnt; - to_path.dentry = dget(new_ns_root->mnt.mnt_root); - - /* Now lock for actual mounting. */ - LOCK_MOUNT_EXACT(mp, &to_path); - if (unlikely(IS_ERR(mp.parent))) - return ERR_CAST(mp.parent); - - /* - * We don't emulate unshare()ing a mount namespace. We stick to the - * restrictions of creating detached bind-mounts. It has a lot - * saner and simpler semantics. + * We don't emulate unshare()ing a mount namespace. We stick + * to the restrictions of creating detached bind-mounts. It + * has a lot saner and simpler semantics. */ mnt = __do_loopback(path, flags, copy_flags); - if (IS_ERR(mnt)) - return ERR_CAST(mnt); - scoped_guard(mount_writer) { + if (IS_ERR(mnt)) { + emptied_ns = new_ns; + umount_tree(new_ns_root, 0); + return ERR_CAST(mnt); + } + if (locked) mnt->mnt.mnt_flags |= MNT_LOCKED; /* - * Now mount the detached tree on top of the copy of the - * real rootfs we created. + * now mount the detached tree on top of the copy + * of the real rootfs we created. */ attach_mnt(mnt, new_ns_root, mp.mp); if (user_ns != ns->user_ns) lock_mnt_tree(new_ns_root); } - /* Add all mounts to the new namespace. */ - for (struct mount *p = new_ns_root; p; p = next_mnt(p, new_ns_root)) { - mnt_add_to_ns(new_ns, p); + for (mnt = new_ns_root; mnt; mnt = next_mnt(mnt, new_ns_root)) { + mnt_add_to_ns(new_ns, mnt); new_ns->nr_mounts++; } - new_ns->root = real_mount(no_free_ptr(to_path.mnt)); + new_ns->root = new_ns_root; ns_tree_add_raw(new_ns); - return no_free_ptr(new_ns); + return new_ns; } static struct file *open_new_namespace(struct path *path, unsigned int flags) @@ -3840,16 +3833,20 @@ static int do_new_mount(const struct path *path, const char *fstype, } static void lock_mount_exact(const struct path *path, - struct pinned_mountpoint *mp) + struct pinned_mountpoint *mp, bool copy_mount, + unsigned int copy_flags) { struct dentry *dentry = path->dentry; int err; + /* Assert that inode_lock() locked the correct inode. */ + VFS_WARN_ON_ONCE(copy_mount && !path_mounted(path)); + inode_lock(dentry->d_inode); namespace_lock(); if (unlikely(cant_mount(dentry))) err = -ENOENT; - else if (path_overmounted(path)) + else if (!copy_mount && path_overmounted(path)) err = -EBUSY; else err = get_mountpoint(dentry, mp); @@ -3857,9 +3854,15 @@ static void lock_mount_exact(const struct path *path, namespace_unlock(); inode_unlock(dentry->d_inode); mp->parent = ERR_PTR(err); - } else { - mp->parent = real_mount(path->mnt); + return; } + + if (copy_mount) + mp->parent = clone_mnt(real_mount(path->mnt), dentry, copy_flags); + else + mp->parent = real_mount(path->mnt); + if (unlikely(IS_ERR(mp->parent))) + __unlock_mount(mp); } int finish_automount(struct vfsmount *__m, const struct path *path) From 4a403d7aa9074f527f064ef0806aaab38d14b07c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 29 Jan 2026 14:52:22 +0100 Subject: [PATCH 057/828] namespace: fix proc mount iteration The m->index isn't updated when m->show() overflows and retains its value before the current mount causing a restart to start at the same value. If that happens in short order to due a quickly expanding mount table this would cause the same mount to be shown again and again. Ensure that *pos always equals the mount id of the mount that was returned by start/next. On restart after overflow mnt_find_id_at(*pos) finds the exact mount. This should avoid duplicates, avoid skips and should handle concurrent modification just fine. Cc: Fixed: 2eea9ce4310d8 ("mounts: keep list of mounts in an rbtree") Link: https://patch.msgid.link/20260129-geleckt-treuhand-4bb940acacd9@brauner Signed-off-by: Christian Brauner --- fs/namespace.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 022e59afcb5e..31483c66ace8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1531,23 +1531,33 @@ static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id static void *m_start(struct seq_file *m, loff_t *pos) { struct proc_mounts *p = m->private; + struct mount *mnt; down_read(&namespace_sem); - return mnt_find_id_at(p->ns, *pos); + mnt = mnt_find_id_at(p->ns, *pos); + if (mnt) + *pos = mnt->mnt_id_unique; + return mnt; } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct mount *next = NULL, *mnt = v; + struct mount *mnt = v; struct rb_node *node = rb_next(&mnt->mnt_node); - ++*pos; if (node) { - next = node_to_mount(node); + struct mount *next = node_to_mount(node); *pos = next->mnt_id_unique; + return next; } - return next; + + /* + * No more mounts. Set pos past current mount's ID so that if + * iteration restarts, mnt_find_id_at() returns NULL. + */ + *pos = mnt->mnt_id_unique + 1; + return NULL; } static void m_stop(struct seq_file *m, void *v) From ef0b64741a53e47ce8022c973099e969094aa536 Mon Sep 17 00:00:00 2001 From: Jori Koolstra Date: Mon, 1 Dec 2025 13:23:38 +0100 Subject: [PATCH 058/828] minix: Correct errno in minix_new_inode The cases (!j || j > sbi->s_ninodes) can never occur unless the filesystem is broken, so this should not return ENOSPC, but EFSCORRUPTED. Signed-off-by: Jori Koolstra Link: https://patch.msgid.link/20251201122338.90568-1-jkoolstra@xs4all.nl Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/minix/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 7da66ca184f4..abec438330a7 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -247,7 +247,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode) j += i * bits_per_zone; if (!j || j > sbi->s_ninodes) { iput(inode); - return ERR_PTR(-ENOSPC); + return ERR_PTR(-EFSCORRUPTED); } inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = j; From 6c4b2243cb6c0755159bd567130d5e12e7b10d9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 7 Feb 2026 08:25:24 +0000 Subject: [PATCH 059/828] unshare: fix unshare_fs() handling There's an unpleasant corner case in unshare(2), when we have a CLONE_NEWNS in flags and current->fs hadn't been shared at all; in that case copy_mnt_ns() gets passed current->fs instead of a private copy, which causes interesting warts in proof of correctness] > I guess if private means fs->users == 1, the condition could still be true. Unfortunately, it's worse than just a convoluted proof of correctness. Consider the case when we have CLONE_NEWCGROUP in addition to CLONE_NEWNS (and current->fs->users == 1). We pass current->fs to copy_mnt_ns(), all right. Suppose it succeeds and flips current->fs->{pwd,root} to corresponding locations in the new namespace. Now we proceed to copy_cgroup_ns(), which fails (e.g. with -ENOMEM). We call put_mnt_ns() on the namespace created by copy_mnt_ns(), it's destroyed and its mount tree is dissolved, but... current->fs->root and current->fs->pwd are both left pointing to now detached mounts. They are pinning those, so it's not a UAF, but it leaves the calling process with unshare(2) failing with -ENOMEM _and_ leaving it with pwd and root on detached isolated mounts. The last part is clearly a bug. There is other fun related to that mess (races with pivot_root(), including the one between pivot_root() and fork(), of all things), but this one is easy to isolate and fix - treat CLONE_NEWNS as "allocate a new fs_struct even if it hadn't been shared in the first place". Sure, we could go for something like "if both CLONE_NEWNS *and* one of the things that might end up failing after copy_mnt_ns() call in create_new_namespaces() are set, force allocation of new fs_struct", but let's keep it simple - the cost of copy_fs_struct() is trivial. Another benefit is that copy_mnt_ns() with CLONE_NEWNS *always* gets a freshly allocated fs_struct, yet to be attached to anything. That seriously simplifies the analysis... FWIW, that bug had been there since the introduction of unshare(2) ;-/ Signed-off-by: Al Viro Link: https://patch.msgid.link/20260207082524.GE3183987@ZenIV Tested-by: Waiman Long Signed-off-by: Christian Brauner --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index e832da9d15a4..65113a304518 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3085,7 +3085,7 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) return 0; /* don't need lock here; in the worst case we'll do useless copy */ - if (fs->users == 1) + if (!(unshare_flags & CLONE_NEWNS) && fs->users == 1) return 0; *new_fsp = copy_fs_struct(fs); From 7be41fb00e2c2a823f271a8318b453ca11812f1e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Oct 2025 08:30:11 +0300 Subject: [PATCH 060/828] accel: ethosu: Fix shift overflow in cmd_to_addr() The "((cmd[0] & 0xff0000) << 16)" shift is zero. This was intended to be (((u64)cmd[0] & 0xff0000) << 16). Move the cast to the correct location. Fixes: 5a5e9c0228e6 ("accel: Add Arm Ethos-U NPU driver") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aQGmY64tWcwOGFP4@stanley.mountain Signed-off-by: Rob Herring (Arm) --- drivers/accel/ethosu/ethosu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c index 473b5f5d7514..7b073116314b 100644 --- a/drivers/accel/ethosu/ethosu_gem.c +++ b/drivers/accel/ethosu/ethosu_gem.c @@ -154,7 +154,7 @@ static void cmd_state_init(struct cmd_state *st) static u64 cmd_to_addr(u32 *cmd) { - return ((u64)((cmd[0] & 0xff0000) << 16)) | cmd[1]; + return (((u64)cmd[0] & 0xff0000) << 16) | cmd[1]; } static u64 dma_length(struct ethosu_validated_cmdstream_info *info, From 249013e673fce3506c61063c7cbedd75b4c668d8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 22:09:21 -0800 Subject: [PATCH 061/828] fsnotify: drop unused helper Remove this helper now that all users have been converted to fserror_report_metadata as of 7.0-rc1. Cc: jack@suse.cz Cc: amir73il@gmail.com Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/177148129543.716249.980530449513340111.stgit@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/fsnotify.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 28a9cb13fbfa..079c18bcdbde 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -495,19 +495,6 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) fsnotify_dentry(dentry, mask); } -static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, - int error) -{ - struct fs_error_report report = { - .error = error, - .inode = inode, - .sb = sb, - }; - - return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, - NULL, NULL, NULL, 0); -} - static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) { fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); From 294f54f849d846f4643a67db9b41b63867dc8bfe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 22:09:37 -0800 Subject: [PATCH 062/828] fserror: fix lockdep complaint when igrabbing inode Christoph Hellwig reported a lockdep splat in generic/108: ================================ WARNING: inconsistent lock state 6.19.0+ #4827 Tainted: G N -------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. swapper/1/0 [HC1[1]:SC0[0]:HE0:SE1] takes: ffff88811ed1b140 (&sb->s_type->i_lock_key#33){?.+.}-{3:3}, at: igrab+0x1a/0xb0 {HARDIRQ-ON-W} state was registered at: lock_acquire+0xca/0x2c0 _raw_spin_lock+0x2e/0x40 unlock_new_inode+0x2c/0xc0 xfs_iget+0xcf4/0x1080 xfs_trans_metafile_iget+0x3d/0x100 xfs_metafile_iget+0x2b/0x50 xfs_mount_setup_metadir+0x20/0x60 xfs_mountfs+0x457/0xa60 xfs_fs_fill_super+0x6b3/0xa90 get_tree_bdev_flags+0x13c/0x1e0 vfs_get_tree+0x27/0xe0 vfs_cmd_create+0x54/0xe0 __do_sys_fsconfig+0x309/0x620 do_syscall_64+0x8b/0xf80 entry_SYSCALL_64_after_hwframe+0x76/0x7e irq event stamp: 139080 hardirqs last enabled at (139079): [] do_idle+0x1ec/0x270 hardirqs last disabled at (139080): [] common_interrupt+0x19/0xe0 softirqs last enabled at (139032): [] __irq_exit_rcu+0xc3/0x120 softirqs last disabled at (139025): [] __irq_exit_rcu+0xc3/0x120 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&sb->s_type->i_lock_key#33); lock(&sb->s_type->i_lock_key#33); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffff8881052c81a0 (&vblk->vqs[i].lock){-.-.}-{3:3}, at: virtblk_done+0x4b/0x110 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G N 6.19.0+ #4827 PREEMPT(full) Tainted: [N]=TEST Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x5b/0x80 print_usage_bug.part.0+0x22c/0x2c0 mark_lock+0xa6f/0xe90 __lock_acquire+0x10b6/0x25e0 lock_acquire+0xca/0x2c0 _raw_spin_lock+0x2e/0x40 igrab+0x1a/0xb0 fserror_report+0x135/0x260 iomap_finish_ioend_buffered+0x170/0x210 clone_endio+0x8f/0x1c0 blk_update_request+0x1e4/0x4d0 blk_mq_end_request+0x1b/0x100 virtblk_done+0x6f/0x110 vring_interrupt+0x59/0x80 __handle_irq_event_percpu+0x8a/0x2e0 handle_irq_event+0x33/0x70 handle_edge_irq+0xdd/0x1e0 __common_interrupt+0x6f/0x180 common_interrupt+0xb7/0xe0 It looks like the concern here is that inode::i_lock is sometimes taken in IRQ context, and sometimes it is held when going to IRQ context, though it's a little difficult to tell since I think this is a kernel from after the actual 6.19 release but before 7.0-rc1. Either way, we don't need to take i_lock, because filesystems should not report files to fserror if they're about to be freed or have not yet been exposed to other threads, because the resulting fsnotify report will be meaningless. Therefore, bump inode::i_count directly and clarify the preconditions on the inode being passed in. Link: https://lore.kernel.org/linux-fsdevel/aY7BndIgQg3ci_6s@infradead.org/ Reported-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/177148129564.716249.3069780698231701540.stgit@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/ioend.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index e4d57cb969f1..4d1ef8a2cee9 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -69,11 +69,57 @@ static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) return folio_count; } +static DEFINE_SPINLOCK(failed_ioend_lock); +static LIST_HEAD(failed_ioend_list); + +static void +iomap_fail_ioends( + struct work_struct *work) +{ + struct iomap_ioend *ioend; + struct list_head tmp; + unsigned long flags; + + spin_lock_irqsave(&failed_ioend_lock, flags); + list_replace_init(&failed_ioend_list, &tmp); + spin_unlock_irqrestore(&failed_ioend_lock, flags); + + while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend, + io_list))) { + list_del_init(&ioend->io_list); + iomap_finish_ioend_buffered(ioend); + cond_resched(); + } +} + +static DECLARE_WORK(failed_ioend_work, iomap_fail_ioends); + +static void iomap_fail_ioend_buffered(struct iomap_ioend *ioend) +{ + unsigned long flags; + + /* + * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions + * in the fserror code. The caller no longer owns the ioend reference + * after the spinlock drops. + */ + spin_lock_irqsave(&failed_ioend_lock, flags); + if (list_empty(&failed_ioend_list)) + WARN_ON_ONCE(!schedule_work(&failed_ioend_work)); + list_add_tail(&ioend->io_list, &failed_ioend_list); + spin_unlock_irqrestore(&failed_ioend_lock, flags); +} + static void ioend_writeback_end_bio(struct bio *bio) { struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); ioend->io_error = blk_status_to_errno(bio->bi_status); + if (ioend->io_error) { + iomap_fail_ioend_buffered(ioend); + return; + } + iomap_finish_ioend_buffered(ioend); } From 023cd6d90f8aa2ef7b72d84be84a18e61ecebd64 Mon Sep 17 00:00:00 2001 From: Piotr Mazek Date: Thu, 5 Feb 2026 23:05:02 +0100 Subject: [PATCH 063/828] ACPI: PM: Save NVS memory on Lenovo G70-35 [821d6f0359b0614792ab8e2fb93b503e25a65079] prevented machines produced later than 2012 from saving NVS region to accelerate S3. Despite being made after 2012, Lenovo G70-35 still needs NVS memory saving during S3. A quirk is introduced for this platform. Signed-off-by: Piotr Mazek [ rjw: Subject adjustment ] Link: https://patch.msgid.link/GV2PPF3CD5B63CC2442EE3F76F8443EAD90D499A@GV2PPF3CD5B63CC.EURP251.PROD.OUTLOOK.COM Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 66ec81e306d4..132a9df98471 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -386,6 +386,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "80E1"), }, }, + { + .callback = init_nvs_save_s3, + .ident = "Lenovo G70-35", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "80Q5"), + }, + }, /* * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using * the Low Power S0 Idle firmware interface (see From ab140365fb62c0bdab22b2f516aff563b2559e3b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 19 Feb 2026 15:20:12 +0100 Subject: [PATCH 064/828] drbd: fix "LOGIC BUG" in drbd_al_begin_io_nonblock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though we check that we "should" be able to do lc_get_cumulative() while holding the device->al_lock spinlock, it may still fail, if some other code path decided to do lc_try_lock() with bad timing. If that happened, we logged "LOGIC BUG for enr=...", but still did not return an error. The rest of the code now assumed that this request has references for the relevant activity log extents. The implcations are that during an active resync, mutual exclusivity of resync versus application IO is not guaranteed. And a potential crash at this point may not realizs that these extents could have been target of in-flight IO and would need to be resynced just in case. Also, once the request completes, it will give up activity log references it does not even hold, which will trigger a BUG_ON(refcnt == 0) in lc_put(). Fix: Do not crash the kernel for a condition that is harmless during normal operation: also catch "e->refcnt == 0", not only "e == NULL" when being noisy about "al_complete_io() called on inactive extent %u\n". And do not try to be smart and "guess" whether something will work, then be surprised when it does not. Deal with the fact that it may or may not work. If it does not, remember a possible "partially in activity log" state (only possible for requests that cross extent boundaries), and return an error code from drbd_al_begin_io_nonblock(). A latter call for the same request will then resume from where we left off. Cc: stable@vger.kernel.org Signed-off-by: Lars Ellenberg Signed-off-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 53 +++++++++++++----------------- drivers/block/drbd/drbd_interval.h | 5 ++- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 742b2908ff68..b3dbf6c76e98 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -483,38 +483,20 @@ void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i) int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) { - struct lru_cache *al = device->act_log; /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - unsigned nr_al_extents; - unsigned available_update_slots; unsigned enr; - D_ASSERT(device, first <= last); - - nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */ - available_update_slots = min(al->nr_elements - al->used, - al->max_pending_changes - al->pending_changes); - - /* We want all necessary updates for a given request within the same transaction - * We could first check how many updates are *actually* needed, - * and use that instead of the worst-case nr_al_extents */ - if (available_update_slots < nr_al_extents) { - /* Too many activity log extents are currently "hot". - * - * If we have accumulated pending changes already, - * we made progress. - * - * If we cannot get even a single pending change through, - * stop the fast path until we made some progress, - * or requests to "cold" extents could be starved. */ - if (!al->pending_changes) - __set_bit(__LC_STARVING, &device->act_log->flags); - return -ENOBUFS; + if (i->partially_in_al_next_enr) { + D_ASSERT(device, first < i->partially_in_al_next_enr); + D_ASSERT(device, last >= i->partially_in_al_next_enr); + first = i->partially_in_al_next_enr; } + D_ASSERT(device, first <= last); + /* Is resync active in this area? */ for (enr = first; enr <= last; enr++) { struct lc_element *tmp; @@ -529,14 +511,21 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval * } } - /* Checkout the refcounts. - * Given that we checked for available elements and update slots above, - * this has to be successful. */ + /* Try to checkout the refcounts. */ for (enr = first; enr <= last; enr++) { struct lc_element *al_ext; al_ext = lc_get_cumulative(device->act_log, enr); - if (!al_ext) - drbd_info(device, "LOGIC BUG for enr=%u\n", enr); + + if (!al_ext) { + /* Did not work. We may have exhausted the possible + * changes per transaction. Or raced with someone + * "locking" it against changes. + * Remember where to continue from. + */ + if (enr > first) + i->partially_in_al_next_enr = enr; + return -ENOBUFS; + } } return 0; } @@ -556,7 +545,11 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i) for (enr = first; enr <= last; enr++) { extent = lc_find(device->act_log, enr); - if (!extent) { + /* Yes, this masks a bug elsewhere. However, during normal + * operation this is harmless, so no need to crash the kernel + * by the BUG_ON(refcount == 0) in lc_put(). + */ + if (!extent || extent->refcnt == 0) { drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr); continue; } diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index 366489b72fe9..5d3213b81eed 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -8,12 +8,15 @@ struct drbd_interval { struct rb_node rb; sector_t sector; /* start sector of the interval */ - unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ + unsigned int size; /* size in bytes */ unsigned int local:1 /* local or remote request? */; unsigned int waiting:1; /* someone is waiting for completion */ unsigned int completed:1; /* this has been completed already; * ignore for conflict detection */ + + /* to resume a partially successful drbd_al_begin_io_nonblock(); */ + unsigned int partially_in_al_next_enr; }; static inline void drbd_clear_interval(struct drbd_interval *i) From 81b1f046ff8a5ad5da2c970cff354b61dfa1d6b1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 12 Jan 2026 18:04:12 +0100 Subject: [PATCH 065/828] drbd: Replace deprecated strcpy with strscpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strcpy() has been deprecated [1] because it performs no bounds checking on the destination buffer, which can lead to buffer overflows. Replace it with the safer strscpy(). No functional changes. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy [1] Signed-off-by: Thorsten Blum Reviewed-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 14 +++++++++----- drivers/block/drbd/drbd_receiver.c | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1f6ac9202b66..ba00b5f21a49 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -732,9 +733,9 @@ int drbd_send_sync_param(struct drbd_peer_device *peer_device) } if (apv >= 88) - strcpy(p->verify_alg, nc->verify_alg); + strscpy(p->verify_alg, nc->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, nc->csums_alg); + strscpy(p->csums_alg, nc->csums_alg); rcu_read_unlock(); return drbd_send_command(peer_device, sock, cmd, size, NULL, 0); @@ -745,6 +746,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm struct drbd_socket *sock; struct p_protocol *p; struct net_conf *nc; + size_t integrity_alg_len; int size, cf; sock = &connection->data; @@ -762,8 +764,10 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm } size = sizeof(*p); - if (connection->agreed_pro_version >= 87) - size += strlen(nc->integrity_alg) + 1; + if (connection->agreed_pro_version >= 87) { + integrity_alg_len = strlen(nc->integrity_alg) + 1; + size += integrity_alg_len; + } p->protocol = cpu_to_be32(nc->wire_protocol); p->after_sb_0p = cpu_to_be32(nc->after_sb_0p); @@ -778,7 +782,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm p->conn_flags = cpu_to_be32(cf); if (connection->agreed_pro_version >= 87) - strcpy(p->integrity_alg, nc->integrity_alg); + strscpy(p->integrity_alg, nc->integrity_alg, integrity_alg_len); rcu_read_unlock(); return __conn_send_command(connection, sock, cmd, size, NULL, 0); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3de919b6f0e1..3d0a061b6c40 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3801,14 +3801,14 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i *new_net_conf = *old_net_conf; if (verify_tfm) { - strcpy(new_net_conf->verify_alg, p->verify_alg); + strscpy(new_net_conf->verify_alg, p->verify_alg); new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; crypto_free_shash(peer_device->connection->verify_tfm); peer_device->connection->verify_tfm = verify_tfm; drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(new_net_conf->csums_alg, p->csums_alg); + strscpy(new_net_conf->csums_alg, p->csums_alg); new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; crypto_free_shash(peer_device->connection->csums_tfm); peer_device->connection->csums_tfm = csums_tfm; From 6b3e458806e34f1142592f786d3eb0ebac209cc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 19 Feb 2026 16:43:35 +0100 Subject: [PATCH 066/828] HID: Document memory allocation properties of report_fixup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointer returned by the report_fixup() hook does not get freed by the caller. Instead, report_fixup() must return (in return value and *rsize) a memory buffer with at least the same lifetime as the input buffer (defined by rdesc and original *rsize). This is usually achieved using one of the following techniques: * Returning a pointer and size to a sub-portion of the input buffer * Returning a pointer to a static buffer * Allocating a buffer with a devm_*() function, which will automatically get freed when the device is removed. Signed-off-by: Günther Noack Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/hid.h b/include/linux/hid.h index dce862cafbbd..2990b9f94cb5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -836,6 +836,12 @@ struct hid_usage_id { * raw_event and event should return negative on error, any other value will * pass the event on to .event() typically return 0 for success. * + * report_fixup must return a report descriptor pointer whose lifetime is at + * least that of the input rdesc. This is usually done by mutating the input + * rdesc and returning it or a sub-portion of it. In case a new buffer is + * allocated and returned, the implementation of report_fixup is responsible for + * freeing it later. + * * input_mapping shall return a negative value to completely ignore this usage * (e.g. doubled or invalid usage), zero to continue with parsing of this * usage by generic code (no special handling needed) or positive to skip From 239c15116d80f67d32f00acc34575f1a6b699613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 19 Feb 2026 16:43:36 +0100 Subject: [PATCH 067/828] HID: apple: avoid memory leak in apple_report_fixup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The apple_report_fixup() function was returning a newly kmemdup()-allocated buffer, but never freeing it. The caller of report_fixup() does not take ownership of the returned pointer, but it *is* permitted to return a sub-portion of the input rdesc, whose lifetime is managed by the caller. Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Günther Noack Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-apple.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 233e367cce1d..894adc23367b 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -686,9 +686,7 @@ static const __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, hid_info(hdev, "fixing up Magic Keyboard battery report descriptor\n"); *rsize = *rsize - 1; - rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL); - if (!rdesc) - return NULL; + rdesc = rdesc + 1; rdesc[0] = 0x05; rdesc[1] = 0x01; From 91e8c6e601bdc1ccdf886479b6513c01c7e51c2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 19 Feb 2026 16:43:37 +0100 Subject: [PATCH 068/828] HID: magicmouse: avoid memory leak in magicmouse_report_fixup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The magicmouse_report_fixup() function was returning a newly kmemdup()-allocated buffer, but never freeing it. The caller of report_fixup() does not take ownership of the returned pointer, but it *is* permitted to return a sub-portion of the input rdesc, whose lifetime is managed by the caller. Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Günther Noack Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-magicmouse.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 91f621ceb924..17908d52c027 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -994,9 +994,7 @@ static const __u8 *magicmouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, hid_info(hdev, "fixing up magicmouse battery report descriptor\n"); *rsize = *rsize - 1; - rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL); - if (!rdesc) - return NULL; + rdesc = rdesc + 1; rdesc[0] = 0x05; rdesc[1] = 0x01; From 2bad24c17742fc88973d6aea526ce1353f5334a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 19 Feb 2026 16:43:38 +0100 Subject: [PATCH 069/828] HID: asus: avoid memory leak in asus_report_fixup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The asus_report_fixup() function was returning a newly allocated kmemdup()-allocated buffer, but never freeing it. Switch to devm_kzalloc() to ensure the memory is managed and freed automatically when the device is removed. The caller of report_fixup() does not take ownership of the returned pointer, but it is permitted to return a pointer whose lifetime is at least that of the input buffer. Also fix a harmless out-of-bounds read by copying only the original descriptor size. Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Günther Noack Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-asus.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 8ffcd12038e8..7a08e964b9cc 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1399,14 +1399,21 @@ static const __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, */ if (*rsize == rsize_orig && rdesc[offs] == 0x09 && rdesc[offs + 1] == 0x76) { - *rsize = rsize_orig + 1; - rdesc = kmemdup(rdesc, *rsize, GFP_KERNEL); - if (!rdesc) - return NULL; + __u8 *new_rdesc; + + new_rdesc = devm_kzalloc(&hdev->dev, rsize_orig + 1, + GFP_KERNEL); + if (!new_rdesc) + return rdesc; hid_info(hdev, "Fixing up %s keyb report descriptor\n", drvdata->quirks & QUIRK_T100CHI ? "T100CHI" : "T90CHI"); + + memcpy(new_rdesc, rdesc, rsize_orig); + *rsize = rsize_orig + 1; + rdesc = new_rdesc; + memmove(rdesc + offs + 4, rdesc + offs + 2, 12); rdesc[offs] = 0x19; rdesc[offs + 1] = 0x00; From ecfa6f34492c493a9a1dc2900f3edeb01c79946b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 19 Feb 2026 15:33:54 +0100 Subject: [PATCH 070/828] HID: Add HID_CLAIMED_INPUT guards in raw_event callbacks missing them In commit 2ff5baa9b527 ("HID: appleir: Fix potential NULL dereference at raw event handle"), we handle the fact that raw event callbacks can happen even for a HID device that has not been "claimed" causing a crash if a broken device were attempted to be connected to the system. Fix up the remaining in-tree HID drivers that forgot to add this same check to resolve the same issue. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Bastien Nocera Cc: linux-input@vger.kernel.org Cc: stable Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-cmedia.c | 2 +- drivers/hid/hid-creative-sb0540.c | 2 +- drivers/hid/hid-zydacron.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-cmedia.c b/drivers/hid/hid-cmedia.c index 528d7f361215..8bf5649b0c79 100644 --- a/drivers/hid/hid-cmedia.c +++ b/drivers/hid/hid-cmedia.c @@ -99,7 +99,7 @@ static int cmhid_raw_event(struct hid_device *hid, struct hid_report *report, { struct cmhid *cm = hid_get_drvdata(hid); - if (len != CM6533_JD_RAWEV_LEN) + if (len != CM6533_JD_RAWEV_LEN || !(hid->claimed & HID_CLAIMED_INPUT)) goto out; if (memcmp(data+CM6533_JD_SFX_OFFSET, ji_sfx, sizeof(ji_sfx))) goto out; diff --git a/drivers/hid/hid-creative-sb0540.c b/drivers/hid/hid-creative-sb0540.c index b4c8e7a5d3e0..dfd6add353d1 100644 --- a/drivers/hid/hid-creative-sb0540.c +++ b/drivers/hid/hid-creative-sb0540.c @@ -153,7 +153,7 @@ static int creative_sb0540_raw_event(struct hid_device *hid, u64 code, main_code; int key; - if (len != 6) + if (len != 6 || !(hid->claimed & HID_CLAIMED_INPUT)) return 0; /* From daemons/hw_hiddev.c sb0540_rec() in lirc */ diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c index 3bdb26f45592..1aae80f848f5 100644 --- a/drivers/hid/hid-zydacron.c +++ b/drivers/hid/hid-zydacron.c @@ -114,7 +114,7 @@ static int zc_raw_event(struct hid_device *hdev, struct hid_report *report, unsigned key; unsigned short index; - if (report->id == data[0]) { + if (report->id == data[0] && (hdev->claimed & HID_CLAIMED_INPUT)) { /* break keys */ for (index = 0; index < 4; index++) { From 858d2a4f67ff69e645a43487ef7ea7f28f06deae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Feb 2026 16:12:05 +0000 Subject: [PATCH 071/828] tcp: fix potential race in tcp_v6_syn_recv_sock() Code in tcp_v6_syn_recv_sock() after the call to tcp_v4_syn_recv_sock() is done too late. After tcp_v4_syn_recv_sock(), the child socket is already visible from TCP ehash table and other cpus might use it. Since newinet->pinet6 is still pointing to the listener ipv6_pinfo bad things can happen as syzbot found. Move the problematic code in tcp_v6_mapped_child_init() and call this new helper from tcp_v4_syn_recv_sock() before the ehash insertion. This allows the removal of one tcp_sync_mss(), since tcp_v4_syn_recv_sock() will call it with the correct context. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+937b5bbb6a815b3e5d0b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69949275.050a0220.2eeac1.0145.GAE@google.com/ Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260217161205.2079883-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 4 +- include/net/tcp.h | 4 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_fastopen.c | 2 +- net/ipv4/tcp_ipv4.c | 8 ++- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 98 +++++++++++++----------------- net/mptcp/subflow.c | 6 +- net/smc/af_smc.c | 6 +- 9 files changed, 66 insertions(+), 66 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ecb362025c4e..5cb3056d6ddc 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -42,7 +42,9 @@ struct inet_connection_sock_af_ops { struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); u16 net_header_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); diff --git a/include/net/tcp.h b/include/net/tcp.h index 40e72b9cb85f..eb8bf63fdafc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -544,7 +544,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int tcp_connect(struct sock *sk); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 569befcf021b..061751aabc8e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -203,7 +203,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, bool own_req; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, - NULL, &own_req); + NULL, &own_req, NULL); if (child) { refcount_set(&req->rsk_refcnt, 1); sock_rps_save_rxhash(child, skb); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index b30090cff3cf..df803f088f45 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -333,7 +333,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, bool own_req; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - NULL, &own_req); + NULL, &own_req, NULL); if (!child) return NULL; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6264fc0b2be5..2980c175d326 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1705,7 +1705,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct inet_request_sock *ireq; bool found_dup_sk = false; @@ -1757,6 +1759,10 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); +#if IS_ENABLED(CONFIG_IPV6) + if (opt_child_init) + opt_child_init(newsk, sk); +#endif tcp_ca_openreq_child(newsk, dst); tcp_sync_mss(newsk, dst4_mtu(dst)); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ec128865f5c0..d9c5a43bd281 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -925,7 +925,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * socket is created, wait for troubles. */ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - req, &own_req); + req, &own_req, NULL); if (!child) goto listen_overflow; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d10487b4e5bf..e46a0efae012 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1312,11 +1312,48 @@ static void tcp_v6_restore_cb(struct sk_buff *skb) sizeof(struct inet6_skb_parm)); } +/* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */ +static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk) +{ + struct inet_sock *newinet = inet_sk(newsk); + struct ipv6_pinfo *newnp; + + newinet->pinet6 = newnp = tcp_inet6_sk(newsk); + newinet->ipv6_fl_list = NULL; + + memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo)); + + newnp->saddr = newsk->sk_v6_rcv_saddr; + + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; + if (sk_is_mptcp(newsk)) + mptcpv6_handle_mapped(newsk, true); + newsk->sk_backlog_rcv = tcp_v4_do_rcv; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific; +#endif + + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->pktoptions = NULL; + newnp->opt = NULL; + + /* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */ + newnp->mcast_oif = newinet->mc_index; + newnp->mcast_hops = newinet->mc_ttl; + + newnp->rcv_flowinfo = 0; + if (inet6_test_bit(REPFLOW, sk)) + newnp->flow_label = 0; +} + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct inet_request_sock *ireq; @@ -1332,61 +1369,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * #endif struct flowi6 fl6; - if (skb->protocol == htons(ETH_P_IP)) { - /* - * v6 mapped - */ - - newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, - req_unhash, own_req); - - if (!newsk) - return NULL; - - newinet = inet_sk(newsk); - newinet->pinet6 = tcp_inet6_sk(newsk); - newinet->ipv6_fl_list = NULL; - - newnp = tcp_inet6_sk(newsk); - newtp = tcp_sk(newsk); - - memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - - newnp->saddr = newsk->sk_v6_rcv_saddr; - - inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; - if (sk_is_mptcp(newsk)) - mptcpv6_handle_mapped(newsk, true); - newsk->sk_backlog_rcv = tcp_v4_do_rcv; -#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) - newtp->af_specific = &tcp_sock_ipv6_mapped_specific; -#endif - - newnp->ipv6_mc_list = NULL; - newnp->ipv6_ac_list = NULL; - newnp->pktoptions = NULL; - newnp->opt = NULL; - newnp->mcast_oif = inet_iif(skb); - newnp->mcast_hops = ip_hdr(skb)->ttl; - newnp->rcv_flowinfo = 0; - if (inet6_test_bit(REPFLOW, sk)) - newnp->flow_label = 0; - - /* - * No need to charge this sock to the relevant IPv6 refcnt debug socks count - * here, tcp_create_openreq_child now does this for us, see the comment in - * that function for the gory details. -acme - */ - - /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 icsk.icsk_af_ops. - Sync it now. - */ - tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); - - return newsk; - } - + if (skb->protocol == htons(ETH_P_IP)) + return tcp_v4_syn_recv_sock(sk, skb, req, dst, + req_unhash, own_req, + tcp_v6_mapped_child_init); ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index f66129f1e649..8b25dd86ffeb 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -808,7 +808,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; @@ -855,7 +857,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, create_child: child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, - req_unhash, own_req); + req_unhash, own_req, opt_child_init); if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d8201eb3ac5f..18c56b0d7ad5 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -124,7 +124,9 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct smc_sock *smc; struct sock *child; @@ -142,7 +144,7 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, /* passthrough to original syn recv sock fct */ child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, - own_req); + own_req, opt_child_init); /* child must not inherit smc or its ops */ if (child) { rcu_assign_sk_user_data(child, NULL); From f891007ab1c77436950d10e09eae54507f1865ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2026 14:13:37 +0000 Subject: [PATCH 072/828] psp: use sk->sk_hash in psp_write_headers() udp_flow_src_port() is indirectly using sk->sk_txhash as a base, because __tcp_transmit_skb() uses skb_set_hash_from_sk(). This is problematic because this field can change over the lifetime of a TCP flow, thanks to calls to sk_rethink_txhash(). Problem is that some NIC might (ab)use the PSP UDP source port in their RSS computation, and PSP packets for a given flow could jump from one queue to another. In order to avoid surprises, it is safer to let Protective Load Balancing (PLB) get its entropy from the IPv6 flowlabel, and change psp_write_headers() to use sk->sk_hash which does not change for the duration of the flow. We might add a sysctl to select the behavior, if there is a need for it. Fixes: fc724515741a ("psp: provide encapsulation helper for drivers") Signed-off-by: Eric Dumazet Reviewed-By: Daniel Zahka Link: https://patch.msgid.link/20260218141337.999945-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/psp/psp_main.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index a8534124f626..066222eb56c4 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -166,9 +166,46 @@ static void psp_write_headers(struct net *net, struct sk_buff *skb, __be32 spi, { struct udphdr *uh = udp_hdr(skb); struct psphdr *psph = (struct psphdr *)(uh + 1); + const struct sock *sk = skb->sk; uh->dest = htons(PSP_DEFAULT_UDP_PORT); - uh->source = udp_flow_src_port(net, skb, 0, 0, false); + + /* A bit of theory: Selection of the source port. + * + * We need some entropy, so that multiple flows use different + * source ports for better RSS spreading at the receiver. + * + * We also need that all packets belonging to one TCP flow + * use the same source port through their duration, + * so that all these packets land in the same receive queue. + * + * udp_flow_src_port() is using sk_txhash, inherited from + * skb_set_hash_from_sk() call in __tcp_transmit_skb(). + * This field is subject to reshuffling, thanks to + * sk_rethink_txhash() calls in various TCP functions. + * + * Instead, use sk->sk_hash which is constant through + * the whole flow duration. + */ + if (likely(sk)) { + u32 hash = sk->sk_hash; + int min, max; + + /* These operations are cheap, no need to cache the result + * in another socket field. + */ + inet_get_local_port_range(net, &min, &max); + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possibility that any useful information to an + * attacker is leaked. Only upper 16 bits are relevant in the + * computation for 16 bit port value because we use a + * reciprocal divide. + */ + hash ^= hash << 16; + uh->source = htons((((u64)hash * (max - min)) >> 32) + min); + } else { + uh->source = udp_flow_src_port(net, skb, 0, 0, false); + } uh->check = 0; uh->len = htons(udp_len); From e1512c1db9e8794d8d130addd2615ec27231d994 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Wed, 18 Feb 2026 02:16:43 +0900 Subject: [PATCH 073/828] espintcp: Fix race condition in espintcp_close() This issue was discovered during a code audit. After cancel_work_sync() is called from espintcp_close(), espintcp_tx_work() can still be scheduled from paths such as the Delayed ACK handler or ksoftirqd. As a result, the espintcp_tx_work() worker may dereference a freed espintcp ctx or sk. The following is a simple race scenario: cpu0 cpu1 espintcp_close() cancel_work_sync(&ctx->work); espintcp_write_space() schedule_work(&ctx->work); To prevent this race condition, cancel_work_sync() is replaced with disable_work_sync(). Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Hyunwoo Kim Reviewed-by: Simon Horman Link: https://patch.msgid.link/aZSie7rEdh9Nu0eM@v4bel Signed-off-by: Jakub Kicinski --- net/xfrm/espintcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index bf744ac9d5a7..8709df716e98 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -536,7 +536,7 @@ static void espintcp_close(struct sock *sk, long timeout) sk->sk_prot = &tcp_prot; barrier(); - cancel_work_sync(&ctx->work); + disable_work_sync(&ctx->work); strp_done(&ctx->strp); skb_queue_purge(&ctx->out_queue); From 64868f5ecadeb359a49bc4485bfa7c497047f13a Mon Sep 17 00:00:00 2001 From: Ziyi Guo Date: Tue, 17 Feb 2026 17:50:12 +0000 Subject: [PATCH 074/828] net: usb: kaweth: remove TX queue manipulation in kaweth_set_rx_mode kaweth_set_rx_mode(), the ndo_set_rx_mode callback, calls netif_stop_queue() and netif_wake_queue(). These are TX queue flow control functions unrelated to RX multicast configuration. The premature netif_wake_queue() can re-enable TX while tx_urb is still in-flight, leading to a double usb_submit_urb() on the same URB: kaweth_start_xmit() { netif_stop_queue(); usb_submit_urb(kaweth->tx_urb); } kaweth_set_rx_mode() { netif_stop_queue(); netif_wake_queue(); // wakes TX queue before URB is done } kaweth_start_xmit() { netif_stop_queue(); usb_submit_urb(kaweth->tx_urb); // URB submitted while active } This triggers the WARN in usb_submit_urb(): "URB submitted while active" This is a similar class of bug fixed in rtl8150 by - commit 958baf5eaee3 ("net: usb: Remove disruptive netif_wake_queue in rtl8150_set_multicast"). Also kaweth_set_rx_mode() is already functionally broken, the real set_rx_mode action is performed by kaweth_async_set_rx_mode(), which in turn is not a no-op only at ndo_open() time. Suggested-by: Paolo Abeni Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ziyi Guo Link: https://patch.msgid.link/20260217175012.1234494-1-n7l8m4@u.northwestern.edu Signed-off-by: Jakub Kicinski --- drivers/net/usb/kaweth.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index c9efb7df892e..e01d14f6c366 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -765,7 +765,6 @@ static void kaweth_set_rx_mode(struct net_device *net) netdev_dbg(net, "Setting Rx mode to %d\n", packet_filter_bitmap); - netif_stop_queue(net); if (net->flags & IFF_PROMISC) { packet_filter_bitmap |= KAWETH_PACKET_FILTER_PROMISCUOUS; @@ -775,7 +774,6 @@ static void kaweth_set_rx_mode(struct net_device *net) } kaweth->packet_filter_bitmap = packet_filter_bitmap; - netif_wake_queue(net); } /**************************************************************** From f1e2f0ce704e4a14e3f367d3b97d3dd2d8e183b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20P=C3=A5lsson?= Date: Wed, 18 Feb 2026 05:28:22 +0000 Subject: [PATCH 075/828] net: usb: lan78xx: scan all MDIO addresses on LAN7801 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LAN7801 is designed exclusively for external PHYs (unlike the LAN7800/LAN7850 which have internal PHYs), but lan78xx_mdio_init() restricts PHY scanning to MDIO addresses 0-7 by setting phy_mask to ~(0xFF). This prevents discovery of external PHYs wired to addresses outside that range. One such case is the DP83TC814 100BASE-T1 PHY, which is typically configured at MDIO address 10 via PHYAD bootstrap pins and goes undetected with the current mask. Remove the restrictive phy_mask assignment for the LAN7801 so that the default mask of 0 applies, allowing all 32 MDIO addresses to be scanned during bus registration. Fixes: 02dc1f3d613d ("lan78xx: add LAN7801 MAC only support") Signed-off-by: Martin Pålsson Link: https://patch.msgid.link/0110019c6f388aff-98d99cf0-4425-4fff-b16b-dea5ad8fafe0-000000@eu-north-1.amazonses.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 00397a807393..065588c9cfa6 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2094,8 +2094,6 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) dev->mdiobus->phy_mask = ~(1 << 1); break; case ID_REV_CHIP_ID_7801_: - /* scan thru PHYAD[2..0] */ - dev->mdiobus->phy_mask = ~(0xFF); break; } From bfd264fbbbca7a39ea430d7bc2baf8ee2ea958e4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 18 Feb 2026 18:05:51 +0200 Subject: [PATCH 076/828] net: dsa: sja1105: protect link replay helpers against NULL phylink instance There is a crash when unbinding the sja1105 driver under special circumstances: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 Call trace: phylink_run_resolve_and_disable+0x10/0x90 sja1105_static_config_reload+0xc0/0x410 sja1105_vlan_filtering+0x100/0x140 dsa_port_vlan_filtering+0x13c/0x368 dsa_port_reset_vlan_filtering.isra.0+0xe8/0x198 dsa_port_bridge_leave+0x130/0x248 dsa_user_changeupper.part.0+0x74/0x158 dsa_user_netdevice_event+0x50c/0xa50 notifier_call_chain+0x78/0x148 raw_notifier_call_chain+0x20/0x38 call_netdevice_notifiers_info+0x58/0xa8 __netdev_upper_dev_unlink+0xac/0x220 netdev_upper_dev_unlink+0x38/0x70 del_nbp+0x1a4/0x320 br_del_if+0x3c/0xd8 br_device_event+0xf8/0x2d8 notifier_call_chain+0x78/0x148 raw_notifier_call_chain+0x20/0x38 call_netdevice_notifiers_info+0x58/0xa8 unregister_netdevice_many_notify+0x314/0x848 unregister_netdevice_queue+0xe8/0xf8 dsa_user_destroy+0x50/0xa8 dsa_port_teardown+0x80/0x98 dsa_switch_teardown_ports+0x4c/0xb8 dsa_switch_deinit+0x94/0xb8 dsa_switch_put_tree+0x2c/0xc0 dsa_unregister_switch+0x38/0x60 sja1105_remove+0x24/0x40 spi_remove+0x38/0x60 device_remove+0x54/0x90 device_release_driver_internal+0x1d4/0x230 device_driver_detach+0x20/0x38 unbind_store+0xbc/0xc8 ---[ end trace 0000000000000000 ]--- which requires an explanation. When a port offloads a bridge, the switch must be reset to change the VLAN awareness state (the SJA1105_VLAN_FILTERING reason for sja1105_static_config_reload()). When the port leaves a VLAN-aware bridge, it must also be reset for the same reason: it is returning to operation as a VLAN-unaware standalone port. sja1105_static_config_reload() triggers the phylink link replay helpers. Because sja1105 is a switch, it has multiple user ports. During unbind, ports are torn down one by one in dsa_switch_teardown_ports() -> dsa_port_teardown() -> dsa_user_destroy(). The crash happens when the first user port is not part of the VLAN-aware bridge, but any other user port is. Tearing down the first user port causes phylink_destroy() to be called on dp->pl, and this pointer to be set to NULL. Then, when the second user port is torn down, this was offloading a VLAN-aware bridge port, so indirectly it will trigger sja1105_static_config_reload(). The latter function iterates using dsa_switch_for_each_available_port(), and unconditionally dereferences dp->pl, including for the aforementioned torn down previous port, and passes that to phylink. This is where the NULL pointer is coming from. There are multiple levels at which this could be avoided: - add an "if (dp->pl)" in sja1105_static_config_reload() - make the phylink replay helpers NULL-tolerant - mark ports as DSA_PORT_TYPE_UNUSED after dsa_port_phylink_destroy() has run, such that subsequent dsa_switch_for_each_available_port() iterations skip them - disconnect the entire switch at once from switchdev and NETDEV_CHANGEUPPER events while unbinding, not just port by port, likely using a "ds->unbinding = true" mechanism or similar however options 3 and 4 are quite heavy and might have side effects. Although 2 allows to keep the driver simpler, the phylink API it not NULL-tolerant in general and is not responsible for the NULL pointer (this is something done by dsa_port_phylink_destroy()). So I went with 1. Functionally speaking, skipping the replay helpers for ports without a phylink instance is fine, because that only happens during driver removal (an operation which cannot be cancelled). The ports are not required to work (although they probably still will - untested assumption - as long as we don't overwrite the last port speed with SJA1105_SPEED_AUTO). Fixes: 0b2edc531e0b ("net: dsa: sja1105: let phylink help with the replay of link callbacks") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260218160551.194782-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 71a817c07a90..94d17b06da40 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2278,6 +2278,12 @@ int sja1105_static_config_reload(struct sja1105_private *priv, * change it through the dynamic interface later. */ dsa_switch_for_each_available_port(dp, ds) { + /* May be called during unbind when we unoffload a VLAN-aware + * bridge from port 1 while port 0 was already torn down + */ + if (!dp->pl) + continue; + phylink_replay_link_begin(dp->pl); mac[dp->index].speed = priv->info->port_speed[SJA1105_SPEED_AUTO]; } @@ -2334,7 +2340,8 @@ int sja1105_static_config_reload(struct sja1105_private *priv, } dsa_switch_for_each_available_port(dp, ds) - phylink_replay_link_end(dp->pl); + if (dp->pl) + phylink_replay_link_end(dp->pl); rc = sja1105_reload_cbs(priv); if (rc < 0) From f6a495484a27150fb85f943e1a7464da88c2a797 Mon Sep 17 00:00:00 2001 From: Ethan Tidmore Date: Thu, 19 Feb 2026 16:10:01 -0600 Subject: [PATCH 077/828] proc: Fix pointer error dereference The function try_lookup_noperm() can return an error pointer. Add check for error pointer. Detected by Smatch: fs/proc/base.c:2148 proc_fill_cache() error: 'child' dereferencing possible ERR_PTR() Fixes: 1df98b8bbcca ("proc_fill_cache(): clean up, get rid of pointless find_inode_number() use") Signed-off-by: Ethan Tidmore Link: https://patch.msgid.link/20260219221001.1117135-1-ethantidmore06@gmail.com Signed-off-by: Christian Brauner --- fs/proc/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 4eec684baca9..4c863d17dfb4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2128,6 +2128,9 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, ino_t ino = 1; child = try_lookup_noperm(&qname, dir); + if (IS_ERR(child)) + goto end_instantiate; + if (!child) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); child = d_alloc_parallel(dir, &qname, &wq); From c5f8658f97ec392eeaf355d4e9775ae1f23ca1d3 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 4 Feb 2026 17:06:29 +0800 Subject: [PATCH 078/828] drm/imx: parallel-display: check return value of devm_drm_bridge_add() in imx_pd_probe() Return the value of devm_drm_bridge_add() in order to propagate the error properly, if it fails due to resource allocation failure or bridge registration failure. This ensures that the probe function fails safely rather than proceeding with a potentially incomplete bridge setup. Fixes: bf7e97910b9f ("drm/imx: parallel-display: add the bridge before attaching it") Signed-off-by: Chen Ni Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260204090629.2209542-1-nichen@iscas.ac.cn Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/imx/ipuv3/parallel-display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/ipuv3/parallel-display.c b/drivers/gpu/drm/imx/ipuv3/parallel-display.c index 6fbf505d2801..590120a33fa0 100644 --- a/drivers/gpu/drm/imx/ipuv3/parallel-display.c +++ b/drivers/gpu/drm/imx/ipuv3/parallel-display.c @@ -256,7 +256,9 @@ static int imx_pd_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxpd); - devm_drm_bridge_add(dev, &imxpd->bridge); + ret = devm_drm_bridge_add(dev, &imxpd->bridge); + if (ret) + return ret; return component_add(dev, &imx_pd_ops); } From 496daa2759260374bb9c9b2196a849aa3bc513a8 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Fri, 6 Feb 2026 12:06:21 +0800 Subject: [PATCH 079/828] drm/bridge: synopsys: dw-dp: Check return value of devm_drm_bridge_add() in dw_dp_bind() Return the value of devm_drm_bridge_add() in order to propagate the error properly, if it fails due to resource allocation failure or bridge registration failure. This ensures that the bind function fails safely rather than proceeding with a potentially incomplete bridge setup. Fixes: b726970486d8 ("drm/bridge: synopsys: dw-dp: add bridge before attaching") Signed-off-by: Chen Ni Reviewed-by: Andy Yan Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260206040621.4095517-1-nichen@iscas.ac.cn Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/synopsys/dw-dp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-dp.c b/drivers/gpu/drm/bridge/synopsys/dw-dp.c index 432342452484..07f7a2e0d9f2 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-dp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-dp.c @@ -2049,7 +2049,9 @@ struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, bridge->type = DRM_MODE_CONNECTOR_DisplayPort; bridge->ycbcr_420_allowed = true; - devm_drm_bridge_add(dev, bridge); + ret = devm_drm_bridge_add(dev, bridge); + if (ret) + return ERR_PTR(ret); dp->aux.dev = dev; dp->aux.drm_dev = encoder->dev; From 803ec1faf7c1823e6e3b1f2aaa81be18528c9436 Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Mon, 9 Feb 2026 19:41:14 +0100 Subject: [PATCH 080/828] drm/bridge: samsung-dsim: Fix memory leak in error path In samsung_dsim_host_attach(), drm_bridge_add() is called to add the bridge. However, if samsung_dsim_register_te_irq() or pdata->host_ops->attach() fails afterwards, the function returns without removing the bridge, causing a memory leak. Fix this by adding proper error handling with goto labels to ensure drm_bridge_remove() is called in all error paths. Also ensure that samsung_dsim_unregister_te_irq() is called if the attach operation fails after the TE IRQ has been registered. samsung_dsim_unregister_te_irq() function is moved without changes to be before samsung_dsim_host_attach() to avoid forward declaration. Fixes: e7447128ca4a ("drm: bridge: Generalize Exynos-DSI driver into a Samsung DSIM bridge") Cc: stable@vger.kernel.org Signed-off-by: Osama Abdelkader Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260209184115.10937-1-osama.abdelkader@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/samsung-dsim.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index eabc4c32f6ab..ad8c6aa49d48 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -1881,6 +1881,14 @@ static int samsung_dsim_register_te_irq(struct samsung_dsim *dsi, struct device return 0; } +static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi) +{ + if (dsi->te_gpio) { + free_irq(gpiod_to_irq(dsi->te_gpio), dsi); + gpiod_put(dsi->te_gpio); + } +} + static int samsung_dsim_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { @@ -1955,13 +1963,13 @@ of_find_panel_or_bridge: if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO)) { ret = samsung_dsim_register_te_irq(dsi, &device->dev); if (ret) - return ret; + goto err_remove_bridge; } if (pdata->host_ops && pdata->host_ops->attach) { ret = pdata->host_ops->attach(dsi, device); if (ret) - return ret; + goto err_unregister_te_irq; } dsi->lanes = device->lanes; @@ -1969,14 +1977,13 @@ of_find_panel_or_bridge: dsi->mode_flags = device->mode_flags; return 0; -} -static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi) -{ - if (dsi->te_gpio) { - free_irq(gpiod_to_irq(dsi->te_gpio), dsi); - gpiod_put(dsi->te_gpio); - } +err_unregister_te_irq: + if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO)) + samsung_dsim_unregister_te_irq(dsi); +err_remove_bridge: + drm_bridge_remove(&dsi->bridge); + return ret; } static int samsung_dsim_host_detach(struct mipi_dsi_host *host, From 0d195d3b205ca90db30d70d09d7bb6909aac178f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Fri, 20 Feb 2026 12:39:37 +0100 Subject: [PATCH 081/828] drbd: fix null-pointer dereference on local read error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In drbd_request_endio(), READ_COMPLETED_WITH_ERROR is passed to __req_mod() with a NULL peer_device: __req_mod(req, what, NULL, &m); The READ_COMPLETED_WITH_ERROR handler then unconditionally passes this NULL peer_device to drbd_set_out_of_sync(), which dereferences it, causing a null-pointer dereference. Fix this by obtaining the peer_device via first_peer_device(device), matching how drbd_req_destroy() handles the same situation. Cc: stable@vger.kernel.org Reported-by: Tuo Li Link: https://lore.kernel.org/linux-block/20260104165355.151864-1-islituo@gmail.com Signed-off-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d15826f6ee81..70f75ef07945 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -621,7 +621,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case READ_COMPLETED_WITH_ERROR: - drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); + drbd_set_out_of_sync(first_peer_device(device), + req->i.sector, req->i.size); drbd_report_io_error(device, req); __drbd_chk_io_error(device, DRBD_READ_ERROR); fallthrough; From 9b8eeccd7110d10f9c1b1aa456f5efac23e4e383 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Thu, 19 Feb 2026 11:24:00 -0800 Subject: [PATCH 082/828] MAINTAINERS: update enic and usnic maintainers Remove Christian Benvenuti from the enic/usnic maintainer lists because he is no longer working on the drivers and the email address is no longer valid. Keep Satish Kharat as the enic maintainer and the usnic co-maintainer. Signed-off-by: Satish Kharat Acked-by: Leon Romanovsky Link: https://patch.msgid.link/20260219-enic-maintainers-update-v2-v1-1-c58aa11c2ea8@cisco.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b8d8a5c41597..c00b5f5d4203 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6219,14 +6219,13 @@ S: Supported F: drivers/scsi/snic/ CISCO VIC ETHERNET NIC DRIVER -M: Christian Benvenuti M: Satish Kharat S: Maintained F: drivers/net/ethernet/cisco/enic/ CISCO VIC LOW LATENCY NIC DRIVER -M: Christian Benvenuti M: Nelson Escobar +M: Satish Kharat S: Supported F: drivers/infiniband/hw/usnic/ From 2bb995e6155cb4f254574598cbd6fe1dcc99766a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 18 Feb 2026 16:56:00 -0800 Subject: [PATCH 083/828] net: phy: qcom: qca807x: normalize return value of gpio_get The GPIO get callback is expected to return 0 or 1 (or a negative error code). Ensure that the value returned by qca807x_gpio_get() is normalized to the [0, 1] range. Fixes: 86ef402d805d ("gpiolib: sanitize the return value of gpio_chip::get()") Signed-off-by: Dmitry Torokhov Reviewed-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Link: https://patch.msgid.link/aZZeyr2ysqqk2GqA@google.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/qcom/qca807x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index d8f1ce5a7128..6004da5741af 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -375,7 +375,7 @@ static int qca807x_gpio_get(struct gpio_chip *gc, unsigned int offset) reg = QCA807X_MMD7_LED_FORCE_CTRL(offset); val = phy_read_mmd(priv->phy, MDIO_MMD_AN, reg); - return FIELD_GET(QCA807X_GPIO_FORCE_MODE_MASK, val); + return !!FIELD_GET(QCA807X_GPIO_FORCE_MODE_MASK, val); } static int qca807x_gpio_set(struct gpio_chip *gc, unsigned int offset, int value) From 594163ea88a03bdb412063af50fc7177ef3cbeae Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 19 Feb 2026 12:38:50 +0100 Subject: [PATCH 084/828] net: ethernet: xscale: Check for PTP support properly In ixp4xx_get_ts_info() ixp46x_ptp_find() is called unconditionally despite this feature only existing on ixp46x, leading to the following splat from tcpdump: root@OpenWrt:~# tcpdump -vv -X -i eth0 (...) Unable to handle kernel NULL pointer dereference at virtual address 00000238 when read (...) Call trace: ptp_clock_index from ixp46x_ptp_find+0x1c/0x38 ixp46x_ptp_find from ixp4xx_get_ts_info+0x4c/0x64 ixp4xx_get_ts_info from __ethtool_get_ts_info+0x90/0x108 __ethtool_get_ts_info from __dev_ethtool+0xa00/0x2648 __dev_ethtool from dev_ethtool+0x160/0x234 dev_ethtool from dev_ioctl+0x2cc/0x460 dev_ioctl from sock_ioctl+0x1ec/0x524 sock_ioctl from sys_ioctl+0x51c/0xa94 sys_ioctl from ret_fast_syscall+0x0/0x44 (...) Segmentation fault Check for ixp46x in ixp46x_ptp_find() before trying to set up PTP to avoid this. To avoid altering the returned error code from ixp4xx_hwtstamp_set() which before this patch was -EOPNOTSUPP, we return -EOPNOTSUPP from ixp4xx_hwtstamp_set() if ixp46x_ptp_find() fails no matter the error code. The helper function ixp46x_ptp_find() helper returns -ENODEV. Fixes: 9055a2f59162 ("ixp4xx_eth: make ptp support a platform driver") Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20260219-ixp4xx-fix-ethernet-v3-1-f235ccc3cd46@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 5 +---- drivers/net/ethernet/xscale/ptp_ixp46x.c | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index e1e7f65553e7..b0faa0f1780d 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -403,15 +403,12 @@ static int ixp4xx_hwtstamp_set(struct net_device *netdev, int ret; int ch; - if (!cpu_is_ixp46x()) - return -EOPNOTSUPP; - if (!netif_running(netdev)) return -EINVAL; ret = ixp46x_ptp_find(&port->timesync_regs, &port->phc_index); if (ret) - return ret; + return -EOPNOTSUPP; ch = PORT2CHANNEL(port); regs = port->timesync_regs; diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c index 94203eb46e6b..93c64db22a69 100644 --- a/drivers/net/ethernet/xscale/ptp_ixp46x.c +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -232,6 +232,9 @@ static struct ixp_clock ixp_clock; int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index) { + if (!cpu_is_ixp46x()) + return -ENODEV; + *regs = ixp_clock.regs; *phc_index = ptp_clock_index(ixp_clock.ptp_clock); From 470c7ca2b4c3e3a51feeb952b7f97a775b5c49cd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 19 Feb 2026 17:31:31 +0000 Subject: [PATCH 085/828] udplite: Fix null-ptr-deref in __udp_enqueue_schedule_skb(). syzbot reported null-ptr-deref of udp_sk(sk)->udp_prod_queue. [0] Since the cited commit, udp_lib_init_sock() can fail, as can udp_init_sock() and udpv6_init_sock(). Let's handle the error in udplite_sk_init() and udplitev6_sk_init(). [0]: BUG: KASAN: null-ptr-deref in instrument_atomic_read include/linux/instrumented.h:82 [inline] BUG: KASAN: null-ptr-deref in atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline] BUG: KASAN: null-ptr-deref in __udp_enqueue_schedule_skb+0x151/0x1480 net/ipv4/udp.c:1719 Read of size 4 at addr 0000000000000008 by task syz.2.18/2944 CPU: 1 UID: 0 PID: 2944 Comm: syz.2.18 Not tainted syzkaller #0 PREEMPTLAZY Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025 Call Trace: dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 kasan_report+0xa2/0xe0 mm/kasan/report.c:595 check_region_inline mm/kasan/generic.c:-1 [inline] kasan_check_range+0x264/0x2c0 mm/kasan/generic.c:200 instrument_atomic_read include/linux/instrumented.h:82 [inline] atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline] __udp_enqueue_schedule_skb+0x151/0x1480 net/ipv4/udp.c:1719 __udpv6_queue_rcv_skb net/ipv6/udp.c:795 [inline] udpv6_queue_rcv_one_skb+0xa2e/0x1ad0 net/ipv6/udp.c:906 udp6_unicast_rcv_skb+0x227/0x380 net/ipv6/udp.c:1064 ip6_protocol_deliver_rcu+0xe17/0x1540 net/ipv6/ip6_input.c:438 ip6_input_finish+0x191/0x350 net/ipv6/ip6_input.c:489 NF_HOOK+0x354/0x3f0 include/linux/netfilter.h:318 ip6_input+0x16c/0x2b0 net/ipv6/ip6_input.c:500 NF_HOOK+0x354/0x3f0 include/linux/netfilter.h:318 __netif_receive_skb_one_core net/core/dev.c:6149 [inline] __netif_receive_skb+0xd3/0x370 net/core/dev.c:6262 process_backlog+0x4d6/0x1160 net/core/dev.c:6614 __napi_poll+0xae/0x320 net/core/dev.c:7678 napi_poll net/core/dev.c:7741 [inline] net_rx_action+0x60d/0xdc0 net/core/dev.c:7893 handle_softirqs+0x209/0x8d0 kernel/softirq.c:622 do_softirq+0x52/0x90 kernel/softirq.c:523 __local_bh_enable_ip+0xe7/0x120 kernel/softirq.c:450 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:924 [inline] __dev_queue_xmit+0x109c/0x2dc0 net/core/dev.c:4856 __ip6_finish_output net/ipv6/ip6_output.c:-1 [inline] ip6_finish_output+0x158/0x4e0 net/ipv6/ip6_output.c:219 NF_HOOK_COND include/linux/netfilter.h:307 [inline] ip6_output+0x342/0x580 net/ipv6/ip6_output.c:246 ip6_send_skb+0x1d7/0x3c0 net/ipv6/ip6_output.c:1984 udp_v6_send_skb+0x9a5/0x1770 net/ipv6/udp.c:1442 udp_v6_push_pending_frames+0xa2/0x140 net/ipv6/udp.c:1469 udpv6_sendmsg+0xfe0/0x2830 net/ipv6/udp.c:1759 sock_sendmsg_nosec net/socket.c:727 [inline] __sock_sendmsg+0xe5/0x270 net/socket.c:742 __sys_sendto+0x3eb/0x580 net/socket.c:2206 __do_sys_sendto net/socket.c:2213 [inline] __se_sys_sendto net/socket.c:2209 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2209 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xd2/0xf20 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f67b4d9c629 Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f67b5c98028 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f67b5015fa0 RCX: 00007f67b4d9c629 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f67b4e32b39 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000040000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007f67b5016038 R14: 00007f67b5015fa0 R15: 00007ffe3cb66dd8 Fixes: b650bf0977d3 ("udp: remove busylock and add per NUMA queues") Reported-by: syzbot Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260219173142.310741-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/udplite.c | 3 +-- net/ipv6/udplite.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index d3e621a11a1a..826e9e79eb19 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -20,10 +20,9 @@ EXPORT_SYMBOL(udplite_table); /* Designate sk as UDP-Lite socket */ static int udplite_sk_init(struct sock *sk) { - udp_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); - return 0; + return udp_init_sock(sk); } static int udplite_rcv(struct sk_buff *skb) diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 2cec542437f7..e867721cda4d 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -16,10 +16,9 @@ static int udplitev6_sk_init(struct sock *sk) { - udpv6_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); - return 0; + return udpv6_init_sock(sk); } static int udplitev6_rcv(struct sk_buff *skb) From e123d9302d223767bd910bfbcfe607bae909f8ac Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Thu, 19 Feb 2026 10:53:11 -0800 Subject: [PATCH 086/828] bnxt_en: Fix RSS context delete logic We need to free the corresponding RSS context VNIC in FW everytime an RSS context is deleted in driver. Commit 667ac333dbb7 added a check to delete the VNIC in FW only when netif_running() is true to help delete RSS contexts with interface down. Having that condition will make the driver leak VNICs in FW whenever close() happens with active RSS contexts. On the subsequent open(), as part of RSS context restoration, we will end up trying to create extra VNICs for which we did not make any reservation. FW can fail this request, thereby making us lose active RSS contexts. Suppose an RSS context is deleted already and we try to process a delete request again, then the HWRM functions will check for validity of the request and they simply return if the resource is already freed. So, even for delete-when-down cases, netif_running() check is not necessary. Remove the netif_running() condition check when deleting an RSS context. Reported-by: Jakub Kicinski Fixes: 667ac333dbb7 ("eth: bnxt: allow deleting RSS contexts when the device is down") Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260219185313.2682148-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fb45e1dd1dd7..7768d9753e2d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10889,12 +10889,10 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, struct bnxt_ntuple_filter *ntp_fltr; int i; - if (netif_running(bp->dev)) { - bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); - for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { - if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) - bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); - } + bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); + for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { + if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); } if (!all) return; From c1bbd9900d65ac65b9fce9f129e3369a04871570 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Thu, 19 Feb 2026 10:53:12 -0800 Subject: [PATCH 087/828] bnxt_en: Fix deleting of Ntuple filters Ntuple filters can be deleted when the interface is down. The current code blindly sends the filter delete command to FW. When the interface is down, all the VNICs are deleted in the FW. When the VNIC is freed in the FW, all the associated filters are also freed. We need not send the free command explicitly. Sending such command will generate FW error in the dmesg. In order to fix this, we can safely return from bnxt_hwrm_cfa_ntuple_filter_free() when BNXT_STATE_OPEN is not true which confirms the VNICs have been deleted. Fixes: 8336a974f37d ("bnxt_en: Save user configured filters in a lookup list") Suggested-by: Michael Chan Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260219185313.2682148-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7768d9753e2d..8c73a723d23f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6240,6 +6240,9 @@ int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, int rc; set_bit(BNXT_FLTR_FW_DELETED, &fltr->base.state); + if (!test_bit(BNXT_STATE_OPEN, &bp->state)) + return 0; + rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_FREE); if (rc) return rc; From ce5a0f4612dbacc805fe16719f9f6bd18352b70d Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Thu, 19 Feb 2026 10:53:13 -0800 Subject: [PATCH 088/828] selftests: drv-net: rss_ctx: test RSS contexts persist after ifdown/up Add a test to verify that RSS contexts persist across interface down/up along with their associated Ntuple filters. Another test that creates contexts/rules keeping interface down and test their persistence is also added. Tested on bnxt_en: TAP version 13 1..1 # timeout set to 0 # selftests: drivers/net/hw: rss_ctx.py # TAP version 13 # 1..2 # ok 1 rss_ctx.test_rss_context_persist_create_and_ifdown # ok 2 rss_ctx.test_rss_context_persist_ifdown_and_create # SKIP Create context not supported with interface down # # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:1 error:0 Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260219185313.2682148-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/rss_ctx.py | 100 +++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ed7e405682f0..b9b7527c2c6b 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -4,13 +4,15 @@ import datetime import random import re +import time from lib.py import ksft_run, ksft_pr, ksft_exit from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx +from lib.py import ksft_disruptive from lib.py import rand_port -from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure +from lib.py import cmd, ethtool, ip, defer, GenerateTraffic, CmdExitFailure, wait_file def _rss_key_str(key): @@ -809,6 +811,98 @@ def test_rss_default_context_rule(cfg): 'noise' : (0, 1) }) +@ksft_disruptive +def test_rss_context_persist_ifupdown(cfg, pre_down=False): + """ + Test that RSS contexts and their associated ntuple filters persist across + an interface down/up cycle. + + """ + + require_ntuple(cfg) + + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 6: + try: + ethtool(f"-L {cfg.ifname} combined 6") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + ifup = defer(ip, f"link set dev {cfg.ifname} up") + if pre_down: + ip(f"link set dev {cfg.ifname} down") + + try: + ctx1_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx1_id} delete") + except CmdExitFailure as exc: + raise KsftSkipEx("Create context not supported with interface down") from exc + + ctx2_id = ethtool_create(cfg, "-X", "context new start 4 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx2_id} delete") + + port_ctx2 = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_ctx2} context {ctx2_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + if not pre_down: + ip(f"link set dev {cfg.ifname} down") + ifup.exec() + + wait_file(f"/sys/class/net/{cfg.ifname}/carrier", + lambda x: x.strip() == "1", deadline=20) + + remote_addr = cfg.remote_addr_v[cfg.addr_ipver] + for _ in range(10): + if cmd(f"ping -c 1 -W 1 {remote_addr}", fail=False).ret == 0: + break + time.sleep(1) + else: + raise KsftSkipEx("Cannot reach remote host after interface up") + + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True) + + data1 = [c for c in ctxs if c.get('context') == ctx1_id] + ksft_eq(len(data1), 1, f"Context {ctx1_id} should persist after ifup") + + data2 = [c for c in ctxs if c.get('context') == ctx2_id] + ksft_eq(len(data2), 1, f"Context {ctx2_id} should persist after ifup") + + _ntuple_rule_check(cfg, ntuple_id, ctx2_id) + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + main_traffic = sum(cnts[0:2]) + ksft_ge(main_traffic, 18000, f"Main context traffic distribution: {cnts}") + ksft_lt(sum(cnts[2:6]), 500, f"Other context queues should be mostly empty: {cnts}") + + _send_traffic_check(cfg, port_ctx2, f"context {ctx2_id}", + {'target': (4, 5), + 'noise': (0, 1), + 'empty': (2, 3)}) + + +def test_rss_context_persist_create_and_ifdown(cfg): + """ + Create RSS contexts then cycle the interface down and up. + """ + test_rss_context_persist_ifupdown(cfg, pre_down=False) + + +def test_rss_context_persist_ifdown_and_create(cfg): + """ + Bring interface down first, then create RSS contexts and bring up. + """ + test_rss_context_persist_ifupdown(cfg, pre_down=True) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.context_cnt = None @@ -823,7 +917,9 @@ def main() -> None: test_rss_context_out_of_order, test_rss_context4_create_with_cfg, test_flow_add_context_missing, test_delete_rss_context_busy, test_rss_ntuple_addition, - test_rss_default_context_rule], + test_rss_default_context_rule, + test_rss_context_persist_create_and_ifdown, + test_rss_context_persist_ifdown_and_create], args=(cfg, )) ksft_exit() From d4f687fbbce45b5e88438e89b5e26c0c15847992 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 18 Feb 2026 21:08:26 +0100 Subject: [PATCH 089/828] ovpn: tcp - fix packet extraction from stream When processing TCP stream data in ovpn_tcp_recv, we receive large cloned skbs from __strp_rcv that may contain multiple coalesced packets. The current implementation has two bugs: 1. Header offset overflow: Using pskb_pull with large offsets on coalesced skbs causes skb->data - skb->head to exceed the u16 storage of skb->network_header. This causes skb_reset_network_header to fail on the inner decapsulated packet, resulting in packet drops. 2. Unaligned protocol headers: Extracting packets from arbitrary positions within the coalesced TCP stream provides no alignment guarantees for the packet data causing performance penalties on architectures without efficient unaligned access. Additionally, openvpn's 2-byte length prefix on TCP packets causes the subsequent 4-byte opcode and packet ID fields to be inherently misaligned. Fix both issues by allocating a new skb for each openvpn packet and using skb_copy_bits to extract only the packet content into the new buffer, skipping the 2-byte length prefix. Also, check the length before invoking the function that performs the allocation to avoid creating an invalid skb. If the packet has to be forwarded to userspace the 2-byte prefix can be pushed to the head safely, without misalignment. As a side effect, this approach also avoids the expensive linearization that pskb_pull triggers on cloned skbs with page fragments. In testing, this resulted in TCP throughput improvements of up to 74%. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/ovpn/tcp.c | 57 ++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index ec2bbc28c196..5499c1572f3e 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -70,37 +70,56 @@ static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk, peer->tcp.sk_cb.sk_data_ready(sk); } +static struct sk_buff *ovpn_tcp_skb_packet(const struct ovpn_peer *peer, + struct sk_buff *orig_skb, + const int pkt_len, const int pkt_off) +{ + struct sk_buff *ovpn_skb; + int err; + + /* create a new skb with only the content of the current packet */ + ovpn_skb = netdev_alloc_skb(peer->ovpn->dev, pkt_len); + if (unlikely(!ovpn_skb)) + goto err; + + skb_copy_header(ovpn_skb, orig_skb); + err = skb_copy_bits(orig_skb, pkt_off, skb_put(ovpn_skb, pkt_len), + pkt_len); + if (unlikely(err)) { + net_warn_ratelimited("%s: skb_copy_bits failed for peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + kfree_skb(ovpn_skb); + goto err; + } + + consume_skb(orig_skb); + return ovpn_skb; +err: + kfree_skb(orig_skb); + return NULL; +} + static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) { struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp); struct strp_msg *msg = strp_msg(skb); - size_t pkt_len = msg->full_len - 2; - size_t off = msg->offset + 2; + int pkt_len = msg->full_len - 2; u8 opcode; - /* ensure skb->data points to the beginning of the openvpn packet */ - if (!pskb_pull(skb, off)) { - net_warn_ratelimited("%s: packet too small for peer %u\n", - netdev_name(peer->ovpn->dev), peer->id); - goto err; - } - - /* strparser does not trim the skb for us, therefore we do it now */ - if (pskb_trim(skb, pkt_len) != 0) { - net_warn_ratelimited("%s: trimming skb failed for peer %u\n", - netdev_name(peer->ovpn->dev), peer->id); - goto err; - } - - /* we need the first 4 bytes of data to be accessible + /* we need at least 4 bytes of data in the packet * to extract the opcode and the key ID later on */ - if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) { + if (unlikely(pkt_len < OVPN_OPCODE_SIZE)) { net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n", netdev_name(peer->ovpn->dev), peer->id); goto err; } + /* extract the packet into a new skb */ + skb = ovpn_tcp_skb_packet(peer, skb, pkt_len, msg->offset + 2); + if (unlikely(!skb)) + goto err; + /* DATA_V2 packets are handled in kernel, the rest goes to user space */ opcode = ovpn_opcode_from_skb(skb, 0); if (unlikely(opcode != OVPN_DATA_V2)) { @@ -113,7 +132,7 @@ static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) /* The packet size header must be there when sending the packet * to userspace, therefore we put it back */ - skb_push(skb, 2); + *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(pkt_len); ovpn_tcp_to_userspace(peer, strp->sk, skb); return; } From 97d5c8f5c09a604c4873c8348f58de3cea69a7df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Paku=C5=82a?= Date: Wed, 4 Feb 2026 22:44:55 +0100 Subject: [PATCH 090/828] HID: pidff: Fix condition effect bit clearing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As reported by MPDarkGuy on discord, NULL pointer dereferences were happening because not all the conditional effects bits were cleared. Properly clear all conditional effect bits from ffbit Fixes: 7f3d7bc0df4b ("HID: pidff: Better quirk assigment when searching for fields") Cc: stable@vger.kernel.org # 6.18.x Signed-off-by: Tomasz Pakuła Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-pidff.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c index a4e700b40ba9..56d6af39ba81 100644 --- a/drivers/hid/usbhid/hid-pidff.c +++ b/drivers/hid/usbhid/hid-pidff.c @@ -1452,10 +1452,13 @@ static int pidff_init_fields(struct pidff_device *pidff, struct input_dev *dev) hid_warn(pidff->hid, "unknown ramp effect layout\n"); if (PIDFF_FIND_FIELDS(set_condition, PID_SET_CONDITION, 1)) { - if (test_and_clear_bit(FF_SPRING, dev->ffbit) || - test_and_clear_bit(FF_DAMPER, dev->ffbit) || - test_and_clear_bit(FF_FRICTION, dev->ffbit) || - test_and_clear_bit(FF_INERTIA, dev->ffbit)) + bool test = false; + + test |= test_and_clear_bit(FF_SPRING, dev->ffbit); + test |= test_and_clear_bit(FF_DAMPER, dev->ffbit); + test |= test_and_clear_bit(FF_FRICTION, dev->ffbit); + test |= test_and_clear_bit(FF_INERTIA, dev->ffbit); + if (test) hid_warn(pidff->hid, "unknown condition effect layout\n"); } From cfc83a3c71517b59c1047db57da31e26a9dc2f33 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Feb 2026 11:20:29 +0100 Subject: [PATCH 091/828] batman-adv: Avoid double-rtnl_lock ELP metric worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit batadv_v_elp_get_throughput() might be called when the RTNL lock is already held. This could be problematic when the work queue item is cancelled via cancel_delayed_work_sync() in batadv_v_elp_iface_disable(). In this case, an rtnl_lock() would cause a deadlock. To avoid this, rtnl_trylock() was used in this function to skip the retrieval of the ethtool information in case the RTNL lock was already held. But for cfg80211 interfaces, batadv_get_real_netdev() was called - which also uses rtnl_lock(). The approach for __ethtool_get_link_ksettings() must also be used instead and the lockless version __batadv_get_real_netdev() has to be called. Cc: stable@vger.kernel.org Fixes: 8c8ecc98f5c6 ("batman-adv: Drop unmanaged ELP metric worker") Reported-by: Christian Schmidbauer Signed-off-by: Sven Eckelmann Tested-by: Sören Skaarup Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 10 +++++++++- net/batman-adv/hard-interface.c | 8 ++++---- net/batman-adv/hard-interface.h | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index cb16c1ed2a58..fe832093d421 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -111,7 +111,15 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, /* unsupported WiFi driver version */ goto default_throughput; - real_netdev = batadv_get_real_netdev(hard_iface->net_dev); + /* only use rtnl_trylock because the elp worker will be cancelled while + * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise + * wait forever when the elp work_item was started and it is then also + * trying to rtnl_lock + */ + if (!rtnl_trylock()) + return false; + real_netdev = __batadv_get_real_netdev(hard_iface->net_dev); + rtnl_unlock(); if (!real_netdev) goto default_throughput; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 5113f879736b..1c488049d554 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -204,7 +204,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev) } /** - * batadv_get_real_netdevice() - check if the given netdev struct is a virtual + * __batadv_get_real_netdev() - check if the given netdev struct is a virtual * interface on top of another 'real' interface * @netdev: the device to check * @@ -214,7 +214,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev) * Return: the 'real' net device or the original net device and NULL in case * of an error. */ -static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) +struct net_device *__batadv_get_real_netdev(struct net_device *netdev) { struct batadv_hard_iface *hard_iface = NULL; struct net_device *real_netdev = NULL; @@ -267,7 +267,7 @@ struct net_device *batadv_get_real_netdev(struct net_device *net_device) struct net_device *real_netdev; rtnl_lock(); - real_netdev = batadv_get_real_netdevice(net_device); + real_netdev = __batadv_get_real_netdev(net_device); rtnl_unlock(); return real_netdev; @@ -336,7 +336,7 @@ static u32 batadv_wifi_flags_evaluate(struct net_device *net_device) if (batadv_is_cfg80211_netdev(net_device)) wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT; - real_netdev = batadv_get_real_netdevice(net_device); + real_netdev = __batadv_get_real_netdev(net_device); if (!real_netdev) return wifi_flags; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 9db8a310961e..9ba8fb2bdceb 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -67,6 +67,7 @@ enum batadv_hard_if_bcast { extern struct notifier_block batadv_hard_if_notifier; +struct net_device *__batadv_get_real_netdev(struct net_device *net_device); struct net_device *batadv_get_real_netdev(struct net_device *net_device); bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface); bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface); From 08f97454b7fa39bfcf82524955c771d2d693d6fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 22 Feb 2026 13:35:13 +0000 Subject: [PATCH 092/828] KVM: arm64: Fix protected mode handling of pages larger than 4kB Since 3669ddd8fa8b5 ("KVM: arm64: Add a range to pkvm_mappings"), pKVM tracks the memory that has been mapped into a guest in a side data structure. Crucially, it uses it to find out whether a page has already been mapped, and therefore refuses to map it twice. So far, so good. However, this very patch completely breaks non-4kB page support, with guests being unable to boot. The most obvious symptom is that we take the same fault repeatedly, and not making forward progress. A quick investigation shows that this is because of the above rejection code. As it turns out, there are multiple issues at play: - while the HPFAR_EL2 register gives you the faulting IPA minus the bottom 12 bits, it will still give you the extra bits that are part of the page offset for anything larger than 4kB, even for a level-3 mapping - pkvm_pgtable_stage2_map() assumes that the address passed as a parameter is aligned to the size of the intended mapping - the faulting address is only aligned for a non-page mapping When the planets are suitably aligned (pun intended), the guest faults on a page by accessing it past the bottom 4kB, and extra bits get set in the HPFAR_EL2 register. If this results in a page mapping (which is likely with large granule sizes), nothing aligns it further down, and pkvm_mapping_iter_first() finds an intersection that doesn't really exist. We assume this is a spurious fault and return -EAGAIN. And again... This doesn't hit outside of the protected code, as the page table code always aligns the IPA down to a page boundary, hiding the issue for everyone else. Fix it by always forcing the alignment on vma_pagesize, irrespective of the value of vma_pagesize. Fixes: 3669ddd8fa8b5 ("KVM: arm64: Add a range to pkvm_mappings") Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://https://patch.msgid.link/20260222141000.3084258-1-maz@kernel.org Cc: stable@vger.kernel.org --- arch/arm64/kvm/mmu.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8c5d259810b2..3952415c4f83 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1753,14 +1753,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } /* - * Both the canonical IPA and fault IPA must be hugepage-aligned to - * ensure we find the right PFN and lay down the mapping in the right - * place. + * Both the canonical IPA and fault IPA must be aligned to the + * mapping size to ensure we find the right PFN and lay down the + * mapping in the right place. */ - if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) { - fault_ipa &= ~(vma_pagesize - 1); - ipa &= ~(vma_pagesize - 1); - } + fault_ipa = ALIGN_DOWN(fault_ipa, vma_pagesize); + ipa = ALIGN_DOWN(ipa, vma_pagesize); gfn = ipa >> PAGE_SHIFT; mte_allowed = kvm_vma_mte_allowed(vma); From 663c28469d3274d6456f206a6671c91493d85ff1 Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Sat, 21 Feb 2026 01:59:44 -0300 Subject: [PATCH 093/828] smb: client: fix cifs_pick_channel when channels are equally loaded cifs_pick_channel uses (start % chan_count) when channels are equally loaded, but that can return a channel that failed the eligibility checks. Drop the fallback and return the scan-selected channel instead. If none is eligible, keep the existing behavior of using the primary channel. Signed-off-by: Henrique Carvalho Acked-by: Paulo Alcantara (Red Hat) Acked-by: Meetakshi Setiya Reviewed-by: Shyam Prasad N Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/transport.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 75697f6d2566..05f8099047e1 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -807,16 +807,21 @@ cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) } /* - * Return a channel (master if none) of @ses that can be used to send - * regular requests. + * cifs_pick_channel - pick an eligible channel for network operations * - * If we are currently binding a new channel (negprot/sess.setup), - * return the new incomplete channel. + * @ses: session reference + * + * Select an eligible channel (not terminating and not marked as needing + * reconnect), preferring the least loaded one. If no eligible channel is + * found, fall back to the primary channel (index 0). + * + * Return: TCP_Server_Info pointer for the chosen channel, or NULL if @ses is + * NULL. */ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) { uint index = 0; - unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; + unsigned int min_in_flight = UINT_MAX; struct TCP_Server_Info *server = NULL; int i, start, cur; @@ -846,14 +851,8 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) min_in_flight = server->in_flight; index = cur; } - if (server->in_flight > max_in_flight) - max_in_flight = server->in_flight; } - /* if all channels are equally loaded, fall back to round-robin */ - if (min_in_flight == max_in_flight) - index = (uint)start % ses->chan_count; - server = ses->chans[index].server; spin_unlock(&ses->chan_lock); From 2692c614f8f05929d692b3dbfd3faef1f00fbaf0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Feb 2026 14:58:22 +0100 Subject: [PATCH 094/828] device property: Allow secondary lookup in fwnode_get_next_child_node() When device_get_child_node_count() got split to the fwnode and device respective APIs, the fwnode didn't inherit the ability to traverse over the secondary fwnode. Hence any user, that switches from device to fwnode API misses this feature. In particular, this was revealed by the commit 1490cbb9dbfd ("device property: Split fwnode_get_child_node_count()") that effectively broke the GPIO enumeration on Intel Galileo boards. Fix this by moving the secondary lookup from device to fwnode API. Note, in general no device_*() API should go into the depth of the fwnode implementation. Fixes: 114dbb4fa7c4 ("drivers property: When no children in primary, try secondary") Cc: stable@vger.kernel.org Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki (Intel) Reviewed-by: Sakari Ailus Link: https://patch.msgid.link/20260210135822.47335-1-andriy.shevchenko@linux.intel.com Signed-off-by: Danilo Krummrich --- drivers/base/property.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 6a63860579dd..8d9a34be57fb 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -797,7 +797,18 @@ struct fwnode_handle * fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { - return fwnode_call_ptr_op(fwnode, get_next_child_node, child); + struct fwnode_handle *next; + + if (IS_ERR_OR_NULL(fwnode)) + return NULL; + + /* Try to find a child in primary fwnode */ + next = fwnode_call_ptr_op(fwnode, get_next_child_node, child); + if (next) + return next; + + /* When no more children in primary, continue with secondary */ + return fwnode_call_ptr_op(fwnode->secondary, get_next_child_node, child); } EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); @@ -841,19 +852,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child) { - const struct fwnode_handle *fwnode = dev_fwnode(dev); - struct fwnode_handle *next; - - if (IS_ERR_OR_NULL(fwnode)) - return NULL; - - /* Try to find a child in primary fwnode */ - next = fwnode_get_next_child_node(fwnode, child); - if (next) - return next; - - /* When no more children in primary, continue with secondary */ - return fwnode_get_next_child_node(fwnode->secondary, child); + return fwnode_get_next_child_node(dev_fwnode(dev), child); } EXPORT_SYMBOL_GPL(device_get_next_child_node); From 5dd69b864911ae3847365e8bafe7854e79fbeecb Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 29 Jan 2026 09:51:10 -0800 Subject: [PATCH 095/828] hwmon: (macsmc) Fix regressions in Apple Silicon SMC hwmon driver The recently added macsmc-hwmon driver contained several critical bugs in its sensor population logic and float conversion routines. Specifically: - The voltage sensor population loop used the wrong prefix ("volt-" instead of "voltage-") and incorrectly assigned sensors to the temperature sensor array (hwmon->temp.sensors) instead of the voltage sensor array (hwmon->volt.sensors). This would lead to out-of-bounds memory access or data corruption when both temperature and voltage sensors were present. - The float conversion in macsmc_hwmon_write_f32() had flawed exponent logic for values >= 2^24 and lacked masking for the mantissa, which could lead to incorrect values being written to the SMC. Fix these issues to ensure correct sensor registration and reliable manual fan control. Confirm that the reported overflow in FIELD_PREP is fixed by declaring macsmc_hwmon_write_f32() as __always_inline for a compile test. Fixes: 785205fd8139 ("hwmon: Add Apple Silicon SMC hwmon driver") Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/linux-hwmon/20260119195817.GA1035354@ax162/ Cc: James Calligeros Cc: Nathan Chancellor Cc: Neal Gompa Cc: Janne Grunau Signed-off-by: Guenter Roeck Tested-by: Nathan Chancellor # build only Link: https://lore.kernel.org/r/20260129175112.3751907-2-linux@roeck-us.net Reviewed-by: James Calligeros Signed-off-by: Guenter Roeck --- drivers/hwmon/macsmc-hwmon.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/macsmc-hwmon.c b/drivers/hwmon/macsmc-hwmon.c index 1c0bbec7e8eb..40d25c81b443 100644 --- a/drivers/hwmon/macsmc-hwmon.c +++ b/drivers/hwmon/macsmc-hwmon.c @@ -228,25 +228,22 @@ static int macsmc_hwmon_write_f32(struct apple_smc *smc, smc_key key, int value) { u64 val; u32 fval = 0; - int exp = 0, neg; + int exp, neg; + neg = value < 0; val = abs(value); - neg = val != value; if (val) { - int msb = __fls(val) - exp; + exp = __fls(val); - if (msb > 23) { - val >>= msb - FLT_MANT_BIAS; - exp -= msb - FLT_MANT_BIAS; - } else if (msb < 23) { - val <<= FLT_MANT_BIAS - msb; - exp += msb; - } + if (exp > 23) + val >>= exp - 23; + else + val <<= 23 - exp; fval = FIELD_PREP(FLT_SIGN_MASK, neg) | FIELD_PREP(FLT_EXP_MASK, exp + FLT_EXP_BIAS) | - FIELD_PREP(FLT_MANT_MASK, val); + FIELD_PREP(FLT_MANT_MASK, val & FLT_MANT_MASK); } return apple_smc_write_u32(smc, key, fval); @@ -663,8 +660,8 @@ static int macsmc_hwmon_populate_sensors(struct macsmc_hwmon *hwmon, if (!hwmon->volt.sensors) return -ENOMEM; - for_each_child_of_node_with_prefix(hwmon_node, key_node, "volt-") { - sensor = &hwmon->temp.sensors[hwmon->temp.count]; + for_each_child_of_node_with_prefix(hwmon_node, key_node, "voltage-") { + sensor = &hwmon->volt.sensors[hwmon->volt.count]; if (!macsmc_hwmon_create_sensor(hwmon->dev, hwmon->smc, key_node, sensor)) { sensor->attrs = HWMON_I_INPUT; From 579b86f3c26fee97996e68c1cbfb7461711f3de3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 29 Jan 2026 09:51:11 -0800 Subject: [PATCH 096/828] hwmon: (macsmc) Fix overflows, underflows, and sign extension The macsmc-hwmon driver experienced several issues related to value scaling and type conversion: 1. macsmc_hwmon_read_f32_scaled() clipped values to INT_MAX/INT_MIN. On 64-bit systems, hwmon supports long values, so clipping to 32-bit range was premature and caused loss of range for high-power sensors. Changed it to use long and clip to LONG_MAX/LONG_MIN. 2. The overflow check in macsmc_hwmon_read_f32_scaled() used 1UL, which is 32-bit on some platforms. Switched to 1ULL. 3. macsmc_hwmon_read_key() used a u32 temporary variable for f32 values. When assigned to a 64-bit long, negative values were zero-extended instead of sign-extended, resulting in large positive numbers. 4. macsmc_hwmon_read_ioft_scaled() used mult_frac() which could overflow during intermediate multiplication. Switched to mul_u64_u32_div() to handle the 64-bit multiplication safely. 5. ioft values (unsigned 48.16) could overflow long when scaled by 1,000,000. Added explicit clipping to LONG_MAX in the caller. 6. macsmc_hwmon_write_f32() truncated its long argument to int, potentially causing issues for large values. Fix these issues by using appropriate types and helper functions. Fixes: 785205fd8139 ("hwmon: Add Apple Silicon SMC hwmon driver") Cc: James Calligeros Cc: Nathan Chancellor Cc: Neal Gompa Cc: Janne Grunau Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20260129175112.3751907-3-linux@roeck-us.net Reviewed-by: James Calligeros Signed-off-by: Guenter Roeck --- drivers/hwmon/macsmc-hwmon.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/hwmon/macsmc-hwmon.c b/drivers/hwmon/macsmc-hwmon.c index 40d25c81b443..1500ec2cc9f8 100644 --- a/drivers/hwmon/macsmc-hwmon.c +++ b/drivers/hwmon/macsmc-hwmon.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -130,7 +131,7 @@ static int macsmc_hwmon_read_ioft_scaled(struct apple_smc *smc, smc_key key, if (ret < 0) return ret; - *p = mult_frac(val, scale, 65536); + *p = mul_u64_u32_div(val, scale, 65536); return 0; } @@ -140,7 +141,7 @@ static int macsmc_hwmon_read_ioft_scaled(struct apple_smc *smc, smc_key key, * them. */ static int macsmc_hwmon_read_f32_scaled(struct apple_smc *smc, smc_key key, - int *p, int scale) + long *p, int scale) { u32 fval; u64 val; @@ -162,21 +163,21 @@ static int macsmc_hwmon_read_f32_scaled(struct apple_smc *smc, smc_key key, val = 0; else if (exp < 0) val >>= -exp; - else if (exp != 0 && (val & ~((1UL << (64 - exp)) - 1))) /* overflow */ + else if (exp != 0 && (val & ~((1ULL << (64 - exp)) - 1))) /* overflow */ val = U64_MAX; else val <<= exp; if (fval & FLT_SIGN_MASK) { - if (val > (-(s64)INT_MIN)) - *p = INT_MIN; + if (val > (u64)LONG_MAX + 1) + *p = LONG_MIN; else - *p = -val; + *p = -(long)val; } else { - if (val > INT_MAX) - *p = INT_MAX; + if (val > (u64)LONG_MAX) + *p = LONG_MAX; else - *p = val; + *p = (long)val; } return 0; @@ -195,7 +196,7 @@ static int macsmc_hwmon_read_key(struct apple_smc *smc, switch (sensor->info.type_code) { /* 32-bit IEEE 754 float */ case __SMC_KEY('f', 'l', 't', ' '): { - u32 flt_ = 0; + long flt_ = 0; ret = macsmc_hwmon_read_f32_scaled(smc, sensor->macsmc_key, &flt_, scale); @@ -214,7 +215,10 @@ static int macsmc_hwmon_read_key(struct apple_smc *smc, if (ret) return ret; - *val = (long)ioft; + if (ioft > LONG_MAX) + *val = LONG_MAX; + else + *val = (long)ioft; break; } default: @@ -224,7 +228,7 @@ static int macsmc_hwmon_read_key(struct apple_smc *smc, return 0; } -static int macsmc_hwmon_write_f32(struct apple_smc *smc, smc_key key, int value) +static int macsmc_hwmon_write_f32(struct apple_smc *smc, smc_key key, long value) { u64 val; u32 fval = 0; From ac209a7d816d918582d0d6387e93d02679f9c8ed Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 14 Feb 2026 17:03:27 -0800 Subject: [PATCH 097/828] hwmon: (emc1403) correct a malformed email address Add a closing '>' to Kalhan's emaill address. line 60: Kalhan Trisal Link: https://lore.kernel.org/r/20260215010327.1687304-1-rdunlap@infradead.org Signed-off-by: Guenter Roeck --- Documentation/hwmon/emc1403.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/emc1403.rst b/Documentation/hwmon/emc1403.rst index 57f833b1a800..77060d515323 100644 --- a/Documentation/hwmon/emc1403.rst +++ b/Documentation/hwmon/emc1403.rst @@ -57,7 +57,7 @@ Supported chips: - https://ww1.microchip.com/downloads/en/DeviceDoc/EMC1438%20DS%20Rev.%201.0%20(04-29-10).pdf Author: - Kalhan Trisal Description From b7497b5a99f54ab8dcda5b14a308385b2fb03d8d Mon Sep 17 00:00:00 2001 From: Hao Yu Date: Mon, 23 Feb 2026 01:03:31 +0800 Subject: [PATCH 098/828] hwmon: (aht10) Fix initialization commands for AHT20 According to the AHT20 datasheet (updated to V1.0 after the 2023.09 version), the initialization command for AHT20 is 0b10111110 (0xBE). The previous sequence (0xE1) used in earlier versions is no longer compatible with newer AHT20 sensors. Update the initialization command to ensure the sensor is properly initialized. While at it, use binary notation for DHT20_CMD_INIT to match the notation used in the datasheet. Fixes: d2abcb5cc885 ("hwmon: (aht10) Add support for compatible aht20") Signed-off-by: Hao Yu Link: https://lore.kernel.org/r/20260222170332.1616-3-haoyufine@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/aht10.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/aht10.c b/drivers/hwmon/aht10.c index 007befdba977..4ce019d2cc80 100644 --- a/drivers/hwmon/aht10.c +++ b/drivers/hwmon/aht10.c @@ -37,7 +37,9 @@ #define AHT10_CMD_MEAS 0b10101100 #define AHT10_CMD_RST 0b10111010 -#define DHT20_CMD_INIT 0x71 +#define AHT20_CMD_INIT 0b10111110 + +#define DHT20_CMD_INIT 0b01110001 /* * Flags in the answer byte/command @@ -341,7 +343,7 @@ static int aht10_probe(struct i2c_client *client) data->meas_size = AHT20_MEAS_SIZE; data->crc8 = true; crc8_populate_msb(crc8_table, AHT20_CRC8_POLY); - data->init_cmd = AHT10_CMD_INIT; + data->init_cmd = AHT20_CMD_INIT; break; case dht20: data->meas_size = AHT20_MEAS_SIZE; From be704107e79696e855aa41e901a926039b6d2410 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 19 Feb 2026 16:55:30 -0600 Subject: [PATCH 099/828] regulator: dt-bindings: mt6359: make regulator names unique Update the example devicetree with unique regulator names for all regulators. This reflects the same change made to the actual .dtsi file. Signed-off-by: David Lechner Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260219-mtk-mt6359-fix-regulator-names-v1-2-ee0fcebfe1d9@baylibre.com Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/mt6359-regulator.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml index d6b3b5a5c0b3..fe4ac9350ba0 100644 --- a/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml @@ -287,7 +287,7 @@ examples: regulator-max-microvolt = <1700000>; }; mt6359_vrfck_1_ldo_reg: ldo_vrfck_1 { - regulator-name = "vrfck"; + regulator-name = "vrfck_1"; regulator-min-microvolt = <1240000>; regulator-max-microvolt = <1600000>; }; @@ -309,7 +309,7 @@ examples: regulator-max-microvolt = <3300000>; }; mt6359_vemc_1_ldo_reg: ldo_vemc_1 { - regulator-name = "vemc"; + regulator-name = "vemc_1"; regulator-min-microvolt = <2500000>; regulator-max-microvolt = <3300000>; }; From bfcaf4e8ae9b4f52b97fb04ce03be3a01d41cdd0 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 16 Feb 2026 14:14:33 +0100 Subject: [PATCH 100/828] rust: io: macro_export io_define_read!() and io_define_write!() Currently, the define_read!() and define_write!() I/O macros are crate public. The only user outside of the I/O module is PCI (for the configurations space I/O backend). Consequently, when CONFIG_PCI=n this causes a compile time warning [1]. In order to fix this, rename the macros to io_define_read!() and io_define_write!() and use #[macro_export] to export them. This is better than making the crate public visibility conditional, as eventually subsystems will have their own crate. Also, I/O backends are valid to be implemented by drivers as well. For instance, there are devices (such as GPUs) that run firmware which allows to program other devices only accessible through the primary device through indirect I/O. Since the macros are now public, also add the corresponding documentation. Fixes: 121d87b28e1d ("rust: io: separate generic I/O helpers from MMIO implementation") Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/driver-core/CANiq72khOYkt6t5zwMvSiyZvWWHMZuNCMERXu=7K=_5tT-8Pgg@mail.gmail.com/ [1] Reviewed-by: Alice Ryhl Reviewed-by: Daniel Almeida Link: https://patch.msgid.link/20260216131534.65008-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 131 ++++++++++++++++++++++++++++-------------- rust/kernel/pci/io.rs | 24 ++++---- 2 files changed, 101 insertions(+), 54 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index c1cca7b438c3..e5fba6bf6db0 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -139,9 +139,9 @@ pub struct Mmio(MmioRaw); /// Internal helper macros used to invoke C MMIO read functions. /// -/// This macro is intended to be used by higher-level MMIO access macros (define_read) and provides -/// a unified expansion for infallible vs. fallible read semantics. It emits a direct call into the -/// corresponding C helper and performs the required cast to the Rust return type. +/// This macro is intended to be used by higher-level MMIO access macros (io_define_read) and +/// provides a unified expansion for infallible vs. fallible read semantics. It emits a direct call +/// into the corresponding C helper and performs the required cast to the Rust return type. /// /// # Parameters /// @@ -166,9 +166,9 @@ macro_rules! call_mmio_read { /// Internal helper macros used to invoke C MMIO write functions. /// -/// This macro is intended to be used by higher-level MMIO access macros (define_write) and provides -/// a unified expansion for infallible vs. fallible write semantics. It emits a direct call into the -/// corresponding C helper and performs the required cast to the Rust return type. +/// This macro is intended to be used by higher-level MMIO access macros (io_define_write) and +/// provides a unified expansion for infallible vs. fallible write semantics. It emits a direct call +/// into the corresponding C helper and performs the required cast to the Rust return type. /// /// # Parameters /// @@ -193,7 +193,30 @@ macro_rules! call_mmio_write { }}; } -macro_rules! define_read { +/// Generates an accessor method for reading from an I/O backend. +/// +/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked +/// (infallible) or runtime bounds-checked (fallible) read methods. It abstracts the address +/// calculation and bounds checking, and delegates the actual I/O read operation to a specified +/// helper macro, making it generic over different I/O backends. +/// +/// # Parameters +/// +/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on +/// `IoKnownSize` for compile-time checks and returns the value directly. `fallible` performs +/// runtime checks against `maxsize()` and returns a `Result`. +/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., +/// `#[cfg(CONFIG_64BIT)]` or inline directives). +/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). +/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `read32`, +/// `try_read8`). +/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call +/// (e.g., `call_mmio_read`). +/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the +/// `$call_macro`. +/// * `$type_name:ty` - The Rust type of the value being read (e.g., `u8`, `u32`). +#[macro_export] +macro_rules! io_define_read { (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) -> $type_name:ty) => { /// Read IO data from a given offset known at compile time. @@ -226,9 +249,33 @@ macro_rules! define_read { } }; } -pub(crate) use define_read; +pub use io_define_read; -macro_rules! define_write { +/// Generates an accessor method for writing to an I/O backend. +/// +/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked +/// (infallible) or runtime bounds-checked (fallible) write methods. It abstracts the address +/// calculation and bounds checking, and delegates the actual I/O write operation to a specified +/// helper macro, making it generic over different I/O backends. +/// +/// # Parameters +/// +/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on +/// `IoKnownSize` for compile-time checks and returns `()`. `fallible` performs runtime checks +/// against `maxsize()` and returns a `Result`. +/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., +/// `#[cfg(CONFIG_64BIT)]` or inline directives). +/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). +/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `write32`, +/// `try_write8`). +/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call +/// (e.g., `call_mmio_write`). +/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the +/// `$call_macro`. +/// * `$type_name:ty` - The Rust type of the value being written (e.g., `u8`, `u32`). Note the use +/// of `<-` before the type to denote a write operation. +#[macro_export] +macro_rules! io_define_write { (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <- $type_name:ty) => { /// Write IO data from a given offset known at compile time. @@ -259,7 +306,7 @@ macro_rules! define_write { } }; } -pub(crate) use define_write; +pub use io_define_write; /// Checks whether an access of type `U` at the given `offset` /// is valid within this region. @@ -509,40 +556,40 @@ impl Io for Mmio { self.0.maxsize() } - define_read!(fallible, try_read8, call_mmio_read(readb) -> u8); - define_read!(fallible, try_read16, call_mmio_read(readw) -> u16); - define_read!(fallible, try_read32, call_mmio_read(readl) -> u32); - define_read!( + io_define_read!(fallible, try_read8, call_mmio_read(readb) -> u8); + io_define_read!(fallible, try_read16, call_mmio_read(readw) -> u16); + io_define_read!(fallible, try_read32, call_mmio_read(readl) -> u32); + io_define_read!( fallible, #[cfg(CONFIG_64BIT)] try_read64, call_mmio_read(readq) -> u64 ); - define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8); - define_write!(fallible, try_write16, call_mmio_write(writew) <- u16); - define_write!(fallible, try_write32, call_mmio_write(writel) <- u32); - define_write!( + io_define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8); + io_define_write!(fallible, try_write16, call_mmio_write(writew) <- u16); + io_define_write!(fallible, try_write32, call_mmio_write(writel) <- u32); + io_define_write!( fallible, #[cfg(CONFIG_64BIT)] try_write64, call_mmio_write(writeq) <- u64 ); - define_read!(infallible, read8, call_mmio_read(readb) -> u8); - define_read!(infallible, read16, call_mmio_read(readw) -> u16); - define_read!(infallible, read32, call_mmio_read(readl) -> u32); - define_read!( + io_define_read!(infallible, read8, call_mmio_read(readb) -> u8); + io_define_read!(infallible, read16, call_mmio_read(readw) -> u16); + io_define_read!(infallible, read32, call_mmio_read(readl) -> u32); + io_define_read!( infallible, #[cfg(CONFIG_64BIT)] read64, call_mmio_read(readq) -> u64 ); - define_write!(infallible, write8, call_mmio_write(writeb) <- u8); - define_write!(infallible, write16, call_mmio_write(writew) <- u16); - define_write!(infallible, write32, call_mmio_write(writel) <- u32); - define_write!( + io_define_write!(infallible, write8, call_mmio_write(writeb) <- u8); + io_define_write!(infallible, write16, call_mmio_write(writew) <- u16); + io_define_write!(infallible, write32, call_mmio_write(writel) <- u32); + io_define_write!( infallible, #[cfg(CONFIG_64BIT)] write64, @@ -566,40 +613,40 @@ impl Mmio { unsafe { &*core::ptr::from_ref(raw).cast() } } - define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - define_read!( + io_define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8); + io_define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16); + io_define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32); + io_define_read!( infallible, #[cfg(CONFIG_64BIT)] pub read64_relaxed, call_mmio_read(readq_relaxed) -> u64 ); - define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - define_read!( + io_define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8); + io_define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16); + io_define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32); + io_define_read!( fallible, #[cfg(CONFIG_64BIT)] pub try_read64_relaxed, call_mmio_read(readq_relaxed) -> u64 ); - define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - define_write!( + io_define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); + io_define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16); + io_define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32); + io_define_write!( infallible, #[cfg(CONFIG_64BIT)] pub write64_relaxed, call_mmio_write(writeq_relaxed) <- u64 ); - define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - define_write!( + io_define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); + io_define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16); + io_define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32); + io_define_write!( fallible, #[cfg(CONFIG_64BIT)] pub try_write64_relaxed, diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs index 6ca4cf75594c..fb6edab2aea7 100644 --- a/rust/kernel/pci/io.rs +++ b/rust/kernel/pci/io.rs @@ -8,8 +8,8 @@ use crate::{ device, devres::Devres, io::{ - define_read, - define_write, + io_define_read, + io_define_write, Io, IoCapable, IoKnownSize, @@ -88,7 +88,7 @@ pub struct ConfigSpace<'a, S: ConfigSpaceKind = Extended> { /// Internal helper macros used to invoke C PCI configuration space read functions. /// /// This macro is intended to be used by higher-level PCI configuration space access macros -/// (define_read) and provides a unified expansion for infallible vs. fallible read semantics. It +/// (io_define_read) and provides a unified expansion for infallible vs. fallible read semantics. It /// emits a direct call into the corresponding C helper and performs the required cast to the Rust /// return type. /// @@ -117,9 +117,9 @@ macro_rules! call_config_read { /// Internal helper macros used to invoke C PCI configuration space write functions. /// /// This macro is intended to be used by higher-level PCI configuration space access macros -/// (define_write) and provides a unified expansion for infallible vs. fallible read semantics. It -/// emits a direct call into the corresponding C helper and performs the required cast to the Rust -/// return type. +/// (io_define_write) and provides a unified expansion for infallible vs. fallible read semantics. +/// It emits a direct call into the corresponding C helper and performs the required cast to the +/// Rust return type. /// /// # Parameters /// @@ -163,13 +163,13 @@ impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> { // PCI configuration space does not support fallible operations. // The default implementations from the Io trait are not used. - define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8); - define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16); - define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32); + io_define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8); + io_define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16); + io_define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32); - define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8); - define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16); - define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32); + io_define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8); + io_define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16); + io_define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32); } impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> { From c5794709bc9105935dbedef8b9cf9c06f2b559fa Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Feb 2026 20:28:29 -0800 Subject: [PATCH 101/828] ksmbd: Compare MACs in constant time To prevent timing attacks, MAC comparisons need to be constant-time. Replace the memcmp() with the correct function, crypto_memneq(). Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/Kconfig | 1 + fs/smb/server/auth.c | 4 +++- fs/smb/server/smb2pdu.c | 5 +++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/Kconfig b/fs/smb/server/Kconfig index 2775162c535c..12594879cb64 100644 --- a/fs/smb/server/Kconfig +++ b/fs/smb/server/Kconfig @@ -13,6 +13,7 @@ config SMB_SERVER select CRYPTO_LIB_MD5 select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512 + select CRYPTO_LIB_UTILS select CRYPTO_CMAC select CRYPTO_AEAD2 select CRYPTO_CCM diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 580c4d303dc3..5fe8c667c6b1 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -165,7 +166,8 @@ int ksmbd_auth_ntlmv2(struct ksmbd_conn *conn, struct ksmbd_session *sess, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE, sess->sess_key); - if (memcmp(ntlmv2->ntlmv2_hash, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE) != 0) + if (crypto_memneq(ntlmv2->ntlmv2_hash, ntlmv2_rsp, + CIFS_HMAC_MD5_HASH_SIZE)) return -EINVAL; return 0; } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 95901a78951c..743c629fe7ec 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4,6 +4,7 @@ * Copyright (C) 2018 Samsung Electronics Co., Ltd. */ +#include #include #include #include @@ -8880,7 +8881,7 @@ int smb2_check_sign_req(struct ksmbd_work *work) ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, 1, signature); - if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) { + if (crypto_memneq(signature, signature_req, SMB2_SIGNATURE_SIZE)) { pr_err("bad smb2 signature\n"); return 0; } @@ -8968,7 +8969,7 @@ int smb3_check_sign_req(struct ksmbd_work *work) if (ksmbd_sign_smb3_pdu(conn, signing_key, iov, 1, signature)) return 0; - if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) { + if (crypto_memneq(signature, signature_req, SMB2_SIGNATURE_SIZE)) { pr_err("bad smb2 signature\n"); return 0; } From 6b4f875aac344cdd52a1f34cc70ed2f874a65757 Mon Sep 17 00:00:00 2001 From: Nicholas Carlini Date: Thu, 19 Feb 2026 20:58:57 +0900 Subject: [PATCH 102/828] ksmbd: fix signededness bug in smb_direct_prepare_negotiation() smb_direct_prepare_negotiation() casts an unsigned __u32 value from sp->max_recv_size and req->preferred_send_size to a signed int before computing min_t(int, ...). A maliciously provided preferred_send_size of 0x80000000 will return as smaller than max_recv_size, and then be used to set the maximum allowed alowed receive size for the next message. By sending a second message with a large value (>1420 bytes) the attacker can then achieve a heap buffer overflow. This fix replaces min_t(int, ...) with min_t(u32) Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Nicholas Carlini Reviewed-by: Stefan Metzmacher Acked-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 7c53b78b818e..188572491d53 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2540,9 +2540,9 @@ static int smb_direct_prepare(struct ksmbd_transport *t) goto put; req = (struct smbdirect_negotiate_req *)recvmsg->packet; - sp->max_recv_size = min_t(int, sp->max_recv_size, + sp->max_recv_size = min_t(u32, sp->max_recv_size, le32_to_cpu(req->preferred_send_size)); - sp->max_send_size = min_t(int, sp->max_send_size, + sp->max_send_size = min_t(u32, sp->max_send_size, le32_to_cpu(req->max_receive_size)); sp->max_fragmented_send_size = le32_to_cpu(req->max_fragmented_size); From 47322c469d4a63ac45b705ca83680671ff71c975 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 9 Feb 2026 16:38:05 +0100 Subject: [PATCH 103/828] dma-mapping: avoid random addr value print out on error path dma_addr is unitialized in dma_direct_map_phys() when swiotlb is forced and DMA_ATTR_MMIO is set which leads to random value print out in warning. Fix that by just returning DMA_MAPPING_ERROR. Fixes: e53d29f957b3 ("dma-mapping: convert dma_direct_*map_page to be phys_addr_t based") Signed-off-by: Jiri Pirko Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20260209153809.250835-2-jiri@resnulli.us --- kernel/dma/direct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index f476c63b668c..e89f175e9c2d 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -85,7 +85,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, if (is_swiotlb_force_bounce(dev)) { if (attrs & DMA_ATTR_MMIO) - goto err_overflow; + return DMA_MAPPING_ERROR; return swiotlb_map(dev, phys, size, dir, attrs); } From d5b5e8149af0f5efed58653cbebf1cb3258ce49a Mon Sep 17 00:00:00 2001 From: Stian Halseth Date: Wed, 18 Feb 2026 13:00:24 +0100 Subject: [PATCH 104/828] sparc: Fix page alignment in dma mapping 'phys' may include an offset within the page, while previously used 'base_paddr' was already page-aligned. This caused incorrect DMA mapping in dma_4u_map_phys and dma_4v_map_phys. Fix both functions by masking 'phys' with IO_PAGE_MASK, covering both generic SPARC code and sun4v. Fixes: 38c0d0ebf520 ("sparc: Use physical address DMA mapping") Reported-by: Stian Halseth Closes: https://github.com/sparclinux/issues/issues/75 Suggested-by: Marek Szyprowski Signed-off-by: Stian Halseth Tested-by: Nathaniel Roach Tested-by: Han Gao # on SPARC Enterprise T5220 [mszyprow: adjusted commit description a bit] Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20260218120056.3366-2-stian@itx.no --- arch/sparc/kernel/iommu.c | 2 ++ arch/sparc/kernel/pci_sun4v.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 46ef88bc9c26..7613ab0ffb89 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -312,6 +312,8 @@ static dma_addr_t dma_4u_map_phys(struct device *dev, phys_addr_t phys, if (direction != DMA_TO_DEVICE) iopte_protection |= IOPTE_WRITE; + phys &= IO_PAGE_MASK; + for (i = 0; i < npages; i++, base++, phys += IO_PAGE_SIZE) iopte_val(*base) = iopte_protection | phys; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 440284cc804e..61f14b4c8f90 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -410,6 +410,8 @@ static dma_addr_t dma_4v_map_phys(struct device *dev, phys_addr_t phys, iommu_batch_start(dev, prot, entry); + phys &= IO_PAGE_MASK; + for (i = 0; i < npages; i++, phys += IO_PAGE_SIZE) { long err = iommu_batch_add(phys, mask); if (unlikely(err < 0L)) From c8d7f21ead727485ebf965e2b4d42d4a4f0840f6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Feb 2026 19:12:20 +0100 Subject: [PATCH 105/828] wifi: cfg80211: wext: fix IGTK key ID off-by-one The IGTK key ID must be 4 or 5, but the code checks against key ID + 1, so must check against 5/6 rather than 4/5. Fix that. Reported-by: Jouni Malinen Fixes: 08645126dd24 ("cfg80211: implement wext key handling") Link: https://patch.msgid.link/20260209181220.362205-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 1241fda78a68..680500fa57cf 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -684,7 +684,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, idx = erq->flags & IW_ENCODE_INDEX; if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { - if (idx < 4 || idx > 5) { + if (idx < 5 || idx > 6) { idx = wdev->wext.default_mgmt_key; if (idx < 0) return -EINVAL; From 767d23ade706d5fa51c36168e92a9c5533c351a1 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Wed, 11 Feb 2026 11:20:24 +0300 Subject: [PATCH 106/828] wifi: cfg80211: cancel rfkill_block work in wiphy_unregister() There is a use-after-free error in cfg80211_shutdown_all_interfaces found by syzkaller: BUG: KASAN: use-after-free in cfg80211_shutdown_all_interfaces+0x213/0x220 Read of size 8 at addr ffff888112a78d98 by task kworker/0:5/5326 CPU: 0 UID: 0 PID: 5326 Comm: kworker/0:5 Not tainted 6.19.0-rc2 #2 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: events cfg80211_rfkill_block_work Call Trace: dump_stack_lvl+0x116/0x1f0 print_report+0xcd/0x630 kasan_report+0xe0/0x110 cfg80211_shutdown_all_interfaces+0x213/0x220 cfg80211_rfkill_block_work+0x1e/0x30 process_one_work+0x9cf/0x1b70 worker_thread+0x6c8/0xf10 kthread+0x3c5/0x780 ret_from_fork+0x56d/0x700 ret_from_fork_asm+0x1a/0x30 The problem arises due to the rfkill_block work is not cancelled when wiphy is being unregistered. In order to fix the issue cancel the corresponding work in wiphy_unregister(). Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 1f87f7d3a3b4 ("cfg80211: add rfkill support") Cc: stable@vger.kernel.org Signed-off-by: Daniil Dulov Link: https://patch.msgid.link/20260211082024.1967588-1-d.dulov@aladdin.ru Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 9af85d655027..d35cf04cbc81 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1212,6 +1212,7 @@ void wiphy_unregister(struct wiphy *wiphy) /* this has nothing to do now but make sure it's gone */ cancel_work_sync(&rdev->wiphy_work); + cancel_work_sync(&rdev->rfkill_block); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); From c854758abe0b8d86f9c43dc060ff56a0ee5b31e0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Feb 2026 13:05:26 +0100 Subject: [PATCH 107/828] wifi: radiotap: reject radiotap with unknown bits The radiotap parser is currently only used with the radiotap namespace (not with vendor namespaces), but if the undefined field 18 is used, the alignment/size is unknown as well. In this case, iterator->_next_ns_data isn't initialized (it's only set for skipping vendor namespaces), and syzbot points out that we later compare against this uninitialized value. Fix this by moving the rejection of unknown radiotap fields down to after the in-namespace lookup, so it will really use iterator->_next_ns_data only for vendor namespaces, even in case undefined fields are present. Cc: stable@vger.kernel.org Fixes: 33e5a2f776e3 ("wireless: update radiotap parser") Reported-by: syzbot+b09c1af8764c0097bb19@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/69944a91.a70a0220.2c38d7.00fc.GAE@google.com Link: https://patch.msgid.link/20260217120526.162647-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/wireless/radiotap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 326faea38ca3..c85eaa583a46 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -239,14 +239,14 @@ int ieee80211_radiotap_iterator_next( default: if (!iterator->current_namespace || iterator->_arg_index >= iterator->current_namespace->n_bits) { - if (iterator->current_namespace == &radiotap_ns) - return -ENOENT; align = 0; } else { align = iterator->current_namespace->align_size[iterator->_arg_index].align; size = iterator->current_namespace->align_size[iterator->_arg_index].size; } if (!align) { + if (iterator->current_namespace == &radiotap_ns) + return -ENOENT; /* skip all subsequent data */ iterator->_arg = iterator->_next_ns_data; /* give up on this namespace */ From 243307a0d1b0d01538e202c00454c28b21d4432e Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 3 Feb 2026 11:21:33 +0100 Subject: [PATCH 108/828] wifi: brcmfmac: Fix potential kernel oops when probe fails When probe of the sdio brcmfmac device fails for some reasons (i.e. missing firmware), the sdiodev->bus is set to error instead of NULL, thus the cleanup later in brcmf_sdio_remove() tries to free resources via invalid bus pointer. This happens because sdiodev->bus is set 2 times: first in brcmf_sdio_probe() and second time in brcmf_sdiod_probe(). Fix this by chaning the brcmf_sdio_probe() function to return the error code and set sdio->bus only there. Fixes: 0ff0843310b7 ("wifi: brcmfmac: Add optional lpo clock enable support") Signed-off-by: Marek Szyprowski Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260203102133.1478331-1-m.szyprowski@samsung.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 7 +++---- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 7 ++++--- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 6a3f187320fc..13952dfeb3e3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -951,11 +951,10 @@ int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev) goto out; /* try to attach to the target device */ - sdiodev->bus = brcmf_sdio_probe(sdiodev); - if (IS_ERR(sdiodev->bus)) { - ret = PTR_ERR(sdiodev->bus); + ret = brcmf_sdio_probe(sdiodev); + if (ret) goto out; - } + brcmf_sdiod_host_fixup(sdiodev->func2->card->host); out: if (ret) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 8cf9d7e7c3f7..4e6ed02c1591 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4445,7 +4445,7 @@ brcmf_sdio_prepare_fw_request(struct brcmf_sdio *bus) return fwreq; } -struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) +int brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) { int ret; struct brcmf_sdio *bus; @@ -4551,11 +4551,12 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) goto fail; } - return bus; + return 0; fail: brcmf_sdio_remove(bus); - return ERR_PTR(ret); + sdiodev->bus = NULL; + return ret; } /* Detach and free everything */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h index 0d18ed15b403..80180d5c6c87 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h @@ -358,7 +358,7 @@ void brcmf_sdiod_freezer_uncount(struct brcmf_sdio_dev *sdiodev); int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev); int brcmf_sdiod_remove(struct brcmf_sdio_dev *sdiodev); -struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev); +int brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev); void brcmf_sdio_remove(struct brcmf_sdio *bus); void brcmf_sdio_isr(struct brcmf_sdio *bus, bool in_isr); From 25723454f67980712234a42caca606b6710fd1e1 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 10 Feb 2026 18:03:34 +0800 Subject: [PATCH 109/828] wifi: mwifiex: Fix dev_alloc_name() return value check dev_alloc_name() returns the allocated ID on success, which could be over 0. Fix the return value check to check for negative error codes. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aYmsQfujoAe5qO02@stanley.mountain/ Fixes: 7bab5bdb81e3 ("wifi: mwifiex: Allocate dev name earlier for interface workqueue name") Signed-off-by: Chen-Yu Tsai Reviewed-by: Francesco Dolcini Link: https://patch.msgid.link/20260210100337.1131279-1-wenst@chromium.org Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a66d18e380fc..bc0a946e8e6e 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -3148,7 +3148,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, SET_NETDEV_DEV(dev, adapter->dev); ret = dev_alloc_name(dev, name); - if (ret) + if (ret < 0) goto err_alloc_name; priv->dfs_cac_workqueue = alloc_workqueue("MWIFIEX_DFS_CAC-%s", From 03cc8f90d0537fcd4985c3319b4fafbf2e3fb1f0 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Fri, 6 Feb 2026 14:53:56 -0500 Subject: [PATCH 110/828] wifi: libertas: fix use-after-free in lbs_free_adapter() The lbs_free_adapter() function uses timer_delete() (non-synchronous) for both command_timer and tx_lockup_timer before the structure is freed. This is incorrect because timer_delete() does not wait for any running timer callback to complete. If a timer callback is executing when lbs_free_adapter() is called, the callback will access freed memory since lbs_cfg_free() frees the containing structure immediately after lbs_free_adapter() returns. Both timer callbacks (lbs_cmd_timeout_handler and lbs_tx_lockup_handler) access priv->driver_lock, priv->cur_cmd, priv->dev, and other fields, which would all be use-after-free violations. Use timer_delete_sync() instead to ensure any running timer callback has completed before returning. This bug was introduced in commit 8f641d93c38a ("libertas: detect TX lockups and reset hardware") where del_timer() was used instead of del_timer_sync() in the cleanup path. The command_timer has had the same issue since the driver was first written. Fixes: 8f641d93c38a ("libertas: detect TX lockups and reset hardware") Fixes: 954ee164f4f4 ("[PATCH] libertas: reorganize and simplify init sequence") Cc: stable@vger.kernel.org Signed-off-by: Daniel Hodges Link: https://patch.msgid.link/20260206195356.15647-1-git@danielhodges.dev Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index d44e02c6fe38..dd97f1b61f4d 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c @@ -799,8 +799,8 @@ static void lbs_free_adapter(struct lbs_private *priv) { lbs_free_cmd_buffer(priv); kfifo_free(&priv->event_fifo); - timer_delete(&priv->command_timer); - timer_delete(&priv->tx_lockup_timer); + timer_delete_sync(&priv->command_timer); + timer_delete_sync(&priv->tx_lockup_timer); } static const struct net_device_ops lbs_netdev_ops = { From 2259d14499d16b115ef8d5d2ddc867e2be7cb5b5 Mon Sep 17 00:00:00 2001 From: Ramanathan Choodamani Date: Thu, 5 Feb 2026 15:12:16 +0530 Subject: [PATCH 111/828] wifi: mac80211: set default WMM parameters on all links Currently, mac80211 only initializes default WMM parameters on the deflink during do_open(). For MLO cases, this leaves the additional links without proper WMM defaults if hostapd does not supply per-link WMM parameters, leading to inconsistent QoS behavior across links. Set default WMM parameters for each link during ieee80211_vif_update_links(), because this ensures all individual links in an MLD have valid WMM settings during bring-up and behave consistently across different BSS. Signed-off-by: Ramanathan Choodamani Signed-off-by: Aishwarya R Link: https://patch.msgid.link/20260205094216.3093542-1-aishwarya.r@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/link.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 17bf55dabd31..a1f67bab8ba1 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -281,6 +281,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]; struct ieee80211_link_data *old_data[IEEE80211_MLD_MAX_NUM_LINKS]; bool use_deflink = old_links == 0; /* set for error case */ + bool non_sta = sdata->vif.type != NL80211_IFTYPE_STATION; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -337,6 +338,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, link = links[link_id]; ieee80211_link_init(sdata, link_id, &link->data, &link->conf); ieee80211_link_setup(&link->data); + ieee80211_set_wmm_default(&link->data, true, non_sta); } if (new_links == 0) From 1cb3c20688fc8380c9b365d03aea7e84faf6a9fd Mon Sep 17 00:00:00 2001 From: Sean Rhodes Date: Thu, 19 Feb 2026 20:14:26 +0000 Subject: [PATCH 112/828] ALSA: hda/realtek: Fix speaker pop on Star Labs StarFighter On Star Labs StarFighter (Realtek ALC233/235), the internal speakers can emit an audible pop when entering or leaving runtime suspend. Mute the speaker output paths via snd_hda_gen_shutup_speakers() in the Realtek shutup callback before the codec is powered down. This is enough to avoid the pop without special EAPD handling. Test results: - runtime PM pop fixed - still reaches D3 (PCI 0000:00:1f.3 power_state=D3hot) - does not address pops on cold boot (G3 exit) or around display manager start/shutdown journalctl -k (boot): - snd_hda_codec_alc269 hdaudioC0D0: ALC233: picked fixup for PCI SSID 7017:2014 - snd_hda_codec_alc269 hdaudioC0D0: autoconfig for ALC233: line_outs=1 (0x1b/0x0/0x0/0x0/0x0) type:speaker Suggested-by: Takashi Iwai Tested-by: Sean Rhodes Signed-off-by: Sean Rhodes Link: https://patch.msgid.link/4d5fb71b132bb283fd41c622b8413770b2065242.1771532060.git.sean@starlabs.systems Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 36053042ca77..9f64bb97c3f9 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -1017,6 +1017,24 @@ static int alc269_resume(struct hda_codec *codec) return 0; } +#define STARLABS_STARFIGHTER_SHUTUP_DELAY_MS 30 + +static void starlabs_starfighter_shutup(struct hda_codec *codec) +{ + if (snd_hda_gen_shutup_speakers(codec)) + msleep(STARLABS_STARFIGHTER_SHUTUP_DELAY_MS); +} + +static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->shutup = starlabs_starfighter_shutup; +} + static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4040,6 +4058,7 @@ enum { ALC245_FIXUP_CLEVO_NOISY_MIC, ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE, ALC233_FIXUP_MEDION_MTL_SPK, + ALC233_FIXUP_STARLABS_STARFIGHTER, ALC294_FIXUP_BASS_SPEAKER_15, ALC283_FIXUP_DELL_HP_RESUME, ALC294_FIXUP_ASUS_CS35L41_SPI_2, @@ -6499,6 +6518,10 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC233_FIXUP_STARLABS_STARFIGHTER] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_starlabs_starfighter, + }, [ALC294_FIXUP_BASS_SPEAKER_15] = { .type = HDA_FIXUP_FUNC, .v.func = alc294_fixup_bass_speaker_15, @@ -7651,6 +7674,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x2782, 0x4900, "MEDION E15443", ALC233_FIXUP_MEDION_MTL_SPK), + SND_PCI_QUIRK(0x7017, 0x2014, "Star Labs StarFighter", ALC233_FIXUP_STARLABS_STARFIGHTER), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), @@ -7747,6 +7771,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_TPT470_DOCK_FIX, .name = "tpt470-dock-fix"}, {.id = ALC298_FIXUP_TPT470_DOCK, .name = "tpt470-dock"}, {.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, + {.id = ALC233_FIXUP_STARLABS_STARFIGHTER, .name = "starlabs-starfighter"}, {.id = ALC700_FIXUP_INTEL_REFERENCE, .name = "alc700-ref"}, {.id = ALC269_FIXUP_SONY_VAIO, .name = "vaio"}, {.id = ALC269_FIXUP_DELL_M101Z, .name = "dell-m101z"}, From 1d241483368f2fd87fbaba64d6aec6bad3a1e12e Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 20 Feb 2026 21:58:48 +1030 Subject: [PATCH 113/828] ALSA: scarlett2: Fix DSP filter control array handling scarlett2_add_dsp_ctls() was incorrectly storing the precomp and PEQ filter coefficient control pointers into the precomp_flt_switch_ctls and peq_flt_switch_ctls arrays instead of the intended targets precomp_flt_ctls and peq_flt_ctls. Pass NULL instead, as the filter coefficient control pointers are not used, and remove the unused precomp_flt_ctls and peq_flt_ctls arrays from struct scarlett2_data. Additionally, scarlett2_update_filter_values() was reading dsp_input_count * peq_flt_count values for SCARLETT2_CONFIG_PEQ_FLT_SWITCH, but the peq_flt_switch array is indexed only by dsp_input_count (one switch per DSP input, not per filter). Fix the read count. Fixes: b64678eb4e70 ("ALSA: scarlett2: Add DSP controls") Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/86497b71db060677d97c38a6ce5f89bb3b25361b.1771581197.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett2.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 85a0316889d4..ef3150581eab 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -1328,8 +1328,6 @@ struct scarlett2_data { struct snd_kcontrol *mux_ctls[SCARLETT2_MUX_MAX]; struct snd_kcontrol *mix_ctls[SCARLETT2_MIX_MAX]; struct snd_kcontrol *compressor_ctls[SCARLETT2_COMPRESSOR_CTLS_MAX]; - struct snd_kcontrol *precomp_flt_ctls[SCARLETT2_PRECOMP_FLT_CTLS_MAX]; - struct snd_kcontrol *peq_flt_ctls[SCARLETT2_PEQ_FLT_CTLS_MAX]; struct snd_kcontrol *precomp_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX]; struct snd_kcontrol *peq_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX]; struct snd_kcontrol *direct_monitor_ctl; @@ -3447,7 +3445,6 @@ static int scarlett2_update_autogain(struct usb_mixer_interface *mixer) private->autogain_status[i] = private->num_autogain_status_texts - 1; - for (i = 0; i < SCARLETT2_AG_TARGET_COUNT; i++) if (scarlett2_has_config_item(private, scarlett2_ag_target_configs[i])) { @@ -5372,8 +5369,7 @@ static int scarlett2_update_filter_values(struct usb_mixer_interface *mixer) err = scarlett2_usb_get_config( mixer, SCARLETT2_CONFIG_PEQ_FLT_SWITCH, - info->dsp_input_count * info->peq_flt_count, - private->peq_flt_switch); + info->dsp_input_count, private->peq_flt_switch); if (err < 0) return err; @@ -6546,7 +6542,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i) err = scarlett2_add_new_ctl( mixer, &scarlett2_precomp_flt_ctl, i * info->precomp_flt_count + j, - 1, s, &private->precomp_flt_switch_ctls[j]); + 1, s, NULL); if (err < 0) return err; } @@ -6556,7 +6552,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i) err = scarlett2_add_new_ctl( mixer, &scarlett2_peq_flt_ctl, i * info->peq_flt_count + j, - 1, s, &private->peq_flt_switch_ctls[j]); + 1, s, NULL); if (err < 0) return err; } From cbddd303416456db5ceeedaf9e262096f079e861 Mon Sep 17 00:00:00 2001 From: Panagiotis Foliadis Date: Sat, 21 Feb 2026 19:40:58 +0000 Subject: [PATCH 114/828] ALSA: hda/realtek: Add quirk for Acer Aspire V3-572G The Acer Aspire V3-572G has a combo jack (ALC283) but the BIOS sets pin 0x19 to 0x411111f0 (not connected), so the headset mic is not detected. Add a quirk to override pin 0x19 as a headset mic and enable headset mode. Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221075 Suggested-by: Charalampos Mitrodimas Signed-off-by: Panagiotis Foliadis Reviewed-by: Charalampos Mitrodimas Link: https://patch.msgid.link/20260221-fix-detect-mic-v1-1-b6e427b5275d@posteo.net Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 9f64bb97c3f9..bba173645b1f 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6614,6 +6614,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF), + SND_PCI_QUIRK(0x1025, 0x0943, "Acer Aspire V3-572G", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), From 43a44fb7f2fa163926b23149805e989ba2395db1 Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Sun, 22 Feb 2026 21:26:08 +0900 Subject: [PATCH 115/828] ALSA: hda/realtek: fix model name typo for Samsung Galaxy Book Flex (NT950QCG-X716) There's no product named "Samsung Galaxy Flex Book". Use the correct "Samsung Galaxy Book Flex" name. Link: https://www.samsung.com/sec/support/model/NT950QCG-X716 Link: https://www.samsung.com/us/computing/galaxy-books/galaxy-book-flex/galaxy-book-flex-15-6-qled-512gb-storage-s-pen-included-np950qcg-k01us Cc: Signed-off-by: Juhyung Park Link: https://patch.msgid.link/20260222122609.281191-1-qkrwngud825@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index bba173645b1f..7166cbeb0925 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7335,7 +7335,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), - SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Book Flex (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a4, "Samsung Galaxy Book Pro 360 (NT935QBD)", ALC298_FIXUP_SAMSUNG_AMP), From 9fb16a5c5ff93058851099a2b80a899b0c53fe3f Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Sun, 22 Feb 2026 21:26:09 +0900 Subject: [PATCH 116/828] ALSA: hda/realtek: add quirk for Samsung Galaxy Book Flex (NT950QCT-A38A) Similar to other Samsung laptops, NT950QCT also requires the ALC298_FIXUP_SAMSUNG_AMP quirk applied. Cc: Signed-off-by: Juhyung Park Link: https://patch.msgid.link/20260222122609.281191-2-qkrwngud825@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 7166cbeb0925..43ecfc63ef87 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7335,6 +7335,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc188, "Samsung Galaxy Book Flex (NT950QCT-A38A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Book Flex (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), From 24d2d3c5f94007a5a0554065ab7349bb69e28bcb Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Sat, 21 Feb 2026 02:33:45 +1030 Subject: [PATCH 117/828] ALSA: usb-audio: Improve Focusrite sample rate filtering Replace the bLength == 10 max_rate check in focusrite_valid_sample_rate() with filtering that also examines the bmControls VAL_ALT_SETTINGS bit. When VAL_ALT_SETTINGS is readable, the device uses strict per-altsetting rate filtering (only the highest rate pair for that altsetting is valid). When it is not readable, all rates up to max_rate are valid. For devices without the bLength == 10 Format Type descriptor extension but with VAL_ALT_SETTINGS readable and multiple altsettings (only seen in Scarlett 18i8 3rd Gen playback), fall back to the Focusrite convention: alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz. This produces correct rate tables for all tested Focusrite devices (all Scarlett 2nd, 3rd, and 4th Gen, Clarett+, and Vocaster) using only USB descriptors, allowing QUIRK_FLAG_VALIDATE_RATES to be removed for Focusrite in the next commit. Signed-off-by: Geoffrey D. Bennett Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/7e18c1f393a6ecb6fc75dd867a2c4dbe135e3e22.1771594828.git.g@b4.vu --- sound/usb/format.c | 70 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 64cfe4a9d8cd..1207c507882a 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -305,17 +305,48 @@ static bool s1810c_valid_sample_rate(struct audioformat *fp, } /* - * Many Focusrite devices supports a limited set of sampling rates per - * altsetting. Maximum rate is exposed in the last 4 bytes of Format Type - * descriptor which has a non-standard bLength = 10. + * Focusrite devices use rate pairs: 44100/48000, 88200/96000, and + * 176400/192000. Return true if rate is in the pair for max_rate. + */ +static bool focusrite_rate_pair(unsigned int rate, + unsigned int max_rate) +{ + switch (max_rate) { + case 48000: return rate == 44100 || rate == 48000; + case 96000: return rate == 88200 || rate == 96000; + case 192000: return rate == 176400 || rate == 192000; + default: return true; + } +} + +/* + * Focusrite devices report all supported rates in a single clock + * source but only a subset is valid per altsetting. + * + * Detection uses two descriptor features: + * + * 1. Format Type descriptor bLength == 10: non-standard extension + * with max sample rate in bytes 6..9. + * + * 2. bmControls VAL_ALT_SETTINGS readable bit: when set, the device + * only supports the highest rate pair for that altsetting, and when + * clear, all rates up to max_rate are valid. + * + * For devices without the bLength == 10 extension but with + * VAL_ALT_SETTINGS readable and multiple altsettings (only seen in + * Scarlett 18i8 3rd Gen playback), fall back to the Focusrite + * convention: alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz. */ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, struct audioformat *fp, unsigned int rate) { + struct usb_interface *iface; struct usb_host_interface *alts; + struct uac2_as_header_descriptor *as; unsigned char *fmt; unsigned int max_rate; + bool val_alt; alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); if (!alts) @@ -326,9 +357,21 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, if (!fmt) return true; + as = snd_usb_find_csint_desc(alts->extra, alts->extralen, + NULL, UAC_AS_GENERAL); + if (!as) + return true; + + val_alt = uac_v2v3_control_is_readable(as->bmControls, + UAC2_AS_VAL_ALT_SETTINGS); + if (fmt[0] == 10) { /* bLength */ max_rate = combine_quad(&fmt[6]); + if (val_alt) + return focusrite_rate_pair(rate, max_rate); + + /* No val_alt: rates fall through from higher */ switch (max_rate) { case 192000: if (rate == 176400 || rate == 192000) @@ -344,12 +387,29 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", fp->iface, fp->altsetting, max_rate); - return true; } } - return true; + if (!val_alt) + return true; + + /* Multi-altsetting device with val_alt but no max_rate + * in the format descriptor. Use Focusrite convention: + * alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz. + */ + iface = usb_ifnum_to_if(chip->dev, fp->iface); + if (!iface || iface->num_altsetting <= 2) + return true; + + switch (fp->altsetting) { + case 1: max_rate = 48000; break; + case 2: max_rate = 96000; break; + case 3: max_rate = 192000; break; + default: return true; + } + + return focusrite_rate_pair(rate, max_rate); } /* From a8cc55bf81a45772cad44c83ea7bb0e98431094a Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Sat, 21 Feb 2026 02:34:48 +1030 Subject: [PATCH 118/828] ALSA: usb-audio: Remove VALIDATE_RATES quirk for Focusrite devices Remove QUIRK_FLAG_VALIDATE_RATES for Focusrite. With the previous commit, focusrite_valid_sample_rate() produces correct rate tables without USB probing. QUIRK_FLAG_VALIDATE_RATES sends SET_CUR requests for each rate (~25ms each) and leaves the device at 192kHz. This is a problem because that rate: 1) disables the internal mixer, so outputs are silent until an application opens the PCM and sets a lower rate, and 2) the Air and Safe modes get disabled. Fixes: 5963e5262180 ("ALSA: usb-audio: Enable rate validation for Scarlett devices") Signed-off-by: Geoffrey D. Bennett Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/09b9c012024c998c4ca14bd876ef0dce0d0b6101.1771594828.git.g@b4.vu --- sound/usb/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4cac0dfb0094..3164c9431d29 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2424,7 +2424,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), VENDOR_FLG(0x1235, /* Focusrite Novation */ - QUIRK_FLAG_VALIDATE_RATES), + 0), VENDOR_FLG(0x1511, /* AURALiC */ QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x152a, /* Thesycon devices */ From 38c322068a26a01d7ff64da92179e68cdde9860b Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Sat, 21 Feb 2026 02:36:35 +1030 Subject: [PATCH 119/828] ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP Add a quirk flag to skip the usb_set_interface(), snd_usb_init_pitch(), and snd_usb_init_sample_rate() calls in __snd_usb_parse_audio_interface(). These are redundant with snd_usb_endpoint_prepare() at stream-open time. Enable the quirk for Focusrite devices, as init_sample_rate(rate_max) sets 192kHz during probing, which disables the internal mixer and Air and Safe modes. Fixes: 16f1f838442d ("Revert "ALSA: usb-audio: Drop superfluous interface setup at parsing"") Signed-off-by: Geoffrey D. Bennett Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/65a7909b15f9feb76c2a6f4f8814c240ddc50737.1771594828.git.g@b4.vu --- sound/usb/quirks.c | 3 ++- sound/usb/stream.c | 3 +++ sound/usb/usbaudio.h | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 3164c9431d29..b13d0f4d25ac 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2424,7 +2424,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), VENDOR_FLG(0x1235, /* Focusrite Novation */ - 0), + QUIRK_FLAG_SKIP_IFACE_SETUP), VENDOR_FLG(0x1511, /* AURALiC */ QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x152a, /* Thesycon devices */ @@ -2506,6 +2506,7 @@ static const char *const snd_usb_audio_quirk_flag_names[] = { QUIRK_STRING_ENTRY(MIC_RES_384), QUIRK_STRING_ENTRY(MIXER_PLAYBACK_MIN_MUTE), QUIRK_STRING_ENTRY(MIXER_CAPTURE_MIN_MUTE), + QUIRK_STRING_ENTRY(SKIP_IFACE_SETUP), NULL }; diff --git a/sound/usb/stream.c b/sound/usb/stream.c index ac4d92065dd9..d38c39e28f38 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -1259,6 +1259,9 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, set_iface_first = true; /* try to set the interface... */ + if (chip->quirk_flags & QUIRK_FLAG_SKIP_IFACE_SETUP) + continue; + usb_set_interface(chip->dev, iface_no, 0); if (set_iface_first) usb_set_interface(chip->dev, iface_no, altno); diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 79978cae9799..085530cf62d9 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -224,6 +224,10 @@ extern bool snd_usb_skip_validation; * playback value represents muted state instead of minimum audible volume * QUIRK_FLAG_MIXER_CAPTURE_MIN_MUTE * Similar to QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE, but for capture streams + * QUIRK_FLAG_SKIP_IFACE_SETUP + * Skip the probe-time interface setup (usb_set_interface, + * init_pitch, init_sample_rate); redundant with + * snd_usb_endpoint_prepare() at stream-open time */ enum { @@ -253,6 +257,7 @@ enum { QUIRK_TYPE_MIC_RES_384 = 23, QUIRK_TYPE_MIXER_PLAYBACK_MIN_MUTE = 24, QUIRK_TYPE_MIXER_CAPTURE_MIN_MUTE = 25, + QUIRK_TYPE_SKIP_IFACE_SETUP = 26, /* Please also edit snd_usb_audio_quirk_flag_names */ }; @@ -284,5 +289,6 @@ enum { #define QUIRK_FLAG_MIC_RES_384 QUIRK_FLAG(MIC_RES_384) #define QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE QUIRK_FLAG(MIXER_PLAYBACK_MIN_MUTE) #define QUIRK_FLAG_MIXER_CAPTURE_MIN_MUTE QUIRK_FLAG(MIXER_CAPTURE_MIN_MUTE) +#define QUIRK_FLAG_SKIP_IFACE_SETUP QUIRK_FLAG(SKIP_IFACE_SETUP) #endif /* __USBAUDIO_H */ From 0d58273be0b9c3cec3be5488ca37f6ddbaf13cf0 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Sat, 21 Feb 2026 02:36:56 +1030 Subject: [PATCH 120/828] ALSA: usb-audio: Skip clock selector for Focusrite devices Add QUIRK_FLAG_SKIP_CLOCK_SELECTOR for Focusrite devices. During interface parsing, snd_usb_clock_find_source() reads the clock selector value then writes it back unchanged. On Focusrite devices this redundant write results in a ~300ms delay per altsetting, adding ~1.8s to probe time on a typical device with 6 altsettings. Enabling SKIP_CLOCK_SELECTOR skips the redundant write-back. Signed-off-by: Geoffrey D. Bennett Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/00e53ae0a508b41516b41833daa17823381a649c.1771594828.git.g@b4.vu --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index b13d0f4d25ac..fbceed8e8d36 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2424,6 +2424,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), VENDOR_FLG(0x1235, /* Focusrite Novation */ + QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_SKIP_IFACE_SETUP), VENDOR_FLG(0x1511, /* AURALiC */ QUIRK_FLAG_DSD_RAW), From 1f96b84835eafb3e6f366dc3a66c0e69504cec9d Mon Sep 17 00:00:00 2001 From: Florian Eckert Date: Thu, 5 Feb 2026 13:55:45 +0100 Subject: [PATCH 121/828] pinctrl: equilibrium: rename irq_chip function callbacks Renaming of the irq_chip callback functions to improve clarity. Signed-off-by: Florian Eckert Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-equilibrium.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c index 48b55c5bf8d4..49c8232b525a 100644 --- a/drivers/pinctrl/pinctrl-equilibrium.c +++ b/drivers/pinctrl/pinctrl-equilibrium.c @@ -23,7 +23,7 @@ #define PIN_NAME_LEN 10 #define PAD_REG_OFF 0x100 -static void eqbr_gpio_disable_irq(struct irq_data *d) +static void eqbr_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -36,7 +36,7 @@ static void eqbr_gpio_disable_irq(struct irq_data *d) gpiochip_disable_irq(gc, offset); } -static void eqbr_gpio_enable_irq(struct irq_data *d) +static void eqbr_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -50,7 +50,7 @@ static void eqbr_gpio_enable_irq(struct irq_data *d) raw_spin_unlock_irqrestore(&gctrl->lock, flags); } -static void eqbr_gpio_ack_irq(struct irq_data *d) +static void eqbr_irq_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -62,10 +62,10 @@ static void eqbr_gpio_ack_irq(struct irq_data *d) raw_spin_unlock_irqrestore(&gctrl->lock, flags); } -static void eqbr_gpio_mask_ack_irq(struct irq_data *d) +static void eqbr_irq_mask_ack(struct irq_data *d) { - eqbr_gpio_disable_irq(d); - eqbr_gpio_ack_irq(d); + eqbr_irq_mask(d); + eqbr_irq_ack(d); } static inline void eqbr_cfg_bit(void __iomem *addr, @@ -92,7 +92,7 @@ static int eqbr_irq_type_cfg(struct gpio_irq_type *type, return 0; } -static int eqbr_gpio_set_irq_type(struct irq_data *d, unsigned int type) +static int eqbr_irq_set_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -166,11 +166,11 @@ static void eqbr_irq_handler(struct irq_desc *desc) static const struct irq_chip eqbr_irq_chip = { .name = "gpio_irq", - .irq_mask = eqbr_gpio_disable_irq, - .irq_unmask = eqbr_gpio_enable_irq, - .irq_ack = eqbr_gpio_ack_irq, - .irq_mask_ack = eqbr_gpio_mask_ack_irq, - .irq_set_type = eqbr_gpio_set_irq_type, + .irq_ack = eqbr_irq_ack, + .irq_mask = eqbr_irq_mask, + .irq_mask_ack = eqbr_irq_mask_ack, + .irq_unmask = eqbr_irq_unmask, + .irq_set_type = eqbr_irq_set_type, .flags = IRQCHIP_IMMUTABLE, GPIOCHIP_IRQ_RESOURCE_HELPERS, }; From 3e00b1b332e54ba50cca6691f628b9c06574024f Mon Sep 17 00:00:00 2001 From: Florian Eckert Date: Thu, 5 Feb 2026 13:55:46 +0100 Subject: [PATCH 122/828] pinctrl: equilibrium: fix warning trace on load The callback functions 'eqbr_irq_mask()' and 'eqbr_irq_ack()' are also called in the callback function 'eqbr_irq_mask_ack()'. This is done to avoid source code duplication. The problem, is that in the function 'eqbr_irq_mask()' also calles the gpiolib function 'gpiochip_disable_irq()' This generates the following warning trace in the log for every gpio on load. [ 6.088111] ------------[ cut here ]------------ [ 6.092440] WARNING: CPU: 3 PID: 1 at drivers/gpio/gpiolib.c:3810 gpiochip_disable_irq+0x39/0x50 [ 6.097847] Modules linked in: [ 6.097847] CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.12.59+ #0 [ 6.097847] Tainted: [W]=WARN [ 6.097847] RIP: 0010:gpiochip_disable_irq+0x39/0x50 [ 6.097847] Code: 39 c6 48 19 c0 21 c6 48 c1 e6 05 48 03 b2 38 03 00 00 48 81 fe 00 f0 ff ff 77 11 48 8b 46 08 f6 c4 02 74 06 f0 80 66 09 fb c3 <0f> 0b 90 0f 1f 40 00 c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 [ 6.097847] RSP: 0000:ffffc9000000b830 EFLAGS: 00010046 [ 6.097847] RAX: 0000000000000045 RBX: ffff888001be02a0 RCX: 0000000000000008 [ 6.097847] RDX: ffff888001be9000 RSI: ffff888001b2dd00 RDI: ffff888001be02a0 [ 6.097847] RBP: ffffc9000000b860 R08: 0000000000000000 R09: 0000000000000000 [ 6.097847] R10: 0000000000000001 R11: ffff888001b2a154 R12: ffff888001be0514 [ 6.097847] R13: ffff888001be02a0 R14: 0000000000000008 R15: 0000000000000000 [ 6.097847] FS: 0000000000000000(0000) GS:ffff888041d80000(0000) knlGS:0000000000000000 [ 6.097847] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6.097847] CR2: 0000000000000000 CR3: 0000000003030000 CR4: 00000000001026b0 [ 6.097847] Call Trace: [ 6.097847] [ 6.097847] ? eqbr_irq_mask+0x63/0x70 [ 6.097847] ? no_action+0x10/0x10 [ 6.097847] eqbr_irq_mask_ack+0x11/0x60 In an other driver (drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c) the interrupt is not disabled here. To fix this, do not call the 'eqbr_irq_mask()' and 'eqbr_irq_ack()' function. Implement instead this directly without disabling the interrupts. Fixes: 52066a53bd11 ("pinctrl: equilibrium: Convert to immutable irq_chip") Signed-off-by: Florian Eckert Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-equilibrium.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c index 49c8232b525a..ba1c867b7b89 100644 --- a/drivers/pinctrl/pinctrl-equilibrium.c +++ b/drivers/pinctrl/pinctrl-equilibrium.c @@ -64,8 +64,15 @@ static void eqbr_irq_ack(struct irq_data *d) static void eqbr_irq_mask_ack(struct irq_data *d) { - eqbr_irq_mask(d); - eqbr_irq_ack(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); + unsigned int offset = irqd_to_hwirq(d); + unsigned long flags; + + raw_spin_lock_irqsave(&gctrl->lock, flags); + writel(BIT(offset), gctrl->membase + GPIO_IRNENCLR); + writel(BIT(offset), gctrl->membase + GPIO_IRNCR); + raw_spin_unlock_irqrestore(&gctrl->lock, flags); } static inline void eqbr_cfg_bit(void __iomem *addr, From 09a30b7a035f9f4ac918c8a9af89d70e43462152 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Mon, 9 Feb 2026 09:33:44 +0530 Subject: [PATCH 123/828] pinctrl: qcom: qcs615: Add missing dual edge GPIO IRQ errata flag Wakeup capable GPIOs uses PDC as parent IRQ chip and PDC on qcs615 do not support dual edge IRQs. Add missing wakeirq_dual_edge_errata configuration to enable workaround for dual edge GPIO IRQs. Fixes: b698f36a9d40 ("pinctrl: qcom: add the tlmm driver for QCS615 platform") Signed-off-by: Maulik Shah Reviewed-by: Dmitry Baryshkov Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-qcs615.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 4dfa820d4e77..f1c827ddbfbf 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1067,6 +1067,7 @@ static const struct msm_pinctrl_soc_data qcs615_tlmm = { .ntiles = ARRAY_SIZE(qcs615_tiles), .wakeirq_map = qcs615_pdc_map, .nwakeirq_map = ARRAY_SIZE(qcs615_pdc_map), + .wakeirq_dual_edge_errata = true, }; static const struct of_device_id qcs615_tlmm_of_match[] = { From 32e0a7ad9c841f46549ccac0f1cca347a40d8685 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Fri, 20 Feb 2026 17:34:51 +0800 Subject: [PATCH 124/828] gpio: shared: fix memory leaks On a Snapdragon X1 Elite laptop (Lenovo Yoga Slim 7x), kmemleak reports three sets of: unreferenced object 0xffff00080187f400 (size 1024): comm "swapper/0", pid 1, jiffies 4294667327 hex dump (first 32 bytes): 58 bd 70 01 08 00 ff ff 58 bd 70 01 08 00 ff ff X.p.....X.p..... 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ backtrace (crc 1665d1f8): kmemleak_alloc+0xf4/0x12c __kmalloc_cache_noprof+0x370/0x49c gpio_shared_make_ref+0x70/0x16c gpio_shared_of_traverse+0x4e8/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_init+0x34/0x1c4 do_one_initcall+0x50/0x280 kernel_init_freeable+0x290/0x33c kernel_init+0x28/0x14c ret_from_fork+0x10/0x20 unreferenced object 0xffff00080170c140 (size 8): comm "swapper/0", pid 1, jiffies 4294667327 hex dump (first 8 bytes): 72 65 73 65 74 00 00 00 reset... backtrace (crc fc24536): kmemleak_alloc+0xf4/0x12c __kmalloc_node_track_caller_noprof+0x3c4/0x584 kstrdup+0x4c/0xcc gpio_shared_make_ref+0x8c/0x16c gpio_shared_of_traverse+0x4e8/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_init+0x34/0x1c4 do_one_initcall+0x50/0x280 kernel_init_freeable+0x290/0x33c kernel_init+0x28/0x14c ret_from_fork+0x10/0x20 Fix this by decrementing the reference count of each list entry rather than only the first. Fix verified on the same laptop. Fixes: a060b8c511abb gpiolib: implement low-level, shared GPIO support Signed-off-by: Daniel J Blueman Link: https://patch.msgid.link/20260220093452.101655-1-daniel@quora.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-shared.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib-shared.c b/drivers/gpio/gpiolib-shared.c index d2614ace4de1..17a7128b6bd9 100644 --- a/drivers/gpio/gpiolib-shared.c +++ b/drivers/gpio/gpiolib-shared.c @@ -748,14 +748,14 @@ static bool gpio_shared_entry_is_really_shared(struct gpio_shared_entry *entry) static void gpio_shared_free_exclusive(void) { struct gpio_shared_entry *entry, *epos; + struct gpio_shared_ref *ref, *rpos; list_for_each_entry_safe(entry, epos, &gpio_shared_list, list) { if (gpio_shared_entry_is_really_shared(entry)) continue; - gpio_shared_drop_ref(list_first_entry(&entry->refs, - struct gpio_shared_ref, - list)); + list_for_each_entry_safe(ref, rpos, &entry->refs, list) + gpio_shared_drop_ref(ref); gpio_shared_drop_entry(entry); } } From 03c0d030f5874eec6ce22750b2b8751d6d4303b5 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Sat, 14 Feb 2026 03:02:48 +0000 Subject: [PATCH 125/828] erofs: allow sharing page cache with the same aops only Inode with identical data but different @aops cannot be mixed because the page cache is managed by different subsystems (e.g., @aops for compressed on-disk inodes cannot handle plain on-disk inodes). In this patch, we never allow inodes to share the page cache among plain, compressed, and fileio cases. When a shared inode is created, we initialize @aops that is the same as the initial real inode, and subsequent inodes cannot share the page cache if the inferred @aops differ from the corresponding shared inode. This is reasonable as a first step because, in typical use cases, if an inode is compressible, it will fall into compressed inodes across different filesystem images unless users use plain filesystems. However, in that cases, users will use plain filesystems all the time. Fixes: 5ef3208e3be5 ("erofs: introduce the page cache share feature") Signed-off-by: Hongbo Li Reviewed-by: Gao Xiang Signed-off-by: Gao Xiang --- fs/erofs/inode.c | 7 ++++++- fs/erofs/internal.h | 16 +++++++--------- fs/erofs/ishare.c | 14 +++++++++----- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 4f86169c23f1..4b3d21402e10 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -222,6 +222,7 @@ err_out: static int erofs_fill_inode(struct inode *inode) { + const struct address_space_operations *aops; int err; trace_erofs_fill_inode(inode); @@ -254,7 +255,11 @@ static int erofs_fill_inode(struct inode *inode) } mapping_set_large_folios(inode->i_mapping); - return erofs_inode_set_aops(inode, inode, false); + aops = erofs_get_aops(inode, false); + if (IS_ERR(aops)) + return PTR_ERR(aops); + inode->i_mapping->a_ops = aops; + return 0; } /* diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index d1634455e389..a4f0a42cf8c3 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -471,26 +471,24 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count) return NULL; } -static inline int erofs_inode_set_aops(struct inode *inode, - struct inode *realinode, bool no_fscache) +static inline const struct address_space_operations * +erofs_get_aops(struct inode *realinode, bool no_fscache) { if (erofs_inode_is_data_compressed(EROFS_I(realinode)->datalayout)) { if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP)) - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); DO_ONCE_LITE_IF(realinode->i_blkbits != PAGE_SHIFT, erofs_info, realinode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); - inode->i_mapping->a_ops = &z_erofs_aops; - return 0; + return &z_erofs_aops; } - inode->i_mapping->a_ops = &erofs_aops; if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !no_fscache && erofs_is_fscache_mode(realinode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; + return &erofs_fscache_access_aops; if (IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && erofs_is_fileio_mode(EROFS_SB(realinode->i_sb))) - inode->i_mapping->a_ops = &erofs_fileio_aops; - return 0; + return &erofs_fileio_aops; + return &erofs_aops; } int erofs_register_sysfs(struct super_block *sb); diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c index ce980320a8b9..829d50d5c717 100644 --- a/fs/erofs/ishare.c +++ b/fs/erofs/ishare.c @@ -40,10 +40,14 @@ bool erofs_ishare_fill_inode(struct inode *inode) { struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); struct erofs_inode *vi = EROFS_I(inode); + const struct address_space_operations *aops; struct erofs_inode_fingerprint fp; struct inode *sharedinode; unsigned long hash; + aops = erofs_get_aops(inode, true); + if (IS_ERR(aops)) + return false; if (erofs_xattr_fill_inode_fingerprint(&fp, inode, sbi->domain_id)) return false; hash = xxh32(fp.opaque, fp.size, 0); @@ -56,15 +60,15 @@ bool erofs_ishare_fill_inode(struct inode *inode) } if (inode_state_read_once(sharedinode) & I_NEW) { - if (erofs_inode_set_aops(sharedinode, inode, true)) { - iget_failed(sharedinode); - kfree(fp.opaque); - return false; - } + sharedinode->i_mapping->a_ops = aops; sharedinode->i_size = vi->vfs_inode.i_size; unlock_new_inode(sharedinode); } else { kfree(fp.opaque); + if (aops != sharedinode->i_mapping->a_ops) { + iput(sharedinode); + return false; + } if (sharedinode->i_size != vi->vfs_inode.i_size) { _erofs_printk(inode->i_sb, KERN_WARNING "size(%lld:%lld) not matches for the same fingerprint\n", From aa280a08e7d8fae58557acc345b36b3dc329d595 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Tue, 6 Jan 2026 13:15:04 +0000 Subject: [PATCH 126/828] x86/fred: Correct speculative safety in fred_extint() array_index_nospec() is no use if the result gets spilled to the stack, as it makes the believed safe-under-speculation value subject to memory predictions. For all practical purposes, this means array_index_nospec() must be used in the expression that accesses the array. As the code currently stands, it's the wrong side of irqentry_enter(), and 'index' is put into %ebp across the function call. Remove the index variable and reposition array_index_nospec(), so it's calculated immediately before the array access. Fixes: 14619d912b65 ("x86/fred: FRED entry/exit and dispatch code") Signed-off-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260106131504.679932-1-andrew.cooper3@citrix.com --- arch/x86/entry/entry_fred.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index a9b72997103d..88c757ac8ccd 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -160,8 +160,6 @@ void __init fred_complete_exception_setup(void) static noinstr void fred_extint(struct pt_regs *regs) { unsigned int vector = regs->fred_ss.vector; - unsigned int index = array_index_nospec(vector - FIRST_SYSTEM_VECTOR, - NR_SYSTEM_VECTORS); if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR)) return; @@ -170,7 +168,8 @@ static noinstr void fred_extint(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); instrumentation_begin(); - sysvec_table[index](regs); + sysvec_table[array_index_nospec(vector - FIRST_SYSTEM_VECTOR, + NR_SYSTEM_VECTORS)](regs); instrumentation_end(); irqentry_exit(regs, state); } else { From a0cb371b521dde44f32cfe954b6ef6f82b407393 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Sat, 10 Jan 2026 11:47:37 +0800 Subject: [PATCH 127/828] x86/bug: Handle __WARN_printf() trap in early_fixup_exception() The commit 5b472b6e5bd9 ("x86_64/bug: Implement __WARN_printf()") implemented __WARN_printf(), which changed the mechanism to use UD1 instead of UD2. However, it only handles the trap in the runtime IDT handler, while the early booting IDT handler lacks this handling. As a result, the usage of WARN() before the runtime IDT setup can lead to kernel crashes. Since KMSAN is enabled after the runtime IDT setup, it is safe to use handle_bug() directly in early_fixup_exception() to address this issue. Fixes: 5b472b6e5bd9 ("x86_64/bug: Implement __WARN_printf()") Signed-off-by: Hou Wenlong Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/c4fb3645f60d3a78629d9870e8fcc8535281c24f.1768016713.git.houwenlong.hwl@antgroup.com --- arch/x86/include/asm/traps.h | 2 ++ arch/x86/kernel/traps.c | 2 +- arch/x86/mm/extable.c | 7 ++----- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 869b88061801..3f24cc472ce9 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -25,6 +25,8 @@ extern int ibt_selftest_noendbr(void); void handle_invalid_op(struct pt_regs *regs); #endif +noinstr bool handle_bug(struct pt_regs *regs); + static inline int get_si_code(unsigned long condition) { if (condition & DR_STEP) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5a6a772e0a6c..4dbff8ef9b1c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -397,7 +397,7 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -static noinstr bool handle_bug(struct pt_regs *regs) +noinstr bool handle_bug(struct pt_regs *regs) { unsigned long addr = regs->ip; bool handled = false; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 2fdc1f1f5adb..6b9ff1c6cafa 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -411,14 +411,11 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) return; if (trapnr == X86_TRAP_UD) { - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { - /* Skip the ud2. */ - regs->ip += LEN_UD2; + if (handle_bug(regs)) return; - } /* - * If this was a BUG and report_bug returns or if this + * If this was a BUG and handle_bug returns or if this * was just a normal #UD, we want to continue onward and * crash. */ From 24c8147abb39618d74fcc36e325765e8fe7bdd7a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Feb 2026 13:59:43 +0100 Subject: [PATCH 128/828] x86/cfi: Fix CFI rewrite for odd alignments Rustam reported his clang builds did not boot properly; turns out his .config has: CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B=y set. Fix up the FineIBT code to deal with this unusual alignment. Fixes: 931ab63664f0 ("x86/ibt: Implement FineIBT") Reported-by: Rustam Kovhaev Signed-off-by: Peter Zijlstra (Intel) Tested-by: Rustam Kovhaev --- arch/x86/include/asm/cfi.h | 12 ++++++++---- arch/x86/include/asm/linkage.h | 4 ++-- arch/x86/kernel/alternative.c | 29 ++++++++++++++++++++++------- arch/x86/net/bpf_jit_comp.c | 13 ++----------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index c40b9ebc1fb4..ab3fbbd947ed 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -111,6 +111,12 @@ extern bhi_thunk __bhi_args_end[]; struct pt_regs; +#ifdef CONFIG_CALL_PADDING +#define CFI_OFFSET (CONFIG_FUNCTION_PADDING_CFI+5) +#else +#define CFI_OFFSET 5 +#endif + #ifdef CONFIG_CFI enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall @@ -119,11 +125,9 @@ static inline int cfi_get_offset(void) { switch (cfi_mode) { case CFI_FINEIBT: - return 16; + return /* fineibt_prefix_size */ 16; case CFI_KCFI: - if (IS_ENABLED(CONFIG_CALL_PADDING)) - return 16; - return 5; + return CFI_OFFSET; default: return 0; } diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 9d38ae744a2e..a7294656ad90 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -68,7 +68,7 @@ * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the * CFI symbol layout changes. * - * Without CALL_THUNKS: + * Without CALL_PADDING: * * .align FUNCTION_ALIGNMENT * __cfi_##name: @@ -77,7 +77,7 @@ * .long __kcfi_typeid_##name * name: * - * With CALL_THUNKS: + * With CALL_PADDING: * * .align FUNCTION_ALIGNMENT * __cfi_##name: diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a888ae0f01fb..e87da25d1236 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1182,7 +1182,7 @@ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) poison_endbr(addr); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_cfi(addr - 16); + poison_cfi(addr - CFI_OFFSET); } } @@ -1389,6 +1389,8 @@ extern u8 fineibt_preamble_end[]; #define fineibt_preamble_ud 0x13 #define fineibt_preamble_hash 5 +#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) + /* * : * 0: b8 78 56 34 12 mov $0x12345678, %eax @@ -1634,7 +1636,7 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) * have determined there are no indirect calls to it and we * don't need no CFI either. */ - if (!is_endbr(addr + 16)) + if (!is_endbr(addr + CFI_OFFSET)) continue; hash = decode_preamble_hash(addr, &arity); @@ -1642,6 +1644,15 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) addr, addr, 5, addr)) return -EINVAL; + /* + * FineIBT relies on being at func-16, so if the preamble is + * actually larger than that, place it the tail end. + * + * NOTE: this is possible with things like DEBUG_CALL_THUNKS + * and DEBUG_FORCE_FUNCTION_ALIGN_64B. + */ + addr += CFI_OFFSET - fineibt_prefix_size; + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); text_poke_early(addr + fineibt_preamble_hash, &hash, 4); @@ -1664,10 +1675,10 @@ static void cfi_rewrite_endbr(s32 *start, s32 *end) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - if (!exact_endbr(addr + 16)) + if (!exact_endbr(addr + CFI_OFFSET)) continue; - poison_endbr(addr + 16); + poison_endbr(addr + CFI_OFFSET); } } @@ -1772,7 +1783,8 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, if (FINEIBT_WARN(fineibt_preamble_size, 20) || FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) || FINEIBT_WARN(fineibt_caller_size, 14) || - FINEIBT_WARN(fineibt_paranoid_size, 20)) + FINEIBT_WARN(fineibt_paranoid_size, 20) || + WARN_ON_ONCE(CFI_OFFSET < fineibt_prefix_size)) return; if (cfi_mode == CFI_AUTO) { @@ -1885,6 +1897,11 @@ static void poison_cfi(void *addr) */ switch (cfi_mode) { case CFI_FINEIBT: + /* + * FineIBT preamble is at func-16. + */ + addr += CFI_OFFSET - fineibt_prefix_size; + /* * FineIBT prefix should start with an ENDBR. */ @@ -1923,8 +1940,6 @@ static void poison_cfi(void *addr) } } -#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) - /* * When regs->ip points to a 0xD6 byte in the FineIBT preamble, * return true and fill out target and type. diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8f10080e6fe3..e9b78040d703 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -438,17 +438,8 @@ static void emit_kcfi(u8 **pprog, u32 hash) EMIT1_off32(0xb8, hash); /* movl $hash, %eax */ #ifdef CONFIG_CALL_PADDING - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); + for (int i = 0; i < CONFIG_FUNCTION_PADDING_CFI; i++) + EMIT1(0x90); #endif EMIT_ENDBR(); From 237dc6a054f6787c2a8f61c59086030267e5e1c5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 18 Dec 2025 19:20:29 +0100 Subject: [PATCH 129/828] x86/headers: Replace __ASSEMBLY__ stragglers with __ASSEMBLER__ After converting the __ASSEMBLY__ statements to __ASSEMBLER__ in commit 24a295e4ef1ca ("x86/headers: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-UAPI headers"), some new code has been added that uses __ASSEMBLY__ again. Convert these stragglers, too. This is a mechanical patch, done with a simple "sed -i" command. Signed-off-by: Thomas Huth Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251218182029.166993-1-thuth@redhat.com --- arch/x86/include/asm/bug.h | 6 +++--- arch/x86/include/asm/irqflags.h | 4 ++-- arch/x86/include/asm/percpu.h | 2 +- arch/x86/include/asm/runtime-const.h | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 9b4e04690e1a..80c1696d8d59 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -7,7 +7,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct bug_entry; extern void __WARN_trap(struct bug_entry *bug, ...); #endif @@ -137,7 +137,7 @@ do { \ #ifdef HAVE_ARCH_BUG_FORMAT_ARGS -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include DECLARE_STATIC_CALL(WARN_trap, __WARN_trap); @@ -153,7 +153,7 @@ struct arch_va_list { struct sysv_va_list args; }; extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define __WARN_bug_entry(flags, format) ({ \ struct bug_entry *bug; \ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index a1193e9d65f2..462754b0bf8a 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -77,7 +77,7 @@ static __always_inline void native_local_irq_restore(unsigned long flags) #endif #ifndef CONFIG_PARAVIRT -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Used in the idle loop; sti takes one instruction cycle * to complete: @@ -95,7 +95,7 @@ static __always_inline void halt(void) { native_halt(); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_PARAVIRT */ #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index c55058f3d75e..409981468cba 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -20,7 +20,7 @@ #define PER_CPU_VAR(var) __percpu(var)__percpu_rel -#else /* !__ASSEMBLY__: */ +#else /* !__ASSEMBLER__: */ #include #include diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index e5a13dc8816e..4cd94fdcb45e 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h @@ -6,7 +6,7 @@ #error "Cannot use runtime-const infrastructure from modules" #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro RUNTIME_CONST_PTR sym reg movq $0x0123456789abcdef, %\reg @@ -16,7 +16,7 @@ .popsection .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define runtime_const_ptr(sym) ({ \ typeof(sym) __ret; \ @@ -74,5 +74,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), } } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif From b3d99f43c72b56cf7a104a364e7fb34b0702828b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Feb 2026 15:28:16 +0100 Subject: [PATCH 130/828] sched/fair: Fix zero_vruntime tracking It turns out that zero_vruntime tracking is broken when there is but a single task running. Current update paths are through __{en,de}queue_entity(), and when there is but a single task, pick_next_task() will always return that one task, and put_prev_set_next_task() will end up in neither function. This can cause entity_key() to grow indefinitely large and cause overflows, leading to much pain and suffering. Furtermore, doing update_zero_vruntime() from __{de,en}queue_entity(), which are called from {set_next,put_prev}_entity() has problems because: - set_next_entity() calls __dequeue_entity() before it does cfs_rq->curr = se. This means the avg_vruntime() will see the removal but not current, missing the entity for accounting. - put_prev_entity() calls __enqueue_entity() before it does cfs_rq->curr = NULL. This means the avg_vruntime() will see the addition *and* current, leading to double accounting. Both cases are incorrect/inconsistent. Noting that avg_vruntime is already called on each {en,de}queue, remove the explicit avg_vruntime() calls (which removes an extra 64bit division for each {en,de}queue) and have avg_vruntime() update zero_vruntime itself. Additionally, have the tick call avg_vruntime() -- discarding the result, but for the side-effect of updating zero_vruntime. While there, optimize avg_vruntime() by noting that the average of one value is rather trivial to compute. Test case: # taskset -c -p 1 $$ # taskset -c 2 bash -c 'while :; do :; done&' # cat /sys/kernel/debug/sched/debug | awk '/^cpu#/ {P=0} /^cpu#2,/ {P=1} {if (P) print $0}' | grep -e zero_vruntime -e "^>" PRE: .zero_vruntime : 31316.407903 >R bash 487 50787.345112 E 50789.145972 2.800000 50780.298364 16 120 0.000000 0.000000 0.000000 / .zero_vruntime : 382548.253179 >R bash 487 427275.204288 E 427276.003584 2.800000 427268.157540 23 120 0.000000 0.000000 0.000000 / POST: .zero_vruntime : 17259.709467 >R bash 526 17259.709467 E 17262.509467 2.800000 16915.031624 9 120 0.000000 0.000000 0.000000 / .zero_vruntime : 18702.723356 >R bash 526 18702.723356 E 18705.523356 2.800000 18358.045513 9 120 0.000000 0.000000 0.000000 / Fixes: 79f3f9bedd14 ("sched/eevdf: Fix min_vruntime vs avg_vruntime") Reported-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Tested-by: K Prateek Nayak Tested-by: Shubhang Kaushik Link: https://patch.msgid.link/20260219080624.438854780%40infradead.org --- kernel/sched/fair.c | 84 ++++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index eea99ec01a3f..56dddd4fd208 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -589,6 +589,21 @@ static inline bool entity_before(const struct sched_entity *a, return vruntime_cmp(a->deadline, "<", b->deadline); } +/* + * Per avg_vruntime() below, cfs_rq::zero_vruntime is only slightly stale + * and this value should be no more than two lag bounds. Which puts it in the + * general order of: + * + * (slice + TICK_NSEC) << NICE_0_LOAD_SHIFT + * + * which is around 44 bits in size (on 64bit); that is 20 for + * NICE_0_LOAD_SHIFT, another 20 for NSEC_PER_MSEC and then a handful for + * however many msec the actual slice+tick ends up begin. + * + * (disregarding the actual divide-by-weight part makes for the worst case + * weight of 2, which nicely cancels vs the fuzz in zero_vruntime not actually + * being the zero-lag point). + */ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) { return vruntime_op(se->vruntime, "-", cfs_rq->zero_vruntime); @@ -676,39 +691,61 @@ sum_w_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) } static inline -void sum_w_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) +void update_zero_vruntime(struct cfs_rq *cfs_rq, s64 delta) { /* - * v' = v + d ==> sum_w_vruntime' = sum_runtime - d*sum_weight + * v' = v + d ==> sum_w_vruntime' = sum_w_vruntime - d*sum_weight */ cfs_rq->sum_w_vruntime -= cfs_rq->sum_weight * delta; + cfs_rq->zero_vruntime += delta; } /* - * Specifically: avg_runtime() + 0 must result in entity_eligible() := true + * Specifically: avg_vruntime() + 0 must result in entity_eligible() := true * For this to be so, the result of this function must have a left bias. + * + * Called in: + * - place_entity() -- before enqueue + * - update_entity_lag() -- before dequeue + * - entity_tick() + * + * This means it is one entry 'behind' but that puts it close enough to where + * the bound on entity_key() is at most two lag bounds. */ u64 avg_vruntime(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; - s64 avg = cfs_rq->sum_w_vruntime; - long load = cfs_rq->sum_weight; + long weight = cfs_rq->sum_weight; + s64 delta = 0; - if (curr && curr->on_rq) { - unsigned long weight = scale_load_down(curr->load.weight); + if (curr && !curr->on_rq) + curr = NULL; - avg += entity_key(cfs_rq, curr) * weight; - load += weight; - } + if (weight) { + s64 runtime = cfs_rq->sum_w_vruntime; + + if (curr) { + unsigned long w = scale_load_down(curr->load.weight); + + runtime += entity_key(cfs_rq, curr) * w; + weight += w; + } - if (load) { /* sign flips effective floor / ceiling */ - if (avg < 0) - avg -= (load - 1); - avg = div_s64(avg, load); + if (runtime < 0) + runtime -= (weight - 1); + + delta = div_s64(runtime, weight); + } else if (curr) { + /* + * When there is but one element, it is the average. + */ + delta = curr->vruntime - cfs_rq->zero_vruntime; } - return cfs_rq->zero_vruntime + avg; + update_zero_vruntime(cfs_rq, delta); + + return cfs_rq->zero_vruntime; } /* @@ -777,16 +814,6 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se) return vruntime_eligible(cfs_rq, se->vruntime); } -static void update_zero_vruntime(struct cfs_rq *cfs_rq) -{ - u64 vruntime = avg_vruntime(cfs_rq); - s64 delta = vruntime_op(vruntime, "-", cfs_rq->zero_vruntime); - - sum_w_vruntime_update(cfs_rq, delta); - - cfs_rq->zero_vruntime = vruntime; -} - static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq) { struct sched_entity *root = __pick_root_entity(cfs_rq); @@ -856,7 +883,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity, static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { sum_w_vruntime_add(cfs_rq, se); - update_zero_vruntime(cfs_rq); se->min_vruntime = se->vruntime; se->min_slice = se->slice; rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, @@ -868,7 +894,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, &min_vruntime_cb); sum_w_vruntime_sub(cfs_rq, se); - update_zero_vruntime(cfs_rq); } struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq) @@ -5524,6 +5549,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) update_load_avg(cfs_rq, curr, UPDATE_TG); update_cfs_group(curr); + /* + * Pulls along cfs_rq::zero_vruntime. + */ + avg_vruntime(cfs_rq); + #ifdef CONFIG_SCHED_HRTICK /* * queued ticks are scheduled to match the slice, so don't bother From bcd74b2ffdd0a2233adbf26b65c62fc69a809c8e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 Jan 2026 16:49:09 +0100 Subject: [PATCH 131/828] sched/fair: Only set slice protection at pick time We should not (re)set slice protection in the sched_change pattern which calls put_prev_task() / set_next_task(). Fixes: 63304558ba5d ("sched/eevdf: Curb wakeup-preemption") Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Tested-by: K Prateek Nayak Tested-by: Shubhang Kaushik Link: https://patch.msgid.link/20260219080624.561421378%40infradead.org --- kernel/sched/fair.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 56dddd4fd208..f2b46c33a8c5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5445,7 +5445,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) } static void -set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, bool first) { clear_buddies(cfs_rq, se); @@ -5460,7 +5460,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) __dequeue_entity(cfs_rq, se); update_load_avg(cfs_rq, se, UPDATE_TG); - set_protect_slice(cfs_rq, se); + if (first) + set_protect_slice(cfs_rq, se); } update_stats_curr_start(cfs_rq, se); @@ -8978,13 +8979,13 @@ again: pse = parent_entity(pse); } if (se_depth >= pse_depth) { - set_next_entity(cfs_rq_of(se), se); + set_next_entity(cfs_rq_of(se), se, true); se = parent_entity(se); } } put_prev_entity(cfs_rq, pse); - set_next_entity(cfs_rq, se); + set_next_entity(cfs_rq, se, true); __set_next_task_fair(rq, p, true); } @@ -13598,7 +13599,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - set_next_entity(cfs_rq, se); + set_next_entity(cfs_rq, se, first); /* ensure bandwidth has been allocated on our new cfs_rq */ account_cfs_rq_runtime(cfs_rq, 0); } From ff38424030f98976150e42ca35f4b00e6ab8fa23 Mon Sep 17 00:00:00 2001 From: Wang Tao Date: Tue, 20 Jan 2026 12:31:13 +0000 Subject: [PATCH 132/828] sched/eevdf: Update se->vprot in reweight_entity() In the EEVDF framework with Run-to-Parity protection, `se->vprot` is an independent variable defining the virtual protection timestamp. When `reweight_entity()` is called (e.g., via nice/renice), it performs the following actions to preserve Lag consistency: 1. Scales `se->vlag` based on the new weight. 2. Calls `place_entity()`, which recalculates `se->vruntime` based on the new weight and scaled lag. However, the current implementation fails to update `se->vprot`, leading to mismatches between the task's actual runtime and its expected duration. Fixes: 63304558ba5d ("sched/eevdf: Curb wakeup-preemption") Suggested-by: Zhang Qiao Signed-off-by: Wang Tao Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Tested-by: K Prateek Nayak Tested-by: Shubhang Kaushik Link: https://patch.msgid.link/20260120123113.3518950-1-wangtao554@huawei.com --- kernel/sched/fair.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f2b46c33a8c5..93fa5b8313e4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3815,6 +3815,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; + bool rel_vprot = false; + u64 vprot; if (se->on_rq) { /* commit outstanding execution time */ @@ -3822,6 +3824,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, update_entity_lag(cfs_rq, se); se->deadline -= se->vruntime; se->rel_deadline = 1; + if (curr && protect_slice(se)) { + vprot = se->vprot - se->vruntime; + rel_vprot = true; + } + cfs_rq->nr_queued--; if (!curr) __dequeue_entity(cfs_rq, se); @@ -3837,6 +3844,9 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, if (se->rel_deadline) se->deadline = div_s64(se->deadline * se->load.weight, weight); + if (rel_vprot) + vprot = div_s64(vprot * se->load.weight, weight); + update_load_set(&se->load, weight); do { @@ -3848,6 +3858,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, enqueue_load_avg(cfs_rq, se); if (se->on_rq) { place_entity(cfs_rq, se, 0); + if (rel_vprot) + se->vprot = se->vruntime + vprot; update_load_add(&cfs_rq->load, se->load.weight); if (!curr) __enqueue_entity(cfs_rq, se); From 6e3c0a4e1ad1e0455b7880fad02b3ee179f56c09 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Apr 2025 12:16:28 +0200 Subject: [PATCH 133/828] sched/fair: Fix lag clamp Vincent reported that he was seeing undue lag clamping in a mixed slice workload. Implement the max_slice tracking as per the todo comment. Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy") Reported-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Vincent Guittot Tested-by: K Prateek Nayak Tested-by: Shubhang Kaushik Link: https://patch.msgid.link/20250422101628.GA33555@noisy.programming.kicks-ass.net --- include/linux/sched.h | 1 + kernel/sched/fair.c | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 074ad4ef3d81..a7b4a980eb2f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -579,6 +579,7 @@ struct sched_entity { u64 deadline; u64 min_vruntime; u64 min_slice; + u64 max_slice; struct list_head group_node; unsigned char on_rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 93fa5b8313e4..f4446cbe8ffa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -748,6 +748,8 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) return cfs_rq->zero_vruntime; } +static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq); + /* * lag_i = S - s_i = w_i * (V - v_i) * @@ -761,17 +763,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) * EEVDF gives the following limit for a steady state system: * * -r_max < lag < max(r_max, q) - * - * XXX could add max_slice to the augmented data to track this. */ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) { + u64 max_slice = cfs_rq_max_slice(cfs_rq) + TICK_NSEC; s64 vlag, limit; WARN_ON_ONCE(!se->on_rq); vlag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); + limit = calc_delta_fair(max_slice, se); se->vlag = clamp(vlag, -limit, limit); } @@ -829,6 +830,21 @@ static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq) return min_slice; } +static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq) +{ + struct sched_entity *root = __pick_root_entity(cfs_rq); + struct sched_entity *curr = cfs_rq->curr; + u64 max_slice = 0ULL; + + if (curr && curr->on_rq) + max_slice = curr->slice; + + if (root) + max_slice = max(max_slice, root->max_slice); + + return max_slice; +} + static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) { return entity_before(__node_2_se(a), __node_2_se(b)); @@ -853,6 +869,15 @@ static inline void __min_slice_update(struct sched_entity *se, struct rb_node *n } } +static inline void __max_slice_update(struct sched_entity *se, struct rb_node *node) +{ + if (node) { + struct sched_entity *rse = __node_2_se(node); + if (rse->max_slice > se->max_slice) + se->max_slice = rse->max_slice; + } +} + /* * se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime) */ @@ -860,6 +885,7 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit) { u64 old_min_vruntime = se->min_vruntime; u64 old_min_slice = se->min_slice; + u64 old_max_slice = se->max_slice; struct rb_node *node = &se->run_node; se->min_vruntime = se->vruntime; @@ -870,8 +896,13 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit) __min_slice_update(se, node->rb_right); __min_slice_update(se, node->rb_left); + se->max_slice = se->slice; + __max_slice_update(se, node->rb_right); + __max_slice_update(se, node->rb_left); + return se->min_vruntime == old_min_vruntime && - se->min_slice == old_min_slice; + se->min_slice == old_min_slice && + se->max_slice == old_max_slice; } RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity, From 4c652a47722f69c6f2685f05b17490ea97f643a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 6 Feb 2026 08:41:13 +0100 Subject: [PATCH 134/828] rseq: Mark rseq_arm_slice_extension_timer() __always_inline objtool warns about this function being called inside of a uaccess section: kernel/entry/common.o: warning: objtool: irqentry_exit+0x1dc: call to rseq_arm_slice_extension_timer() with UACCESS enabled Interestingly, this happens with CONFIG_RSEQ_SLICE_EXTENSION disabled, so this is an empty function, as the normal implementation is already marked __always_inline. I could reproduce this multiple times with gcc-11 but not with gcc-15, so the compiler probably got better at identifying the trivial function. Mark all the empty helpers for !RSEQ_SLICE_EXTENSION as __always_inline for consistency, avoiding this warning. Fixes: 0ac3b5c3dc45 ("rseq: Implement time slice extension enforcement timer") Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260206074122.709580-1-arnd@kernel.org --- include/linux/rseq_entry.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index cbc4a791618b..c6831c93cd6e 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -216,10 +216,10 @@ efault: } #else /* CONFIG_RSEQ_SLICE_EXTENSION */ -static inline bool rseq_slice_extension_enabled(void) { return false; } -static inline bool rseq_arm_slice_extension_timer(void) { return false; } -static inline void rseq_slice_clear_grant(struct task_struct *t) { } -static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } +static __always_inline bool rseq_slice_extension_enabled(void) { return false; } +static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } +static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } +static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; } #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); From 5324953c06bd929c135d9e04be391ee2c11b5a19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 Feb 2026 17:33:29 +0100 Subject: [PATCH 135/828] sched/core: Fix wakeup_preempt's next_class tracking Kernel test robot reported that tools/testing/selftests/kvm/hardware_disable_test was failing due to commit 704069649b5b ("sched/core: Rework sched_class::wakeup_preempt() and rq_modified_*()") It turns out there were two related problems that could lead to a missed preemption: - when hitting newidle balance from the idle thread, it would elevate rb->next_class from &idle_sched_class to &fair_sched_class, causing later wakeup_preempt() calls to not hit the sched_class_above() case, and not issue resched_curr(). Notably, this modification pattern should only lower the next_class, and never raise it. Create two new helper functions to wrap this. - when doing schedule_idle(), it was possible to miss (re)setting rq->next_class to &idle_sched_class, leading to the very same problem. Cc: Sean Christopherson Fixes: 704069649b5b ("sched/core: Rework sched_class::wakeup_preempt() and rq_modified_*()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202602122157.4e861298-lkp@intel.com Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260218163329.GQ1395416@noisy.programming.kicks-ass.net --- kernel/sched/core.c | 1 + kernel/sched/ext.c | 4 ++-- kernel/sched/fair.c | 4 ++-- kernel/sched/sched.h | 11 +++++++++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 759777694c78..b7f77c165a6e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6830,6 +6830,7 @@ static void __sched notrace __schedule(int sched_mode) /* SCX must consult the BPF scheduler to tell if rq is empty */ if (!rq->nr_running && !scx_enabled()) { next = prev; + rq->next_class = &idle_sched_class; goto picked; } } else if (!preempt && prev_state) { diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 62b1f3ac5630..06cc0a4aec66 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2460,7 +2460,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) /* see kick_cpus_irq_workfn() */ smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); - rq->next_class = &ext_sched_class; + rq_modified_begin(rq, &ext_sched_class); rq_unpin_lock(rq, rf); balance_one(rq, prev); @@ -2475,7 +2475,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) * If @force_scx is true, always try to pick a SCHED_EXT task, * regardless of any higher-priority sched classes activity. */ - if (!force_scx && sched_class_above(rq->next_class, &ext_sched_class)) + if (!force_scx && rq_modified_above(rq, &ext_sched_class)) return RETRY_TASK; keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f4446cbe8ffa..bf948db905ed 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12982,7 +12982,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) t0 = sched_clock_cpu(this_cpu); __sched_balance_update_blocked_averages(this_rq); - this_rq->next_class = &fair_sched_class; + rq_modified_begin(this_rq, &fair_sched_class); raw_spin_rq_unlock(this_rq); for_each_domain(this_cpu, sd) { @@ -13049,7 +13049,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) pulled_task = 1; /* If a higher prio class was modified, restart the pick */ - if (sched_class_above(this_rq->next_class, &fair_sched_class)) + if (rq_modified_above(this_rq, &fair_sched_class)) pulled_task = -1; out: diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b82fb70a9d54..43bbf0693cca 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2748,6 +2748,17 @@ static inline const struct sched_class *next_active_class(const struct sched_cla #define sched_class_above(_a, _b) ((_a) < (_b)) +static inline void rq_modified_begin(struct rq *rq, const struct sched_class *class) +{ + if (sched_class_above(rq->next_class, class)) + rq->next_class = class; +} + +static inline bool rq_modified_above(struct rq *rq, const struct sched_class *class) +{ + return sched_class_above(rq->next_class, class); +} + static inline bool sched_stop_runnable(struct rq *rq) { return rq->stop && task_on_rq_queued(rq->stop); From 26d43a90be81fc90e26688a51d3ec83188602731 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 20 Feb 2026 15:06:40 -0500 Subject: [PATCH 136/828] rseq: Clarify rseq registration rseq_size bound check comment The rseq registration validates that the rseq_size argument is greater or equal to 32 (the original rseq size), but the comment associated with this check does not clearly state this. Clarify the comment to that effect. Fixes: ee3e3ac05c26 ("rseq: Introduce extensible rseq ABI") Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260220200642.1317826-2-mathieu.desnoyers@efficios.com --- kernel/rseq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/rseq.c b/kernel/rseq.c index b0973d19f366..e349f86cc945 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -449,8 +449,9 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq * size, the required alignment is the original struct rseq alignment. * - * In order to be valid, rseq_len is either the original rseq size, or - * large enough to contain all supported fields, as communicated to + * The rseq_len is required to be greater or equal to the original rseq + * size. In order to be valid, rseq_len is either the original rseq size, + * or large enough to contain all supported fields, as communicated to * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. */ if (rseq_len < ORIG_RSEQ_SIZE || From 3b68df978133ac3d46d570af065a73debbb68248 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 20 Feb 2026 15:06:41 -0500 Subject: [PATCH 137/828] rseq: slice ext: Ensure rseq feature size differs from original rseq size Before rseq became extensible, its original size was 32 bytes even though the active rseq area was only 20 bytes. This had the following impact in terms of userspace ecosystem evolution: * The GNU libc between 2.35 and 2.39 expose a __rseq_size symbol set to 32, even though the size of the active rseq area is really 20. * The GNU libc 2.40 changes this __rseq_size to 20, thus making it express the active rseq area. * Starting from glibc 2.41, __rseq_size corresponds to the AT_RSEQ_FEATURE_SIZE from getauxval(3). This means that users of __rseq_size can always expect it to correspond to the active rseq area, except for the value 32, for which the active rseq area is 20 bytes. Exposing a 32 bytes feature size would make life needlessly painful for userspace. Therefore, add a reserved field at the end of the rseq area to bump the feature size to 33 bytes. This reserved field is expected to be replaced with whatever field will come next, expecting that this field will be larger than 1 byte. The effect of this change is to increase the size from 32 to 64 bytes before we actually have fields using that memory. Clarify the allocation size and alignment requirements in the struct rseq uapi comment. Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the value of the active rseq area size rounded up to next power of 2, which guarantees that the rseq structure will always be aligned on the nearest power of two large enough to contain it, even as it grows. Change the alignment check in the rseq registration accordingly. This will minimize the amount of ABI corner-cases we need to document and require userspace to play games with. The rule stays simple when __rseq_size != 32: #define rseq_field_available(field) (__rseq_size >= offsetofend(struct rseq_abi, field)) Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com --- fs/binfmt_elf.c | 3 ++- include/linux/rseq.h | 12 ++++++++++++ include/uapi/linux/rseq.h | 26 ++++++++++++++++++++++---- kernel/rseq.c | 3 ++- 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8e89cc5b2820..fb857faaf0d6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, } #ifdef CONFIG_RSEQ NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end)); - NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq)); + NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align()); #endif #undef NEW_AUX_ENT /* AT_NULL is zero; clear the rest too */ diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 7a01a0760405..b9d62fc2140d 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags) t->rseq = current->rseq; } +/* + * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq + * registration. This is the active rseq area size rounded up to next + * power of 2, which guarantees that the rseq structure will always be + * aligned on the nearest power of two large enough to contain it, even + * as it grows. + */ +static inline unsigned int rseq_alloc_align(void) +{ + return 1U << get_count_order(offsetof(struct rseq, end)); +} + #else /* CONFIG_RSEQ */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 863c4a00a66b..f69344fe6c08 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -87,10 +87,17 @@ struct rseq_slice_ctrl { }; /* - * struct rseq is aligned on 4 * 8 bytes to ensure it is always - * contained within a single cache-line. + * The original size and alignment of the allocation for struct rseq is + * 32 bytes. * - * A single struct rseq per thread is allowed. + * The allocation size needs to be greater or equal to + * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to + * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32). + * + * As an alternative, userspace is allowed to use both the original size + * and alignment of 32 bytes for backward compatibility. + * + * A single active struct rseq registration per thread is allowed. */ struct rseq { /* @@ -180,10 +187,21 @@ struct rseq { */ struct rseq_slice_ctrl slice_ctrl; + /* + * Before rseq became extensible, its original size was 32 bytes even + * though the active rseq area was only 20 bytes. + * Exposing a 32 bytes feature size would make life needlessly painful + * for userspace. Therefore, add a reserved byte after byte 32 + * to bump the rseq feature size from 32 to 33. + * The next field to be added to the rseq area will be larger + * than one byte, and will replace this reserved byte. + */ + __u8 __reserved; + /* * Flexible array member at end of structure, after last feature field. */ char end[]; -} __attribute__((aligned(4 * sizeof(__u64)))); +} __attribute__((aligned(32))); #endif /* _UAPI_LINUX_RSEQ_H */ diff --git a/kernel/rseq.c b/kernel/rseq.c index e349f86cc945..38d3ef540760 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -456,7 +457,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 */ if (rseq_len < ORIG_RSEQ_SIZE || (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || - (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || + (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) || rseq_len < offsetof(struct rseq, end)))) return -EINVAL; if (!access_ok(rseq, rseq_len)) From 486ff5ad49bc50315bcaf6d45f04a33ef0a45ced Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Jun 2025 21:51:05 -0700 Subject: [PATCH 138/828] perf/core: Fix invalid wait context in ctx_sched_in() Lockdep found a bug in the event scheduling when a pinned event was failed and wakes up the threads in the ring buffer like below. It seems it should not grab a wait-queue lock under perf-context lock. Let's do it with irq_work. [ 39.913691] ============================= [ 39.914157] [ BUG: Invalid wait context ] [ 39.914623] 6.15.0-next-20250530-next-2025053 #1 Not tainted [ 39.915271] ----------------------------- [ 39.915731] repro/837 is trying to lock: [ 39.916191] ffff88801acfabd8 (&event->waitq){....}-{3:3}, at: __wake_up+0x26/0x60 [ 39.917182] other info that might help us debug this: [ 39.917761] context-{5:5} [ 39.918079] 4 locks held by repro/837: [ 39.918530] #0: ffffffff8725cd00 (rcu_read_lock){....}-{1:3}, at: __perf_event_task_sched_in+0xd1/0xbc0 [ 39.919612] #1: ffff88806ca3c6f8 (&cpuctx_lock){....}-{2:2}, at: __perf_event_task_sched_in+0x1a7/0xbc0 [ 39.920748] #2: ffff88800d91fc18 (&ctx->lock){....}-{2:2}, at: __perf_event_task_sched_in+0x1f9/0xbc0 [ 39.921819] #3: ffffffff8725cd00 (rcu_read_lock){....}-{1:3}, at: perf_event_wakeup+0x6c/0x470 Fixes: f4b07fd62d4d ("perf/core: Use POLLHUP for a pinned event in error") Closes: https://lore.kernel.org/lkml/aD2w50VDvGIH95Pf@ly-workstation Reported-by: "Lai, Yi" Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Lai, Yi" Link: https://patch.msgid.link/20250603045105.1731451-1-namhyung@kernel.org --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index ac70d68217b6..4f86d22f281a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4138,7 +4138,8 @@ static int merge_sched_in(struct perf_event *event, void *data) if (*perf_event_fasync(event)) event->pending_kill = POLL_ERR; - perf_event_wakeup(event); + event->pending_wakeup = 1; + irq_work_queue(&event->pending_irq); } else { struct perf_cpu_pmu_context *cpc = this_cpc(event->pmu_ctx->pmu); From 6a8a48644c4b804123e59dbfc5d6cd29a0194046 Mon Sep 17 00:00:00 2001 From: Zide Chen Date: Mon, 9 Feb 2026 16:52:25 -0800 Subject: [PATCH 139/828] perf/x86/intel/uncore: Add per-scheduler IMC CAS count events IMC on SPR and EMR does not support sub-channels. In contrast, CPUs that use gnr_uncores[] (e.g. Granite Rapids and Sierra Forest) implement two command schedulers (SCH0/SCH1) per memory channel, providing logically independent command and data paths. Do not reuse the spr_uncore_imc[] configuration for these CPUs. Instead, introduce a dedicated gnr_uncore_imc[] with per-scheduler events, so userspace can monitor SCH0 and SCH1 independently. On these CPUs, replace cas_count_{read,write} with cas_count_{read,write}_sch{0,1}. This may break existing userspace that relies on cas_count_{read,write}, prompting it to switch to the per-scheduler events, as the legacy event reports only partial traffic (SCH0). Fixes: 632c4bf6d007 ("perf/x86/intel/uncore: Support Granite Rapids") Fixes: cb4a6ccf3583 ("perf/x86/intel/uncore: Support Sierra Forest and Grand Ridge") Reported-by: Reinette Chatre Signed-off-by: Zide Chen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260210005225.20311-1-zide.chen@intel.com --- arch/x86/events/intel/uncore_snbep.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 5ed6e0b7e715..0a1d08136cc1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6497,6 +6497,32 @@ static struct intel_uncore_type gnr_uncore_ubox = { .attr_update = uncore_alias_groups, }; +static struct uncore_event_desc gnr_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0, "event=0x05,umask=0xcf"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1, "event=0x06,umask=0xcf"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0, "event=0x05,umask=0xf0"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1, "event=0x06,umask=0xf0"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type gnr_uncore_imc = { + SPR_UNCORE_MMIO_COMMON_FORMAT(), + .name = "imc", + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = gnr_uncore_imc_events, +}; + static struct intel_uncore_type gnr_uncore_pciex8 = { SPR_UNCORE_PCI_COMMON_FORMAT(), .name = "pciex8", @@ -6544,7 +6570,7 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = { NULL, &spr_uncore_pcu, &gnr_uncore_ubox, - &spr_uncore_imc, + &gnr_uncore_imc, NULL, &gnr_uncore_upi, NULL, From 77de62ad3de3967818c3dbe656b7336ebee461d2 Mon Sep 17 00:00:00 2001 From: Haocheng Yu Date: Tue, 3 Feb 2026 00:20:56 +0800 Subject: [PATCH 140/828] perf/core: Fix refcount bug and potential UAF in perf_mmap Syzkaller reported a refcount_t: addition on 0; use-after-free warning in perf_mmap. The issue is caused by a race condition between a failing mmap() setup and a concurrent mmap() on a dependent event (e.g., using output redirection). In perf_mmap(), the ring_buffer (rb) is allocated and assigned to event->rb with the mmap_mutex held. The mutex is then released to perform map_range(). If map_range() fails, perf_mmap_close() is called to clean up. However, since the mutex was dropped, another thread attaching to this event (via inherited events or output redirection) can acquire the mutex, observe the valid event->rb pointer, and attempt to increment its reference count. If the cleanup path has already dropped the reference count to zero, this results in a use-after-free or refcount saturation warning. Fix this by extending the scope of mmap_mutex to cover the map_range() call. This ensures that the ring buffer initialization and mapping (or cleanup on failure) happens atomically effectively, preventing other threads from accessing a half-initialized or dying ring buffer. Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Haocheng Yu Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260202162057.7237-1-yuhaocheng035@gmail.com --- kernel/events/core.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4f86d22f281a..22a0f405585b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7465,29 +7465,29 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ret = perf_mmap_aux(vma, event, nr_pages); if (ret) return ret; + + /* + * Since pinned accounting is per vm we cannot allow fork() to copy our + * vma. + */ + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); + vma->vm_ops = &perf_mmap_vmops; + + mapped = get_mapped(event, event_mapped); + if (mapped) + mapped(event, vma->vm_mm); + + /* + * Try to map it into the page table. On fail, invoke + * perf_mmap_close() to undo the above, as the callsite expects + * full cleanup in this case and therefore does not invoke + * vmops::close(). + */ + ret = map_range(event->rb, vma); + if (ret) + perf_mmap_close(vma); } - /* - * Since pinned accounting is per vm we cannot allow fork() to copy our - * vma. - */ - vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); - vma->vm_ops = &perf_mmap_vmops; - - mapped = get_mapped(event, event_mapped); - if (mapped) - mapped(event, vma->vm_mm); - - /* - * Try to map it into the page table. On fail, invoke - * perf_mmap_close() to undo the above, as the callsite expects - * full cleanup in this case and therefore does not invoke - * vmops::close(). - */ - ret = map_range(event->rb, vma); - if (ret) - perf_mmap_close(vma); - return ret; } From eb4a7139e97374f42b7242cc754e77f1623fbcd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Thu, 12 Feb 2026 08:27:31 +0200 Subject: [PATCH 141/828] drm/i915/alpm: ALPM disable fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PORT_ALPM_CTL is supposed to be written only before link training. Remove writing it from ALPM disable. Also clearing ALPM_CTL_ALPM_AUX_LESS_ENABLE and is not about disabling ALPM but switching to AUX-Wake ALPM. Stop touching this bit on ALPM disable. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7153 Fixes: 1ccbf135862b ("drm/i915/psr: Enable ALPM on source side for eDP Panel replay") Cc: Animesh Manna Cc: Jani Nikula Cc: # v6.10+ Signed-off-by: Jouni Högander Reviewed-by: Michał Grzelak Link: https://patch.msgid.link/20260212062731.397801-1-jouni.hogander@intel.com (cherry picked from commit 008304c9ae75c772d3460040de56e12112cdf5e6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_alpm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 7ce8c674bb03..07ffee38974b 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -562,12 +562,7 @@ void intel_alpm_disable(struct intel_dp *intel_dp) mutex_lock(&intel_dp->alpm.lock); intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder), - ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE | - ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); - - intel_de_rmw(display, - PORT_ALPM_CTL(cpu_transcoder), - PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); + ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE, 0); drm_dbg_kms(display->drm, "Disabling ALPM\n"); mutex_unlock(&intel_dp->alpm.lock); From ec2cceadfae72304ca19650f9cac4b2a97b8a2fc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 19 Feb 2026 10:51:33 +0100 Subject: [PATCH 142/828] gpiolib: normalize the return value of gc->get() on behalf of buggy drivers Commit 86ef402d805d ("gpiolib: sanitize the return value of gpio_chip::get()") started checking the return value of the .get() callback in struct gpio_chip. Now - almost a year later - it turns out that there are quite a few drivers in tree that can break with this change. Partially revert it: normalize the return value in GPIO core but also emit a warning. Cc: stable@vger.kernel.org Fixes: 86ef402d805d ("gpiolib: sanitize the return value of gpio_chip::get()") Reported-by: Dmitry Torokhov Closes: https://lore.kernel.org/all/aZSkqGTqMp_57qC7@google.com/ Reviewed-by: Linus Walleij Reviewed-by: Dmitry Torokhov Link: https://patch.msgid.link/20260219-gpiolib-set-normalize-v2-1-f84630e45796@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 86a171e96b0e..ada572aaebd6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3267,8 +3267,12 @@ static int gpiochip_get(struct gpio_chip *gc, unsigned int offset) /* Make sure this is called after checking for gc->get(). */ ret = gc->get(gc, offset); - if (ret > 1) - ret = -EBADE; + if (ret > 1) { + gpiochip_warn(gc, + "invalid return value from gc->get(): %d, consider fixing the driver\n", + ret); + ret = !!ret; + } return ret; } From af12e64ae0661546e8b4f5d30d55c5f53a11efe7 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 20 Jan 2026 22:26:46 +0800 Subject: [PATCH 143/828] mmc: mmci: Fix device_node reference leak in of_get_dml_pipe_index() When calling of_parse_phandle_with_args(), the caller is responsible to call of_node_put() to release the reference of device node. In of_get_dml_pipe_index(), it does not release the reference. Fixes: 9cb15142d0e3 ("mmc: mmci: Add qcom dml support to the driver.") Signed-off-by: Felix Gu Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_qcom_dml.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/mmci_qcom_dml.c b/drivers/mmc/host/mmci_qcom_dml.c index 3da6112fbe39..67371389cc33 100644 --- a/drivers/mmc/host/mmci_qcom_dml.c +++ b/drivers/mmc/host/mmci_qcom_dml.c @@ -109,6 +109,7 @@ static int of_get_dml_pipe_index(struct device_node *np, const char *name) &dma_spec)) return -ENODEV; + of_node_put(dma_spec.np); if (dma_spec.args_count) return dma_spec.args[0]; From 6465a8bbb0f6ad98aeb66dc9ea19c32c193a610b Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 16 Jan 2026 08:55:30 +0800 Subject: [PATCH 144/828] mmc: dw_mmc-rockchip: Fix runtime PM support for internal phase support RK3576 is the first platform to introduce internal phase support, and subsequent platforms are expected to adopt a similar design. In this architecture, runtime suspend powers off the attached power domain, which resets registers, including vendor-specific ones such as SDMMC_TIMING_CON0, SDMMC_TIMING_CON1, and SDMMC_MISC_CON. These registers must be saved and restored, a requirement that falls outside the scope of the dw_mmc core. Fixes: 59903441f5e4 ("mmc: dw_mmc-rockchip: Add internal phase support") Signed-off-by: Shawn Lin Tested-by: Marco Schirrmeister Reviewed-by: Heiko Stuebner Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-rockchip.c | 38 +++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index 4e3423a19bdf..ac069d0c42b2 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -36,6 +36,8 @@ struct dw_mci_rockchip_priv_data { int default_sample_phase; int num_phases; bool internal_phase; + int sample_phase; + int drv_phase; }; /* @@ -573,9 +575,43 @@ static void dw_mci_rockchip_remove(struct platform_device *pdev) dw_mci_pltfm_remove(pdev); } +static int dw_mci_rockchip_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci_rockchip_priv_data *priv = host->priv; + + if (priv->internal_phase) { + priv->sample_phase = rockchip_mmc_get_phase(host, true); + priv->drv_phase = rockchip_mmc_get_phase(host, false); + } + + return dw_mci_runtime_suspend(dev); +} + +static int dw_mci_rockchip_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci_rockchip_priv_data *priv = host->priv; + int ret; + + ret = dw_mci_runtime_resume(dev); + if (ret) + return ret; + + if (priv->internal_phase) { + rockchip_mmc_set_phase(host, true, priv->sample_phase); + rockchip_mmc_set_phase(host, false, priv->drv_phase); + mci_writel(host, MISC_CON, MEM_CLK_AUTOGATE_ENABLE); + } + + return ret; +} + static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = { SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) - RUNTIME_PM_OPS(dw_mci_runtime_suspend, dw_mci_runtime_resume, NULL) + RUNTIME_PM_OPS(dw_mci_rockchip_runtime_suspend, dw_mci_rockchip_runtime_resume, NULL) }; static struct platform_driver dw_mci_rockchip_pltfm_driver = { From 79ad471530e0baef0dce991816013df55e401d9c Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 16 Feb 2026 14:15:43 -0500 Subject: [PATCH 145/828] mmc: sdhci-brcmstb: use correct register offset for V1 pin_sel restore The restore path for SDIO_CFG_CORE_V1 was incorrectly using SDIO_CFG_SD_PIN_SEL (offset 0x44) instead of SDIO_CFG_V1_SD_PIN_SEL (offset 0x54), causing the wrong register to be written on resume. The save path already uses the correct V1-specific offset. This affects BCM7445 and BCM72116 platforms which use the V1 config core. Fixes: b7e614802e3f ("mmc: sdhci-brcmstb: save and restore registers during PM") Signed-off-by: Kamal Dasu Cc: stable@vger.kernel.org Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-brcmstb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index c9442499876c..57e45951644e 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -116,7 +116,7 @@ static void sdhci_brcmstb_restore_regs(struct mmc_host *mmc, enum cfg_core_ver v writel(sr->boot_main_ctl, priv->boot_regs + SDIO_BOOT_MAIN_CTL); if (ver == SDIO_CFG_CORE_V1) { - writel(sr->sd_pin_sel, cr + SDIO_CFG_SD_PIN_SEL); + writel(sr->sd_pin_sel, cr + SDIO_CFG_V1_SD_PIN_SEL); return; } From 162d331d833dc73a3e905a24c44dd33732af1fc5 Mon Sep 17 00:00:00 2001 From: Ariel Silver Date: Fri, 20 Feb 2026 10:11:29 +0000 Subject: [PATCH 146/828] wifi: mac80211: bounds-check link_id in ieee80211_ml_reconfiguration link_id is taken from the ML Reconfiguration element (control & 0x000f), so it can be 0..15. link_removal_timeout[] has IEEE80211_MLD_MAX_NUM_LINKS (15) elements, so index 15 is out-of-bounds. Skip subelements with link_id >= IEEE80211_MLD_MAX_NUM_LINKS to avoid a stack out-of-bounds write. Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Reported-by: Ariel Silver Signed-off-by: Ariel Silver Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260220101129.1202657-1-Ariel.Silver@cybereason.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e83582b2c377..d43204ee330e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7085,6 +7085,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, control = le16_to_cpu(prof->control); link_id = control & IEEE80211_MLE_STA_RECONF_CONTROL_LINK_ID; + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + continue; + removed_links |= BIT(link_id); /* the MAC address should not be included, but handle it */ From 6510e1324bcdc8caf21f6d17efe27604c48f0d64 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 23 Feb 2026 09:36:16 +0000 Subject: [PATCH 147/828] ASoC: cs42l43: Report insert for exotic peripherals For some exotic peripherals the type detect can return a reserved value of 0x4. This will currently return an error and not report anything to user-space, update this to report the insert normally. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20260223093616.3800350-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index b83bc4de1301..3e04e6897b14 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -699,6 +699,7 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) switch (type & CS42L43_HSDET_TYPE_STS_MASK) { case 0x0: // CTIA case 0x1: // OMTP + case 0x4: return cs42l43_run_load_detect(priv, true); case 0x2: // 3-pole return cs42l43_run_load_detect(priv, false); From 901084c51a0a8fb42a3f37d2e9c62083c495f824 Mon Sep 17 00:00:00 2001 From: Penghe Geng Date: Thu, 19 Feb 2026 15:29:54 -0500 Subject: [PATCH 148/828] mmc: core: Avoid bitfield RMW for claim/retune flags Move claimed and retune control flags out of the bitfield word to avoid unrelated RMW side effects in asynchronous contexts. The host->claimed bit shared a word with retune flags. Writes to claimed in __mmc_claim_host() or retune_now in mmc_mq_queue_rq() can overwrite other bits when concurrent updates happen in other contexts, triggering spurious WARN_ON(!host->claimed). Convert claimed, can_retune, retune_now and retune_paused to bool to remove shared-word coupling. Fixes: 6c0cedd1ef952 ("mmc: core: Introduce host claiming by context") Fixes: 1e8e55b67030c ("mmc: block: Add CQE support") Cc: stable@vger.kernel.org Suggested-by: Adrian Hunter Signed-off-by: Penghe Geng Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e0e2c265e5d1..ba84f02c2a10 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -486,14 +486,12 @@ struct mmc_host { struct mmc_ios ios; /* current io bus settings */ + bool claimed; /* host exclusively claimed */ + /* group bitfields together to minimize padding */ unsigned int use_spi_crc:1; - unsigned int claimed:1; /* host exclusively claimed */ unsigned int doing_init_tune:1; /* initial tuning in progress */ - unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ - unsigned int retune_now:1; /* do re-tuning at next req */ - unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */ unsigned int can_dma_map_merge:1; /* merging can be used */ unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */ @@ -508,6 +506,9 @@ struct mmc_host { int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ + bool can_retune; /* re-tuning can be used */ + bool retune_now; /* do re-tuning at next req */ + bool retune_paused; /* re-tuning is temporarily disabled */ int need_retune; /* re-tuning is needed */ int hold_retune; /* hold off re-tuning */ unsigned int retune_period; /* re-tuning period in secs */ From 7a73801fdaf8aee90d23ba77976082a48d156a21 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Dec 2025 21:29:41 +0100 Subject: [PATCH 149/828] pmdomain: imx: gpcv2: Discard pm_runtime_put() return value Passing pm_runtime_put() return value to the callers is not particularly useful. Returning an error code from pm_runtime_put() merely means that it has not queued up a work item to check whether or not the device can be suspended and there are many perfectly valid situations in which that can happen, like after writing "on" to the devices' runtime PM "control" attribute in sysfs for one example. Accordingly, update imx_pgc_domain_suspend() to simply discard the return value of pm_runtime_put() and always return success to the caller. This will facilitate a planned change of the pm_runtime_put() return type to void in the future. Signed-off-by: Rafael J. Wysocki Acked-by: Peng Fan Acked-by: Ulf Hansson Link: https://patch.msgid.link/15658107.tv2OnDr8pf@rafael.j.wysocki --- drivers/pmdomain/imx/gpcv2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pmdomain/imx/gpcv2.c b/drivers/pmdomain/imx/gpcv2.c index cff738e4d546..a829f8da5be7 100644 --- a/drivers/pmdomain/imx/gpcv2.c +++ b/drivers/pmdomain/imx/gpcv2.c @@ -1416,7 +1416,9 @@ static int imx_pgc_domain_suspend(struct device *dev) static int imx_pgc_domain_resume(struct device *dev) { - return pm_runtime_put(dev); + pm_runtime_put(dev); + + return 0; } #endif From 3afd8df024339c7da1a5a0302f3987866dd16e40 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Dec 2025 21:36:25 +0100 Subject: [PATCH 150/828] PM: runtime: Change pm_runtime_put() return type to void The primary role of pm_runtime_put() is to decrement the runtime PM usage counter of the given device. It always does that regardless of the value returned by it later. In addition, if the runtime PM usage counter after decrementation turns out to be zero, a work item is queued up to check whether or not the device can be suspended. This is not guaranteed to succeed though and even if it is successful, the device may still not be suspended going forward. There are multiple valid reasons why pm_runtime_put() may not decide to queue up the work item mentioned above, including, but not limited to, the case when user space has written "on" to the device's runtime PM "control" file in sysfs. In all of those cases, pm_runtime_put() returns a negative error code (even though the device's runtime PM usage counter has been successfully decremented by it) which is very confusing. In fact, its return value should only be used for debug purposes and care should be taken when doing it even in that case. Accordingly, to avoid the confusion mentioned above, change the return type of pm_runtime_put() to void. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Brian Norris Link: https://patch.msgid.link/14387202.RDIVbhacDa@rafael.j.wysocki --- include/linux/pm_runtime.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 41037c513f06..64921b10ac74 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -545,22 +545,10 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). - * - * Return: - * * 1: Success. Usage counter dropped to zero, but device was already suspended. - * * 0: Success. - * * -EINVAL: Runtime PM error. - * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status - * change ongoing. - * * -EBUSY: Runtime PM child_count non-zero. - * * -EPERM: Device PM QoS resume latency 0. - * * -EINPROGRESS: Suspend already in progress. - * * -ENOSYS: CONFIG_PM not enabled. */ -static inline int pm_runtime_put(struct device *dev) +static inline void pm_runtime_put(struct device *dev) { - return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); + __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /** From 3c99a545b372c77b5d39715968a141f523eccbf2 Mon Sep 17 00:00:00 2001 From: Krishna Chomal Date: Wed, 18 Feb 2026 10:32:35 +0530 Subject: [PATCH 151/828] platform/x86: hp-wmi: Add Omen 16-xd0xxx fan and thermal support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HP Omen 16-xd0xxx (board ID: 8BCD) has the same WMI interface as other Victus S boards, but requires quirks for correctly switching thermal profile (similar to HP Omen 16-wf1xxx, board ID: 8C78). Add the DMI board name to victus_s_thermal_profile_boards[] table and map it to omen_v1_thermal_params. Testing on HP Omen 16-xd0xxx confirmed that platform profile is registered successfully and fan RPMs are readable and controllable. Tested-by: Varad Amol Pisale Signed-off-by: Krishna Chomal Link: https://patch.msgid.link/20260218050235.94687-1-krishna.chomal108@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 304d9ac63c8a..b91965117edd 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -177,6 +177,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BBE") }, .driver_data = (void *)&victus_s_thermal_params, }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCD") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BD4") }, .driver_data = (void *)&victus_s_thermal_params, From 7d87ed70fc95482c12edf9493c249b6413be485e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Feb 2026 14:23:46 +0100 Subject: [PATCH 152/828] platform/x86: touchscreen_dmi: Add quirk for y-inverted Goodix touchscreen on SUPI S10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The touchscreen on the SUPI S10 tablet reports inverted Y coordinates, causing touch input to be mirrored vertically relative to the display. Add a quirk to set the "touchscreen-inverted-y" boolean device-property on the touchscreen device, so that the goodix_ts driver will fixup the coordinates. Reported-by: Yajat Kumar Closes: https://lore.kernel.org/linux-input/20251230221639.582406-1-yajatapps3@gmail.com/ Tested-by: Yajat Kumar Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20260217132346.34535-1-johannes.goede@oss.qualcomm.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/touchscreen_dmi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index bdc19cd8d3ed..d83c387821ea 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -410,6 +410,16 @@ static const struct ts_dmi_data gdix1002_upside_down_data = { .properties = gdix1001_upside_down_props, }; +static const struct property_entry gdix1001_y_inverted_props[] = { + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + { } +}; + +static const struct ts_dmi_data gdix1001_y_inverted_data = { + .acpi_name = "GDIX1001", + .properties = gdix1001_y_inverted_props, +}; + static const struct property_entry gp_electronic_t701_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 960), PROPERTY_ENTRY_U32("touchscreen-size-y", 640), @@ -1658,6 +1668,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_SKU, "PN20170413488"), }, }, + { + /* SUPI S10 */ + .driver_data = (void *)&gdix1001_y_inverted_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SUPI"), + DMI_MATCH(DMI_PRODUCT_NAME, "S10"), + }, + }, { /* Techbite Arc 11.6 */ .driver_data = (void *)&techbite_arc_11_6_data, From 13fa3aaf02edaad9b41fc61d7f6326d2b6a4bf80 Mon Sep 17 00:00:00 2001 From: Krishna Chomal Date: Mon, 16 Feb 2026 12:50:03 +0530 Subject: [PATCH 153/828] platform/x86: hp-wmi: Add Omen 16-wf0xxx fan and thermal support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HP Omen 16-wf0xxx (board ID: 8BAB) has the same WMI interface as other Victus S boards, but requires quirks for correctly switching thermal profile (similar to HP Omen 16-wf1xxx, board ID: 8C78). Add the DMI board name to victus_s_thermal_profile_boards[] table and map it to omen_v1_thermal_params. Testing on HP Omen 16-wf0xxx confirmed that platform profile is registered successfully and fan RPMs are readable and controllable. Suggested-by: Noah Provenzano Tested-by: Juan Martin Morales Reported-by: Juan Martin Morales Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220639 Signed-off-by: Krishna Chomal Link: https://patch.msgid.link/20260216072003.90151-1-krishna.chomal108@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index b91965117edd..bde74a02ccb3 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -173,6 +173,10 @@ static const char * const victus_thermal_profile_boards[] = { /* DMI Board names of Victus 16-r and Victus 16-s laptops */ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst = { + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BAB") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BBE") }, .driver_data = (void *)&victus_s_thermal_params, From 53e977b1d50c46f2c4ec3865cd13a822f58ad3cd Mon Sep 17 00:00:00 2001 From: Jonathan Teh Date: Mon, 16 Feb 2026 01:01:29 +0000 Subject: [PATCH 154/828] platform/x86: thinkpad_acpi: Fix errors reading battery thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check whether the battery supports the relevant charge threshold before reading the value to silence these errors: thinkpad_acpi: acpi_evalf(BCTG, dd, ...) failed: AE_NOT_FOUND ACPI: \_SB_.PCI0.LPC_.EC__.HKEY: BCTG: evaluate failed thinkpad_acpi: acpi_evalf(BCSG, dd, ...) failed: AE_NOT_FOUND ACPI: \_SB_.PCI0.LPC_.EC__.HKEY: BCSG: evaluate failed when reading the charge thresholds via sysfs on platforms that do not support them such as the ThinkPad T400. Fixes: 2801b9683f74 ("thinkpad_acpi: Add support for battery thresholds") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=202619 Signed-off-by: Jonathan Teh Reviewed-by: Mark Pearson Link: https://patch.msgid.link/MI0P293MB01967B206E1CA6F337EBFB12926CA@MI0P293MB0196.ITAP293.PROD.OUTLOOK.COM Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/thinkpad_acpi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/lenovo/thinkpad_acpi.c b/drivers/platform/x86/lenovo/thinkpad_acpi.c index f9c736777908..8982d92dfd97 100644 --- a/drivers/platform/x86/lenovo/thinkpad_acpi.c +++ b/drivers/platform/x86/lenovo/thinkpad_acpi.c @@ -9525,14 +9525,16 @@ static int tpacpi_battery_get(int what, int battery, int *ret) { switch (what) { case THRESHOLD_START: - if ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_START, ret, battery)) + if (!battery_info.batteries[battery].start_support || + ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_START, ret, battery))) return -ENODEV; /* The value is in the low 8 bits of the response */ *ret = *ret & 0xFF; return 0; case THRESHOLD_STOP: - if ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_STOP, ret, battery)) + if (!battery_info.batteries[battery].stop_support || + ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_STOP, ret, battery))) return -ENODEV; /* Value is in lower 8 bits */ *ret = *ret & 0xFF; From 6b3fa0615cd8432148581de62a52f83847af3d70 Mon Sep 17 00:00:00 2001 From: Peter Metz Date: Thu, 12 Feb 2026 23:46:27 -0500 Subject: [PATCH 155/828] platform/x86: intel-hid: Add Dell 14 Plus 2-in-1 to dmi_vgbs_allow_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dell 14 Plus 2-in-1 (model DB04250) requires the VGBS allow list entry to correctly enable the tablet mode switch. Without this, the chassis state is not reported, and the hinge rotation only emits unknown scancodes. Verified on Dell 14 Plus 2-in-1 DB04250. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221090 Signed-off-by: Peter Metz Reviewed-by: Hans de Goede Link: https://patch.msgid.link/20260213044627.203638-1-peter.metz@unarin.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 0f8684f4464b..60ce32f591ff 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -189,6 +189,12 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell Pro Rugged 12 Tablet RA02260"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell 14 Plus 2-in-1 DB04250"), + }, + }, { } }; From 857f5036f8fd729e097efbe3ba421b75e27346cb Mon Sep 17 00:00:00 2001 From: Peter Metz Date: Fri, 13 Feb 2026 17:58:02 -0500 Subject: [PATCH 156/828] platform/x86: intel-hid: Add Dell 16 Plus 2-in-1 to dmi_vgbs_allow_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dell 16 Plus 2-in-1 (model DB06250) requires the VGBS allow list entry to correctly enable the tablet mode switch. Without this, the chassis state is not reported, and the hinge rotation only emits unknown scancodes. Link: https://lore.kernel.org/platform-driver-x86/CAP3yi-BWm0LqkhfzTrGy5n-KQ=3+T8eRMoR+Z+7Ke2VJB43kTA@mail.gmail.com/ Signed-off-by: Peter Metz Reviewed-by: Hans de Goede Link: https://patch.msgid.link/20260213230310.299974-1-peter.metz@unarin.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 60ce32f591ff..a8ae2dba7ffd 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -195,6 +195,12 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell 14 Plus 2-in-1 DB04250"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell 16 Plus 2-in-1 DB06250"), + }, + }, { } }; From 37da69dbaa60a778ffa67a066b77d287d704c659 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Wed, 11 Feb 2026 22:26:59 +0100 Subject: [PATCH 157/828] platform/x86: asus-armoury: add support for G733QS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model G733QS. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260211212659.16542-1-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 6e9703bd5017..001c011fdd56 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1708,6 +1708,20 @@ static const struct dmi_system_id power_limits[] = { .requires_fan_curve = true, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "G733QS"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 15, + .ppt_pl2_sppt_max = 80, + }, + .requires_fan_curve = false, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "G814J"), From 32fc4168fa56f6301d858c778a3d712774e9657e Mon Sep 17 00:00:00 2001 From: Azamat Almazbek uulu Date: Sat, 21 Feb 2026 12:48:13 +0100 Subject: [PATCH 158/828] ASoC: amd: yc: Add ASUS EXPERTBOOK BM1503CDA to quirk table The ASUS ExpertBook BM1503CDA (Ryzen 5 7535U, Barcelo-R) has an internal DMIC connected through the AMD ACP (Audio CoProcessor) but is missing from the DMI quirk table, so the acp6x machine driver probe returns -ENODEV and no DMIC capture device is created. Add the DMI entry so the internal microphone works out of the box. Signed-off-by: Azamat Almazbek uulu Reviewed-by: Vijendar Mukunda Link: https://patch.msgid.link/20260221114813.5610-1-almazbek1608@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index f1a63475100d..7af4daeb4c6f 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -703,6 +703,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vivobook_ASUSLaptop M6501RR_M6501RR"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK BM1503CDA"), + } + }, {} }; From 4b73231b2a61c4142a027613d277a19c484dfcc3 Mon Sep 17 00:00:00 2001 From: Yufan Chen Date: Sun, 22 Feb 2026 18:40:35 +0800 Subject: [PATCH 159/828] regulator: tps65185: check devm_kzalloc() result in probe tps65185_probe() dereferences the allocation result immediately by using data->regmap. If devm_kzalloc() returns NULL under memory pressure, this leads to a NULL pointer dereference. Add the missing allocation check and return -ENOMEM on failure. Signed-off-by: Yufan Chen Link: https://patch.msgid.link/20260222104035.90790-1-ericterminal@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/tps65185.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/tps65185.c b/drivers/regulator/tps65185.c index 3286c9ab33d0..786622d8d598 100644 --- a/drivers/regulator/tps65185.c +++ b/drivers/regulator/tps65185.c @@ -332,6 +332,9 @@ static int tps65185_probe(struct i2c_client *client) int i; data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->regmap = devm_regmap_init_i2c(client, ®map_config); if (IS_ERR(data->regmap)) return dev_err_probe(&client->dev, PTR_ERR(data->regmap), From f895e5df80316a308c2f7d64d13a78494630ea05 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 12 Feb 2026 21:52:48 -0600 Subject: [PATCH 160/828] ipmi:si: Don't block module unload if the BMC is messed up If the BMC is in a bad state, don't bother waiting for queues messages since there can't be any. Otherwise the unload is blocked until the BMC is back in a good state. Reported-by: Rafael J. Wysocki Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard Reviewed-by: Rafael J. Wysocki (Intel) --- drivers/char/ipmi/ipmi_si_intf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 0049e3792ba1..3667033fcc51 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2234,7 +2234,8 @@ static void wait_msg_processed(struct smi_info *smi_info) unsigned long jiffies_now; long time_diff; - while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { + while (smi_info->si_state != SI_HOSED && + (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL))) { jiffies_now = jiffies; time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) * SI_USEC_PER_JIFFY); From 62cd145453d577113f993efd025f258dd86aa183 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 12 Feb 2026 21:56:54 -0600 Subject: [PATCH 161/828] ipmi:msghandler: Handle error returns from the SMI sender It used to be, until recently, that the sender operation on the low level interfaces would not fail. That's not the case any more with recent changes. So check the return value from the sender operation, and propagate it back up from there and handle the errors in all places. Reported-by: Rafael J. Wysocki Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard Reviewed-by: Rafael J. Wysocki (Intel) --- drivers/char/ipmi/ipmi_msghandler.c | 102 +++++++++++++++++++--------- 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a042b1596933..f8c3c1e44520 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -1887,19 +1887,32 @@ static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf, return smi_msg; } -static void smi_send(struct ipmi_smi *intf, +static int smi_send(struct ipmi_smi *intf, const struct ipmi_smi_handlers *handlers, struct ipmi_smi_msg *smi_msg, int priority) { int run_to_completion = READ_ONCE(intf->run_to_completion); unsigned long flags = 0; + int rv = 0; ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); smi_msg = smi_add_send_msg(intf, smi_msg, priority); ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); - if (smi_msg) - handlers->sender(intf->send_info, smi_msg); + if (smi_msg) { + rv = handlers->sender(intf->send_info, smi_msg); + if (rv) { + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); + intf->curr_msg = NULL; + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); + /* + * Something may have been added to the transmit + * queue, so schedule a check for that. + */ + queue_work(system_wq, &intf->smi_work); + } + } + return rv; } static bool is_maintenance_mode_cmd(struct kernel_ipmi_msg *msg) @@ -2312,6 +2325,7 @@ static int i_ipmi_request(struct ipmi_user *user, struct ipmi_recv_msg *recv_msg; int run_to_completion = READ_ONCE(intf->run_to_completion); int rv = 0; + bool in_seq_table = false; if (supplied_recv) { recv_msg = supplied_recv; @@ -2365,33 +2379,50 @@ static int i_ipmi_request(struct ipmi_user *user, rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg, source_address, source_lun, retries, retry_time_ms); + in_seq_table = true; } else if (is_ipmb_direct_addr(addr)) { rv = i_ipmi_req_ipmb_direct(intf, addr, msgid, msg, smi_msg, recv_msg, source_lun); } else if (is_lan_addr(addr)) { rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg, source_lun, retries, retry_time_ms); + in_seq_table = true; } else { - /* Unknown address type. */ + /* Unknown address type. */ ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; } - if (rv) { + if (!rv) { + dev_dbg(intf->si_dev, "Send: %*ph\n", + smi_msg->data_size, smi_msg->data); + + rv = smi_send(intf, intf->handlers, smi_msg, priority); + if (rv != IPMI_CC_NO_ERROR) + /* smi_send() returns an IPMI err, return a Linux one. */ + rv = -EIO; + if (rv && in_seq_table) { + /* + * If it's in the sequence table, it will be + * retried later, so ignore errors. + */ + rv = 0; + /* But we need to fix the timeout. */ + intf_start_seq_timer(intf, smi_msg->msgid); + ipmi_free_smi_msg(smi_msg); + smi_msg = NULL; + } + } out_err: + if (!run_to_completion) + mutex_unlock(&intf->users_mutex); + + if (rv) { if (!supplied_smi) ipmi_free_smi_msg(smi_msg); if (!supplied_recv) ipmi_free_recv_msg(recv_msg); - } else { - dev_dbg(intf->si_dev, "Send: %*ph\n", - smi_msg->data_size, smi_msg->data); - - smi_send(intf, intf->handlers, smi_msg, priority); } - if (!run_to_completion) - mutex_unlock(&intf->users_mutex); - return rv; } @@ -3965,12 +3996,12 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf, dev_dbg(intf->si_dev, "Invalid command: %*ph\n", msg->data_size, msg->data); - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr; @@ -4044,12 +4075,12 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf, msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE; msg->data_size = 5; - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr; @@ -4189,7 +4220,7 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf, struct ipmi_smi_msg *msg) { struct cmd_rcvr *rcvr; - int rv = 0; + int rv = 0; /* Free by default */ unsigned char netfn; unsigned char cmd; unsigned char chan; @@ -4242,12 +4273,12 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf, dev_dbg(intf->si_dev, "Invalid command: %*ph\n", msg->data_size, msg->data); - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr; @@ -5056,7 +5087,12 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent, ipmi_inc_stat(intf, retransmitted_ipmb_commands); - smi_send(intf, intf->handlers, smi_msg, 0); + /* If this fails we'll retry later or timeout. */ + if (smi_send(intf, intf->handlers, smi_msg, 0) != IPMI_CC_NO_ERROR) { + /* But fix the timeout. */ + intf_start_seq_timer(intf, smi_msg->msgid); + ipmi_free_smi_msg(smi_msg); + } } else ipmi_free_smi_msg(smi_msg); From cae66f1a1dcd23e17da5a015ef9d731129f9d2dd Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 13 Feb 2026 00:15:04 -0600 Subject: [PATCH 162/828] ipmi:si: Fix check for a misbehaving BMC There is a race on checking the state in the sender, it needs to be checked under a lock. But you also need a check to avoid issues with a misbehaving BMC for run to completion mode. So leave the check at the beginning for run to completion, and add a check under the lock to avoid the race. Reported-by: Rafael J. Wysocki Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard Reviewed-by: Rafael J. Wysocki (Intel) --- drivers/char/ipmi/ipmi_si_intf.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 3667033fcc51..6eda61664aaa 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -924,9 +924,14 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg) { struct smi_info *smi_info = send_info; unsigned long flags; + int rv = IPMI_CC_NO_ERROR; debug_timestamp(smi_info, "Enqueue"); + /* + * Check here for run to completion mode. A check under lock is + * later. + */ if (smi_info->si_state == SI_HOSED) return IPMI_BUS_ERR; @@ -940,18 +945,15 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg) } spin_lock_irqsave(&smi_info->si_lock, flags); - /* - * The following two lines don't need to be under the lock for - * the lock's sake, but they do need SMP memory barriers to - * avoid getting things out of order. We are already claiming - * the lock, anyway, so just do it under the lock to avoid the - * ordering problem. - */ - BUG_ON(smi_info->waiting_msg); - smi_info->waiting_msg = msg; - check_start_timer_thread(smi_info); + if (smi_info->si_state == SI_HOSED) { + rv = IPMI_BUS_ERR; + } else { + BUG_ON(smi_info->waiting_msg); + smi_info->waiting_msg = msg; + check_start_timer_thread(smi_info); + } spin_unlock_irqrestore(&smi_info->si_lock, flags); - return IPMI_CC_NO_ERROR; + return rv; } static void set_run_to_completion(void *send_info, bool i_run_to_completion) From 2a7b7652b1bb3fadc3bd47d622bfb127a93ab6b0 Mon Sep 17 00:00:00 2001 From: Leif Skunberg Date: Tue, 10 Feb 2026 14:21:29 +0100 Subject: [PATCH 163/828] platform/x86: int3472: Handle GPIO type 0x10 (DOVDD) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Lenovo ThinkPad X1 Fold 16 Gen 1 has an OV5675 sensor (ACPI HID OVTI5675) behind an INT3472 discrete PMIC controller. The INT3472 _DSM returns GPIO type 0x10 for one of the pins, which controls the DOVDD (digital I/O power) regulator enable. Type 0x10 is not currently handled by the driver, causing the GPIO to be ignored with a warning. Add INT3472_GPIO_TYPE_DOVDD (0x10) and handle it as a regulator with con_id "dovdd" to match the supply name used by sensor drivers (e.g. ov5675). Also increase GPIO_SUPPLY_NAME_LENGTH from 5 to 6 to accommodate the "dovdd" name (5 chars + null terminator). Signed-off-by: Leif Skunberg Reviewed-by: Hans de Goede Link: https://patch.msgid.link/20260210132129.17943-1-diamondback@cohunt.app Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/int3472/discrete.c | 7 +++++++ include/linux/platform_data/x86/int3472.h | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 1455d9a7afca..1c65ce87cde0 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -223,6 +223,10 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 *con_id = "avdd"; *gpio_flags = GPIO_ACTIVE_HIGH; break; + case INT3472_GPIO_TYPE_DOVDD: + *con_id = "dovdd"; + *gpio_flags = GPIO_ACTIVE_HIGH; + break; case INT3472_GPIO_TYPE_HANDSHAKE: *con_id = "dvdd"; *gpio_flags = GPIO_ACTIVE_HIGH; @@ -251,6 +255,7 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 * 0x0b Power enable * 0x0c Clock enable * 0x0d Privacy LED + * 0x10 DOVDD (digital I/O voltage) * 0x13 Hotplug detect * * There are some known platform specific quirks where that does not quite @@ -332,6 +337,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, case INT3472_GPIO_TYPE_CLK_ENABLE: case INT3472_GPIO_TYPE_PRIVACY_LED: case INT3472_GPIO_TYPE_POWER_ENABLE: + case INT3472_GPIO_TYPE_DOVDD: case INT3472_GPIO_TYPE_HANDSHAKE: gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, con_id, gpio_flags); if (IS_ERR(gpio)) { @@ -356,6 +362,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, case INT3472_GPIO_TYPE_POWER_ENABLE: second_sensor = int3472->quirks.avdd_second_sensor; fallthrough; + case INT3472_GPIO_TYPE_DOVDD: case INT3472_GPIO_TYPE_HANDSHAKE: ret = skl_int3472_register_regulator(int3472, gpio, enable_time_us, con_id, second_sensor); diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index b1b837583d54..dbe745dc88d5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -26,6 +26,7 @@ #define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d +#define INT3472_GPIO_TYPE_DOVDD 0x10 #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 #define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 @@ -33,8 +34,8 @@ #define INT3472_MAX_SENSOR_GPIOS 3 #define INT3472_MAX_REGULATORS 3 -/* E.g. "avdd\0" */ -#define GPIO_SUPPLY_NAME_LENGTH 5 +/* E.g. "dovdd\0" */ +#define GPIO_SUPPLY_NAME_LENGTH 6 /* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */ #define GPIO_REGULATOR_NAME_LENGTH (12 + GPIO_SUPPLY_NAME_LENGTH) /* lower- and upper-case mapping */ From b38d478dad79e61e8a65931021bdfd7a71741212 Mon Sep 17 00:00:00 2001 From: Leif Skunberg Date: Tue, 10 Feb 2026 09:56:25 +0100 Subject: [PATCH 164/828] platform/x86: intel-hid: Enable 5-button array on ThinkPad X1 Fold 16 Gen 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Lenovo ThinkPad X1 Fold 16 Gen 1 has physical volume up/down buttons that are handled through the intel-hid 5-button array interface. The firmware does not advertise 5-button array support via HEBC, so the driver relies on a DMI allowlist to enable it. Add the ThinkPad X1 Fold 16 Gen 1 to the button_array_table so the volume buttons work out of the box. Signed-off-by: Leif Skunberg Reviewed-by: Hans de Goede Link: https://patch.msgid.link/20260210085625.34380-1-diamondback@cohunt.app Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index a8ae2dba7ffd..95c405c8bac0 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -135,6 +135,13 @@ static const struct dmi_system_id button_array_table[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Tablet Gen 2"), }, }, + { + .ident = "Lenovo ThinkPad X1 Fold 16 Gen 1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Fold 16 Gen 1"), + }, + }, { .ident = "Microsoft Surface Go 3", .matches = { From 249f05e625c6e6c14b27fd34a2f06a1afb9b456d Mon Sep 17 00:00:00 2001 From: Victor Lattaro Volpini Date: Tue, 10 Feb 2026 00:00:52 +0000 Subject: [PATCH 165/828] platform/x86: hp-wmi: Add Victus 16-d0xxx support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables Victus thermal profile support for the HP Victus 16-d0xxx. It does so by adding model's DMI board name 88F8 to victus_thermal_profile_boards. Tested on a Victus 16-d0xxx: - Victus thermal profile choices available (quiet, balanced, performance) instead of the default ones (cool, quiet, balanced, performance); - Profile switching works correctly; - About 4% increase in FPS using benchmark Cyberpunk 2077 on performance profile; - No noticeable regressions. Signed-off-by: Victor Lattaro Volpini Link: https://patch.msgid.link/20260210000048.250280-1-victorlattaro@proton.me Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index bde74a02ccb3..40165138cfbd 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -166,8 +166,9 @@ static const char * const omen_timed_thermal_profile_boards[] = { "8BAD", }; -/* DMI Board names of Victus 16-d1xxx laptops */ +/* DMI Board names of Victus 16-d laptops */ static const char * const victus_thermal_profile_boards[] = { + "88F8", "8A25", }; From 26a7601471f62b95d56a81c3a8ccb551b5a6630f Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Sat, 7 Feb 2026 12:16:34 -0500 Subject: [PATCH 166/828] platform/x86: dell-wmi: Add audio/mic mute key codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add audio/mic mute key codes found in Alienware m18 r1 AMD. Cc: stable@vger.kernel.org Tested-by: Olexa Bilaniuk Suggested-by: Olexa Bilaniuk Signed-off-by: Kurt Borja Acked-by: Pali Rohár Link: https://patch.msgid.link/20260207-mute-keys-v2-1-c55e5471c9c1@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-wmi-base.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c index 4eefbade2f5e..e7a411ae9ca1 100644 --- a/drivers/platform/x86/dell/dell-wmi-base.c +++ b/drivers/platform/x86/dell/dell-wmi-base.c @@ -80,6 +80,12 @@ static const struct dmi_system_id dell_wmi_smbios_list[] __initconst = { static const struct key_entry dell_wmi_keymap_type_0000[] = { { KE_IGNORE, 0x003a, { KEY_CAPSLOCK } }, + /* Audio mute toggle */ + { KE_KEY, 0x0109, { KEY_MUTE } }, + + /* Mic mute toggle */ + { KE_KEY, 0x0150, { KEY_MICMUTE } }, + /* Meta key lock */ { KE_IGNORE, 0xe000, { KEY_RIGHTMETA } }, From 729ffcffa73069cb066fd54a2bc7b09e5f782d48 Mon Sep 17 00:00:00 2001 From: Anton Plotnikov Date: Tue, 3 Feb 2026 18:48:32 +0200 Subject: [PATCH 167/828] platform/x86: hp-wmi: add Omen 14-fb1xxx (board 8E41) support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverse engineering of the HP Omen Windows utility shows that for performance mode it uses the same codes listed in hp_thermal_profile_omen_v1. Therefore it seems sufficient to add the board model name to omen_thermal_profile_boards. Tested on Omen 14-fb1xxx: CPU power in performance profile reaches the Windows limit (65W), instead of 45W in automatic BIOS mode. Max fan speed was reached as well. Link: https://patch.msgid.link/20260203164832.40514-1-plotnikovanton@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 40165138cfbd..68ede7e5757a 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -146,6 +146,7 @@ static const char * const omen_thermal_profile_boards[] = { "8900", "8901", "8902", "8912", "8917", "8918", "8949", "894A", "89EB", "8A15", "8A42", "8BAD", + "8E41", }; /* DMI Board names of Omen laptops that are specifically set to be thermal From bd5914caeb4b2de233992c31babccda88041b035 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Thu, 29 Jan 2026 12:19:24 -0500 Subject: [PATCH 168/828] platform/x86: alienware-wmi-wmax: Add G-Mode support to m18 laptops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alienware m18 laptops support G-Mode. Therefore, match them with G-Series quirks. Cc: stable@vger.kernel.org Tested-by: Olexa Bilaniuk Signed-off-by: Kurt Borja Link: https://patch.msgid.link/20260129-m18-gmode-v1-1-48be521487b9@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index e69b50162bb1..d1b4df91401b 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -175,7 +175,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m18"), }, - .driver_data = &generic_quirks, + .driver_data = &g_series_quirks, }, { .ident = "Alienware x15", From e3aa6feaf0cc30a3c941d15ff04bc1005a8aa315 Mon Sep 17 00:00:00 2001 From: Jesse Guo Date: Mon, 26 Jan 2026 04:48:26 +0800 Subject: [PATCH 169/828] platform/x86: redmi-wmi: Add more hotkey mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds more Fn hotkeys (like Refresh rate toggle). Additionally, remap the setup key from KEY_SETUP to KEY_CONFIG. As KEY_CONFIG is supported by Desktop Environments for launching system settings, whereas KEY_SETUP is often ignored by userspace. Signed-off-by: Jesse Guo Reviewed-by: Gladyshev Ilya Link: https://patch.msgid.link/TYCPR01MB6851636256C39B170F2312E5D192A@TYCPR01MB6851.jpnprd01.prod.outlook.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/redmi-wmi.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/redmi-wmi.c b/drivers/platform/x86/redmi-wmi.c index 949236b93a32..e5cb348e3a39 100644 --- a/drivers/platform/x86/redmi-wmi.c +++ b/drivers/platform/x86/redmi-wmi.c @@ -20,7 +20,10 @@ static const struct key_entry redmi_wmi_keymap[] = { {KE_KEY, 0x00000201, {KEY_SELECTIVE_SCREENSHOT}}, {KE_KEY, 0x00000301, {KEY_ALL_APPLICATIONS}}, - {KE_KEY, 0x00001b01, {KEY_SETUP}}, + {KE_KEY, 0x00001b01, {KEY_CONFIG}}, + {KE_KEY, 0x00011b01, {KEY_CONFIG}}, + {KE_KEY, 0x00010101, {KEY_SWITCHVIDEOMODE}}, + {KE_KEY, 0x00001a01, {KEY_REFRESH_RATE_TOGGLE}}, /* AI button has code for each position */ {KE_KEY, 0x00011801, {KEY_ASSISTANT}}, @@ -32,6 +35,26 @@ static const struct key_entry redmi_wmi_keymap[] = { {KE_IGNORE, 0x00050501, {}}, {KE_IGNORE, 0x000a0501, {}}, + /* Xiaomi G Command Center */ + {KE_KEY, 0x00010a01, {KEY_VENDOR}}, + + /* OEM preset power mode */ + {KE_IGNORE, 0x00011601, {}}, + {KE_IGNORE, 0x00021601, {}}, + {KE_IGNORE, 0x00031601, {}}, + {KE_IGNORE, 0x00041601, {}}, + + /* Fn Lock state */ + {KE_IGNORE, 0x00000701, {}}, + {KE_IGNORE, 0x00010701, {}}, + + /* Fn+`/1/2/3/4 */ + {KE_KEY, 0x00011101, {KEY_F13}}, + {KE_KEY, 0x00011201, {KEY_F14}}, + {KE_KEY, 0x00011301, {KEY_F15}}, + {KE_KEY, 0x00011401, {KEY_F16}}, + {KE_KEY, 0x00011501, {KEY_F17}}, + {KE_END} }; From 318c58852e686c009825ae8c071080b9ccdd2af0 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Wed, 11 Feb 2026 14:22:27 -0500 Subject: [PATCH 170/828] cxl/memdev: fix deadlock in cxl_memdev_autoremove() on attach failure cxl_memdev_autoremove() takes device_lock(&cxlmd->dev) via guard(device) and then calls cxl_memdev_unregister() when the attach callback was provided but cxl_mem_probe() failed to bind. cxl_memdev_unregister() calls cdev_device_del() device_del() bus_remove_device() device_release_driver() This path is reached when a driver uses the @attach parameter to devm_cxl_add_memdev() and the CXL topology fails to enumerate (e.g. DVSEC range registers decode outside platform-defined CXL ranges, causing the endpoint port probe to fail). Add cxl_memdev_attach_failed() to set the scope of the check correctly. Reported-by: kreview-c94b85d6d2 Fixes: 29317f8dc6ed ("cxl/mem: Introduce cxl_memdev_attach for CXL-dependent operation") Signed-off-by: Gregory Price Reviewed-by: Dan Williams Reviewed-by: Davidlohr Bueso Link: https://patch.msgid.link/20260211192228.2148713-1-gourry@gourry.net Signed-off-by: Dave Jiang --- drivers/cxl/core/memdev.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index f547d8ac34c7..273c22118d3d 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -1089,10 +1089,8 @@ static int cxlmd_add(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds) DEFINE_FREE(put_cxlmd, struct cxl_memdev *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) -static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +static bool cxl_memdev_attach_failed(struct cxl_memdev *cxlmd) { - int rc; - /* * If @attach is provided fail if the driver is not attached upon * return. Note that failure here could be the result of a race to @@ -1100,7 +1098,14 @@ static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) * succeeded and then cxl_mem unbound before the lock is acquired. */ guard(device)(&cxlmd->dev); - if (cxlmd->attach && !cxlmd->dev.driver) { + return (cxlmd->attach && !cxlmd->dev.driver); +} + +static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +{ + int rc; + + if (cxl_memdev_attach_failed(cxlmd)) { cxl_memdev_unregister(cxlmd); return ERR_PTR(-ENXIO); } From 2d53dfacf067968a9ff90c03700fb9b28049f4c6 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 18 Feb 2026 01:50:58 +0100 Subject: [PATCH 171/828] platform/x86: uniwill-laptop: Rename FN lock and super key lock attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that both sysfs attributes actually directly control the FN lock status/super key enable status, rather than the triggering of the associated events. This behavior was first observed on a Tuxedo notebook and was belived to be a hardware quirk. However, it seems that i simply misunderstood the manual of the OEM software for Intel NUC devices. The correct behavior is: - fn_lock_toggle_enable enables/disables FN lock mode - super_key_toggle_enable enables/disables the super key Rename both sysfs attributes to avoid confusing users. Fixes: d050479693bb ("platform/x86: Add Uniwill laptop driver") Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260218005101.73680-2-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../ABI/testing/sysfs-driver-uniwill-laptop | 10 ++-- .../admin-guide/laptops/uniwill-laptop.rst | 2 +- drivers/platform/x86/uniwill/uniwill-acpi.c | 58 ++++++++++--------- drivers/platform/x86/uniwill/uniwill-wmi.h | 6 +- 4 files changed, 39 insertions(+), 37 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-uniwill-laptop b/Documentation/ABI/testing/sysfs-driver-uniwill-laptop index eaeb659793d2..2df70792968f 100644 --- a/Documentation/ABI/testing/sysfs-driver-uniwill-laptop +++ b/Documentation/ABI/testing/sysfs-driver-uniwill-laptop @@ -1,4 +1,4 @@ -What: /sys/bus/platform/devices/INOU0000:XX/fn_lock_toggle_enable +What: /sys/bus/platform/devices/INOU0000:XX/fn_lock Date: November 2025 KernelVersion: 6.19 Contact: Armin Wolf @@ -8,15 +8,15 @@ Description: Reading this file returns the current enable status of the FN lock functionality. -What: /sys/bus/platform/devices/INOU0000:XX/super_key_toggle_enable +What: /sys/bus/platform/devices/INOU0000:XX/super_key_enable Date: November 2025 KernelVersion: 6.19 Contact: Armin Wolf Description: - Allows userspace applications to enable/disable the super key functionality - of the integrated keyboard by writing "1"/"0" into this file. + Allows userspace applications to enable/disable the super key of the integrated + keyboard by writing "1"/"0" into this file. - Reading this file returns the current enable status of the super key functionality. + Reading this file returns the current enable status of the super key. What: /sys/bus/platform/devices/INOU0000:XX/touchpad_toggle_enable Date: November 2025 diff --git a/Documentation/admin-guide/laptops/uniwill-laptop.rst b/Documentation/admin-guide/laptops/uniwill-laptop.rst index a16baf15516b..aff5f57a6bd4 100644 --- a/Documentation/admin-guide/laptops/uniwill-laptop.rst +++ b/Documentation/admin-guide/laptops/uniwill-laptop.rst @@ -24,7 +24,7 @@ Keyboard settings The ``uniwill-laptop`` driver allows the user to enable/disable: - - the FN and super key lock functionality of the integrated keyboard + - the FN lock and super key of the integrated keyboard - the touchpad toggle functionality of the integrated touchpad See Documentation/ABI/testing/sysfs-driver-uniwill-laptop for details. diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index fee93537aa43..440724016885 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -314,8 +314,8 @@ #define LED_CHANNELS 3 #define LED_MAX_BRIGHTNESS 200 -#define UNIWILL_FEATURE_FN_LOCK_TOGGLE BIT(0) -#define UNIWILL_FEATURE_SUPER_KEY_TOGGLE BIT(1) +#define UNIWILL_FEATURE_FN_LOCK BIT(0) +#define UNIWILL_FEATURE_SUPER_KEY BIT(1) #define UNIWILL_FEATURE_TOUCHPAD_TOGGLE BIT(2) #define UNIWILL_FEATURE_LIGHTBAR BIT(3) #define UNIWILL_FEATURE_BATTERY BIT(4) @@ -377,11 +377,15 @@ static const struct key_entry uniwill_keymap[] = { { KE_IGNORE, UNIWILL_OSD_CAPSLOCK, { KEY_CAPSLOCK }}, { KE_IGNORE, UNIWILL_OSD_NUMLOCK, { KEY_NUMLOCK }}, - /* Reported when the user locks/unlocks the super key */ - { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_LOCK_ENABLE, { KEY_UNKNOWN }}, - { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_LOCK_DISABLE, { KEY_UNKNOWN }}, + /* + * Reported when the user enables/disables the super key. + * Those events might even be reported when the change was done + * using the sysfs attribute! + */ + { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_DISABLE, { KEY_UNKNOWN }}, + { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_ENABLE, { KEY_UNKNOWN }}, /* Optional, might not be reported by all devices */ - { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_LOCK_CHANGED, { KEY_UNKNOWN }}, + { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_STATE_CHANGED, { KEY_UNKNOWN }}, /* Reported in manual mode when toggling the airplane mode status */ { KE_KEY, UNIWILL_OSD_RFKILL, { KEY_RFKILL }}, @@ -600,8 +604,8 @@ static const struct regmap_config uniwill_ec_config = { .use_single_write = true, }; -static ssize_t fn_lock_toggle_enable_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t fn_lock_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -624,8 +628,7 @@ static ssize_t fn_lock_toggle_enable_store(struct device *dev, struct device_att return count; } -static ssize_t fn_lock_toggle_enable_show(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t fn_lock_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -638,10 +641,10 @@ static ssize_t fn_lock_toggle_enable_show(struct device *dev, struct device_attr return sysfs_emit(buf, "%d\n", !!(value & FN_LOCK_STATUS)); } -static DEVICE_ATTR_RW(fn_lock_toggle_enable); +static DEVICE_ATTR_RW(fn_lock); -static ssize_t super_key_toggle_enable_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t super_key_enable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -673,8 +676,7 @@ static ssize_t super_key_toggle_enable_store(struct device *dev, struct device_a return count; } -static ssize_t super_key_toggle_enable_show(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t super_key_enable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -687,7 +689,7 @@ static ssize_t super_key_toggle_enable_show(struct device *dev, struct device_at return sysfs_emit(buf, "%d\n", !(value & SUPER_KEY_LOCK_STATUS)); } -static DEVICE_ATTR_RW(super_key_toggle_enable); +static DEVICE_ATTR_RW(super_key_enable); static ssize_t touchpad_toggle_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -881,8 +883,8 @@ static int uniwill_nvidia_ctgp_init(struct uniwill_data *data) static struct attribute *uniwill_attrs[] = { /* Keyboard-related */ - &dev_attr_fn_lock_toggle_enable.attr, - &dev_attr_super_key_toggle_enable.attr, + &dev_attr_fn_lock.attr, + &dev_attr_super_key_enable.attr, &dev_attr_touchpad_toggle_enable.attr, /* Lightbar-related */ &dev_attr_rainbow_animation.attr, @@ -897,13 +899,13 @@ static umode_t uniwill_attr_is_visible(struct kobject *kobj, struct attribute *a struct device *dev = kobj_to_dev(kobj); struct uniwill_data *data = dev_get_drvdata(dev); - if (attr == &dev_attr_fn_lock_toggle_enable.attr) { - if (uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK_TOGGLE)) + if (attr == &dev_attr_fn_lock.attr) { + if (uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) return attr->mode; } - if (attr == &dev_attr_super_key_toggle_enable.attr) { - if (uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY_TOGGLE)) + if (attr == &dev_attr_super_key_enable.attr) { + if (uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY)) return attr->mode; } @@ -1505,7 +1507,7 @@ static void uniwill_shutdown(struct platform_device *pdev) static int uniwill_suspend_keyboard(struct uniwill_data *data) { - if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY_TOGGLE)) + if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY)) return 0; /* @@ -1565,7 +1567,7 @@ static int uniwill_resume_keyboard(struct uniwill_data *data) unsigned int value; int ret; - if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY_TOGGLE)) + if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY)) return 0; ret = regmap_read(data->regmap, EC_ADDR_SWITCH_STATUS, &value); @@ -1643,16 +1645,16 @@ static struct platform_driver uniwill_driver = { }; static struct uniwill_device_descriptor lapac71h_descriptor __initdata = { - .features = UNIWILL_FEATURE_FN_LOCK_TOGGLE | - UNIWILL_FEATURE_SUPER_KEY_TOGGLE | + .features = UNIWILL_FEATURE_FN_LOCK | + UNIWILL_FEATURE_SUPER_KEY | UNIWILL_FEATURE_TOUCHPAD_TOGGLE | UNIWILL_FEATURE_BATTERY | UNIWILL_FEATURE_HWMON, }; static struct uniwill_device_descriptor lapkc71f_descriptor __initdata = { - .features = UNIWILL_FEATURE_FN_LOCK_TOGGLE | - UNIWILL_FEATURE_SUPER_KEY_TOGGLE | + .features = UNIWILL_FEATURE_FN_LOCK | + UNIWILL_FEATURE_SUPER_KEY | UNIWILL_FEATURE_TOUCHPAD_TOGGLE | UNIWILL_FEATURE_LIGHTBAR | UNIWILL_FEATURE_BATTERY | diff --git a/drivers/platform/x86/uniwill/uniwill-wmi.h b/drivers/platform/x86/uniwill/uniwill-wmi.h index 48783b2e9ffb..fb1910c0f741 100644 --- a/drivers/platform/x86/uniwill/uniwill-wmi.h +++ b/drivers/platform/x86/uniwill/uniwill-wmi.h @@ -64,8 +64,8 @@ #define UNIWILL_OSD_KB_LED_LEVEL3 0x3E #define UNIWILL_OSD_KB_LED_LEVEL4 0x3F -#define UNIWILL_OSD_SUPER_KEY_LOCK_ENABLE 0x40 -#define UNIWILL_OSD_SUPER_KEY_LOCK_DISABLE 0x41 +#define UNIWILL_OSD_SUPER_KEY_DISABLE 0x40 +#define UNIWILL_OSD_SUPER_KEY_ENABLE 0x41 #define UNIWILL_OSD_MENU_JP 0x42 @@ -74,7 +74,7 @@ #define UNIWILL_OSD_RFKILL 0xA4 -#define UNIWILL_OSD_SUPER_KEY_LOCK_CHANGED 0xA5 +#define UNIWILL_OSD_SUPER_KEY_STATE_CHANGED 0xA5 #define UNIWILL_OSD_LIGHTBAR_STATE_CHANGED 0xA6 From 67e7eb4c130a74c5da9ab43316e00ed3186d1811 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 18 Feb 2026 01:50:59 +0100 Subject: [PATCH 172/828] platform/x86: uniwill-laptop: Fix crash on unexpected battery event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On devices that have not UNIWILL_FEATURE_BATTERY set, the underlying hardware might still send the UNIWILL_OSD_BATTERY_ALERT event. In such a situation, the driver will access uninitialized data structures when handling said event. Prevent this by only handling the UNIWILL_OSD_BATTERY_ALERT event when UNIWILL_FEATURE_BATTERY is set. Fixes: d050479693bb ("platform/x86: Add Uniwill laptop driver") Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260218005101.73680-3-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/uniwill/uniwill-acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index 440724016885..e0f3740a6d3a 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -1359,6 +1359,9 @@ static int uniwill_notifier_call(struct notifier_block *nb, unsigned long action switch (action) { case UNIWILL_OSD_BATTERY_ALERT: + if (!uniwill_device_supports(data, UNIWILL_FEATURE_BATTERY)) + return NOTIFY_DONE; + mutex_lock(&data->battery_lock); list_for_each_entry(entry, &data->batteries, head) { power_supply_changed(entry->battery); From 2be519d94544e226636c185e28ec1a72d01aab87 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 18 Feb 2026 01:51:00 +0100 Subject: [PATCH 173/828] platform/x86: uniwill-laptop: Mark FN lock status as being volatile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that the FN lock status can be changed by the underlying hardware when the user presses a special key combination. Mark the associated register as volatile to prevent regmap from caching said value. Also add the necessary suspend/resume handling. Fixes: d050479693bb ("platform/x86: Add Uniwill laptop driver") Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260218005101.73680-4-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/uniwill/uniwill-acpi.c | 39 ++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index e0f3740a6d3a..73b3d909e2b0 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -330,6 +330,7 @@ struct uniwill_data { struct acpi_battery_hook hook; unsigned int last_charge_ctrl; struct mutex battery_lock; /* Protects the list of currently registered batteries */ + unsigned int last_status; unsigned int last_switch_status; struct mutex super_key_lock; /* Protects the toggling of the super key lock state */ struct list_head batteries; @@ -580,6 +581,7 @@ static bool uniwill_volatile_reg(struct device *dev, unsigned int reg) case EC_ADDR_SECOND_FAN_RPM_1: case EC_ADDR_SECOND_FAN_RPM_2: case EC_ADDR_BAT_ALERT: + case EC_ADDR_BIOS_OEM: case EC_ADDR_PWM_1: case EC_ADDR_PWM_2: case EC_ADDR_TRIGGER: @@ -1508,7 +1510,19 @@ static void uniwill_shutdown(struct platform_device *pdev) regmap_clear_bits(data->regmap, EC_ADDR_AP_OEM, ENABLE_MANUAL_CTRL); } -static int uniwill_suspend_keyboard(struct uniwill_data *data) +static int uniwill_suspend_fn_lock(struct uniwill_data *data) +{ + if (!uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) + return 0; + + /* + * The EC_ADDR_BIOS_OEM is marked as volatile, so we have to restore it + * ourselves. + */ + return regmap_read(data->regmap, EC_ADDR_BIOS_OEM, &data->last_status); +} + +static int uniwill_suspend_super_key(struct uniwill_data *data) { if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY)) return 0; @@ -1547,7 +1561,11 @@ static int uniwill_suspend(struct device *dev) struct uniwill_data *data = dev_get_drvdata(dev); int ret; - ret = uniwill_suspend_keyboard(data); + ret = uniwill_suspend_fn_lock(data); + if (ret < 0) + return ret; + + ret = uniwill_suspend_super_key(data); if (ret < 0) return ret; @@ -1565,7 +1583,16 @@ static int uniwill_suspend(struct device *dev) return 0; } -static int uniwill_resume_keyboard(struct uniwill_data *data) +static int uniwill_resume_fn_lock(struct uniwill_data *data) +{ + if (!uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) + return 0; + + return regmap_update_bits(data->regmap, EC_ADDR_BIOS_OEM, FN_LOCK_STATUS, + data->last_status); +} + +static int uniwill_resume_super_key(struct uniwill_data *data) { unsigned int value; int ret; @@ -1613,7 +1640,11 @@ static int uniwill_resume(struct device *dev) if (ret < 0) return ret; - ret = uniwill_resume_keyboard(data); + ret = uniwill_resume_fn_lock(data); + if (ret < 0) + return ret; + + ret = uniwill_resume_super_key(data); if (ret < 0) return ret; From 9836feedcf559449e82eb0f741084086780104e5 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 18 Feb 2026 01:51:01 +0100 Subject: [PATCH 174/828] platform/x86: uniwill-laptop: Handle FN lock event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On many devices, the user can toggle the Fn lock state by pressing Fn + Esc. Forward the associated event to the fn_lock sysfs attribute as a poll notification. Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260218005101.73680-5-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/uniwill/uniwill-acpi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index 73b3d909e2b0..6341dca20b76 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -406,9 +406,6 @@ static const struct key_entry uniwill_keymap[] = { /* Reported when the user wants to toggle the mute status */ { KE_IGNORE, UNIWILL_OSD_MUTE, { KEY_MUTE }}, - /* Reported when the user locks/unlocks the Fn key */ - { KE_IGNORE, UNIWILL_OSD_FN_LOCK, { KEY_FN_ESC }}, - /* Reported when the user wants to toggle the brightness of the keyboard */ { KE_KEY, UNIWILL_OSD_KBDILLUMTOGGLE, { KEY_KBDILLUMTOGGLE }}, { KE_KEY, UNIWILL_OSD_KB_LED_LEVEL0, { KEY_KBDILLUMTOGGLE }}, @@ -1376,6 +1373,13 @@ static int uniwill_notifier_call(struct notifier_block *nb, unsigned long action * gets implemented. */ + return NOTIFY_OK; + case UNIWILL_OSD_FN_LOCK: + if (!uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) + return NOTIFY_DONE; + + sysfs_notify(&data->dev->kobj, NULL, "fn_lock"); + return NOTIFY_OK; default: mutex_lock(&data->input_lock); From ec197dca8735f7627e5cff7e3fa8839b53a28514 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 22 Feb 2026 08:33:52 +0000 Subject: [PATCH 175/828] KVM: arm64: Revert accidental drop of kvm_uninit_stage2_mmu() for non-NV VMs Commit 0c4762e26879 ("KVM: arm64: nv: Avoid NV stage-2 code when NV is not supported") added an early return to several functions in arch/arm64/kvm/nested.c to prevent a UBSAN shift-out-of-bounds error when accessing the pgt union for non-nested VMs. However, this early return was inadvertently applied to kvm_arch_flush_shadow_all() as well, causing it to skip the call to kvm_uninit_stage2_mmu(kvm) for all non-nested VMs. For pKVM, skipping this teardown means the host never unshares the guest's memory with the EL2 hypervisor. When the host kernel later recycles these leaked pages for a new VM, it attempts to re-share them. The hypervisor correctly rejects this with -EPERM, triggering a host WARN_ON and hanging the guest. Fix this by dropping the early return from kvm_arch_flush_shadow_all(). The for-loop guarding the nested MMU cleanup already bounds itself when nested_mmus_size == 0, allowing execution to proceed to kvm_uninit_stage2_mmu() as intended. Reported-by: Mark Brown Closes: https://lore.kernel.org/all/60916cb6-f460-4751-b910-f63c58700ad0@sirena.org.uk/ Fixes: 0c4762e26879 ("KVM: arm64: nv: Avoid NV stage-2 code when NV is not supported") Signed-off-by: Fuad Tabba Tested-by: Mark Brown Link: https://patch.msgid.link/20260222083352.89503-1-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/nested.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index eeea5e692370..7f1ea85dc67a 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1154,9 +1154,6 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) { int i; - if (!kvm->arch.nested_mmus_size) - return; - for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; From 822655e6751dde2df7ddaa828c5aba217726c5a2 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 23 Feb 2026 09:29:00 -0700 Subject: [PATCH 176/828] cxl/port: Introduce port_to_host() helper In CXL subsystem, a port has its own host device for the port creation and removal. The host of CXL root and all the first level ports is the platform firmware device, the host of other ports is their parent port's device. Create this new helper to much easier to get the host of a cxl port. Introduce port_to_host() and use it to replace all places where using open coded to get the host of a port. Remove endpoint_host() as its functionality can be replaced by port_to_host(). [dj: Squashed commit 1 and 3 in the series to commit 1. ] Signed-off-by: Li Ming Tested-by: Alison Schofield Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260210-fix-port-enumeration-failure-v3-1-06acce0b9ead@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 18 ++++++++++++++++++ drivers/cxl/core/port.c | 29 +++-------------------------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 007b8aff0238..5b0570df0fd9 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -152,6 +152,24 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +static inline struct device *port_to_host(struct cxl_port *port) +{ + struct cxl_port *parent = is_cxl_root(port) ? NULL : + to_cxl_port(port->dev.parent); + + /* + * The host of CXL root port and the first level of ports is + * the platform firmware device, the host of all other ports + * is their parent port. + */ + if (!parent) + return port->uport_dev; + else if (is_cxl_root(parent)) + return parent->uport_dev; + else + return &parent->dev; +} + static inline struct device *dport_to_host(struct cxl_dport *dport) { struct cxl_port *port = dport->port; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b69c2529744c..6be150e645b6 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -615,22 +615,8 @@ struct cxl_port *parent_port_of(struct cxl_port *port) static void unregister_port(void *_port) { struct cxl_port *port = _port; - struct cxl_port *parent = parent_port_of(port); - struct device *lock_dev; - /* - * CXL root port's and the first level of ports are unregistered - * under the platform firmware device lock, all other ports are - * unregistered while holding their parent port lock. - */ - if (!parent) - lock_dev = port->uport_dev; - else if (is_cxl_root(parent)) - lock_dev = parent->uport_dev; - else - lock_dev = &parent->dev; - - device_lock_assert(lock_dev); + device_lock_assert(port_to_host(port)); port->dead = true; device_unregister(&port->dev); } @@ -1427,20 +1413,11 @@ static struct device *grandparent(struct device *dev) return NULL; } -static struct device *endpoint_host(struct cxl_port *endpoint) -{ - struct cxl_port *port = to_cxl_port(endpoint->dev.parent); - - if (is_cxl_root(port)) - return port->uport_dev; - return &port->dev; -} - static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; struct cxl_port *endpoint = cxlmd->endpoint; - struct device *host = endpoint_host(endpoint); + struct device *host = port_to_host(endpoint); scoped_guard(device, host) { if (host->driver && !endpoint->dead) { @@ -1456,7 +1433,7 @@ static void delete_endpoint(void *data) int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) { - struct device *host = endpoint_host(endpoint); + struct device *host = port_to_host(endpoint); struct device *dev = &cxlmd->dev; get_device(host); From 0066688dbcdcf51680f499936faffe6d0e94194e Mon Sep 17 00:00:00 2001 From: Li Ming Date: Tue, 10 Feb 2026 19:46:57 +0800 Subject: [PATCH 177/828] cxl/port: Hold port host lock during dport adding. CXL testing environment can trigger following trace Oops: general protection fault, probably for non-canonical address 0xdffffc0000000092: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000490-0x0000000000000497] RIP: 0010:cxl_dpa_to_region+0x105/0x1f0 [cxl_core] Call Trace: cxl_event_trace_record+0xd1/0xa70 [cxl_core] __cxl_event_trace_record+0x12f/0x1e0 [cxl_core] cxl_mem_get_records_log+0x261/0x500 [cxl_core] cxl_mem_get_event_records+0x7c/0xc0 [cxl_core] cxl_mock_mem_probe+0xd38/0x1c60 [cxl_mock_mem] platform_probe+0x9d/0x130 really_probe+0x1c8/0x960 __driver_probe_device+0x187/0x3e0 driver_probe_device+0x45/0x120 __device_attach_driver+0x15d/0x280 When CXL subsystem adds a cxl port to a hierarchy, there is a small window where the new port becomes visible before it is bound to a driver. This happens because device_add() adds a device to bus device list before bus_probe_device() binds it to a driver. So if two cxl memdevs are trying to add a dport to a same port via devm_cxl_enumerate_ports(), the second cxl memdev may observe the port and attempt to add a dport, but fails because the port has not yet been attached to cxl port driver. That causes the memdev->endpoint can not be updated. The sequence is like: CPU 0 CPU 1 devm_cxl_enumerate_ports() # port not found, add it add_port_attach_ep() # hold the parent port lock # to add the new port devm_cxl_create_port() device_add() # Add dev to bus devs list bus_add_device() devm_cxl_enumerate_ports() # found the port find_cxl_port_by_uport() # hold port lock to add a dport device_lock(the port) find_or_add_dport() cxl_port_add_dport() return -ENXIO because port->dev.driver is NULL device_unlock(the port) bus_probe_device() # hold the port lock # for attaching device_lock(the port) attaching the new port device_unlock(the port) To fix this race, require that dport addition holds the host lock of the target port(the host of CXL root and all cxl host bridge ports is the platform firmware device, the host of all other ports is their parent port). The CXL subsystem already requires holding the host lock while attaching a new port. Therefore, successfully acquiring the host lock guarantees that port attaching has completed. Fixes: 4f06d81e7c6a ("cxl: Defer dport allocation for switch ports") Signed-off-by: Li Ming Reviewed-by: Dan Williams Tested-by: Alison Schofield Link: https://patch.msgid.link/20260210-fix-port-enumeration-failure-v3-2-06acce0b9ead@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 6be150e645b6..0c5957d1d329 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1767,7 +1767,16 @@ static struct cxl_dport *find_or_add_dport(struct cxl_port *port, { struct cxl_dport *dport; - device_lock_assert(&port->dev); + /* + * The port is already visible in CXL hierarchy, but it may still + * be in the process of binding to the CXL port driver at this point. + * + * port creation and driver binding are protected by the port's host + * lock, so acquire the host lock here to ensure the port has completed + * driver binding before proceeding with dport addition. + */ + guard(device)(port_to_host(port)); + guard(device)(&port->dev); dport = cxl_find_dport_by_dev(port, dport_dev); if (!dport) { dport = probe_dport(port, dport_dev); @@ -1834,13 +1843,11 @@ retry: * RP port enumerated by cxl_acpi without dport will * have the dport added here. */ - scoped_guard(device, &port->dev) { - dport = find_or_add_dport(port, dport_dev); - if (IS_ERR(dport)) { - if (PTR_ERR(dport) == -EAGAIN) - goto retry; - return PTR_ERR(dport); - } + dport = find_or_add_dport(port, dport_dev); + if (IS_ERR(dport)) { + if (PTR_ERR(dport) == -EAGAIN) + goto retry; + return PTR_ERR(dport); } rc = cxl_add_ep(dport, &cxlmd->dev); From 9d851afa482680bdd7c158cc8720284dc9ecb5fe Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Sat, 21 Feb 2026 23:40:42 +0800 Subject: [PATCH 178/828] selftests/sched_ext: Abort test loop on signal The runner sets exit_req on SIGINT/SIGTERM but ignores it during the main loop. This prevents users from cleanly interrupting a test run. Check exit_req each iteration to safely break out on exit signals. Signed-off-by: Cheng-Yang Chou Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/runner.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c index 5748d2c69903..761c21f96404 100644 --- a/tools/testing/selftests/sched_ext/runner.c +++ b/tools/testing/selftests/sched_ext/runner.c @@ -166,6 +166,9 @@ int main(int argc, char **argv) enum scx_test_status status; struct scx_test *test = &__scx_tests[i]; + if (exit_req) + break; + if (list) { printf("%s\n", test->name); if (i == (__scx_num_tests - 1)) From 08fe1b5166fdc81b010d7bf39cd6440620e7931e Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 5 Feb 2026 22:02:37 -0800 Subject: [PATCH 179/828] accel/amdxdna: Remove buffer size check when creating command BO Large command buffers may be used, and they do not always need to be mapped or accessed by the driver. Performing a size check at command BO creation time unnecessarily rejects valid use cases. Remove the buffer size check from command BO creation, and defer vmap and size validation to the paths where the driver actually needs to map and access the command buffer. Fixes: ac49797c1815 ("accel/amdxdna: Add GEM buffer object management") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260206060237.4050492-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_gem.c | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 8c290ddd3251..d60db49ead71 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -21,8 +21,6 @@ #include "amdxdna_pci_drv.h" #include "amdxdna_ubuf.h" -#define XDNA_MAX_CMD_BO_SIZE SZ_32K - MODULE_IMPORT_NS("DMA_BUF"); static int @@ -745,12 +743,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, { struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_gem_obj *abo; - int ret; - - if (args->size > XDNA_MAX_CMD_BO_SIZE) { - XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size); - return ERR_PTR(-EINVAL); - } if (args->size < sizeof(struct amdxdna_cmd)) { XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size); @@ -764,17 +756,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, abo->type = AMDXDNA_BO_CMD; abo->client = filp->driver_priv; - ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); - if (ret) { - XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); - goto release_obj; - } - return abo; - -release_obj: - drm_gem_object_put(to_gobj(abo)); - return ERR_PTR(ret); } int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -871,6 +853,7 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, struct amdxdna_dev *xdna = client->xdna; struct amdxdna_gem_obj *abo; struct drm_gem_object *gobj; + int ret; gobj = drm_gem_object_lookup(client->filp, bo_hdl); if (!gobj) { @@ -879,9 +862,26 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, } abo = to_xdna_obj(gobj); - if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type) + if (bo_type != AMDXDNA_BO_INVALID && abo->type != bo_type) + goto put_obj; + + if (bo_type != AMDXDNA_BO_CMD || abo->mem.kva) return abo; + if (abo->mem.size > SZ_32K) { + XDNA_ERR(xdna, "Cmd bo is too big %ld", abo->mem.size); + goto put_obj; + } + + ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); + if (ret) { + XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); + goto put_obj; + } + + return abo; + +put_obj: drm_gem_object_put(gobj); return NULL; } From c68a6af400ca80596e8c37de0a1cb564aa9da8a4 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 5 Feb 2026 22:02:51 -0800 Subject: [PATCH 180/828] accel/amdxdna: Switch to always use chained command Preempt commands are only supported when submitted as chained commands. To ensure preempt support works consistently, always submit commands in chained command format. Set force_cmdlist to true so that single commands are filled using the chained command layout, enabling correct handling of preempt commands. Fixes: 3a0ff7b98af4 ("accel/amdxdna: Support preemption requests") Reviewed-by: Karol Wachowski Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260206060251.4050512-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 4503c7c77a3e..7140c3f96362 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -23,9 +23,9 @@ #include "amdxdna_pci_drv.h" #include "amdxdna_pm.h" -static bool force_cmdlist; +static bool force_cmdlist = true; module_param(force_cmdlist, bool, 0600); -MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)"); #define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ From 8363c02863332992a1822688da41f881d88d1631 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 5 Feb 2026 22:03:06 -0800 Subject: [PATCH 181/828] accel/amdxdna: Fix crash when destroying a suspended hardware context If userspace issues an ioctl to destroy a hardware context that has already been automatically suspended, the driver may crash because the mailbox channel pointer is NULL for the suspended context. Fix this by checking the mailbox channel pointer in aie2_destroy_context() before accessing it. Fixes: 97f27573837e ("accel/amdxdna: Fix potential NULL pointer dereference in context cleanup") Reviewed-by: Karol Wachowski Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260206060306.4050531-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 7d7dcfeaf794..ab1178850c47 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -318,6 +318,9 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc struct amdxdna_dev *xdna = ndev->xdna; int ret; + if (!hwctx->priv->mbox_chann) + return 0; + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); From 57aa3917a3b3bd805a3679371f97a1ceda3c5510 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 10 Feb 2026 10:42:51 -0600 Subject: [PATCH 182/828] accel/amdxdna: Reduce log noise during process termination During process termination, several error messages are logged that are not actual errors but expected conditions when a process is killed or interrupted. This creates unnecessary noise in the kernel log. The specific scenarios are: 1. HMM invalidation returns -ERESTARTSYS when the wait is interrupted by a signal during process cleanup. This is expected when a process is being terminated and should not be logged as an error. 2. Context destruction returns -ENODEV when the firmware or device has already stopped, which commonly occurs during cleanup if the device was already torn down. This is also an expected condition during orderly shutdown. Downgrade these expected error conditions from error level to debug level to reduce log noise while still keeping genuine errors visible. Fixes: 97f27573837e ("accel/amdxdna: Fix potential NULL pointer dereference in context cleanup") Reviewed-by: Lizhi Hou Signed-off-by: Mario Limonciello Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260210164521.1094274-3-mario.limonciello@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 6 ++++-- drivers/accel/amdxdna/aie2_message.c | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 7140c3f96362..e13be7608462 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -497,7 +497,7 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx) if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { ret = aie2_destroy_context(xdna->dev_handle, hwctx); - if (ret) + if (ret && ret != -ENODEV) XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret); } else { ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); @@ -1070,6 +1070,8 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, true, MAX_SCHEDULE_TIMEOUT); - if (!ret || ret == -ERESTARTSYS) + if (!ret) XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); + else if (ret == -ERESTARTSYS) + XDNA_DBG(xdna, "Wait for bo interrupted by signal"); } diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index ab1178850c47..5d80c5837745 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -216,8 +216,10 @@ static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id) req.context_id = id; ret = aie2_send_mgmt_msg_wait(ndev, &msg); - if (ret) + if (ret && ret != -ENODEV) XDNA_WARN(xdna, "Destroy context failed, ret %d", ret); + else if (ret == -ENODEV) + XDNA_DBG(xdna, "Destroy context: device already stopped"); return ret; } From 1aa82181a3c285c7351523d587f7981ae4c015c8 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 11 Feb 2026 12:46:44 -0800 Subject: [PATCH 183/828] accel/amdxdna: Fix dead lock for suspend and resume When an application issues a query IOCTL while auto suspend is running, a deadlock can occur. The query path holds dev_lock and then calls pm_runtime_resume_and_get(), which waits for the ongoing suspend to complete. Meanwhile, the suspend callback attempts to acquire dev_lock and blocks, resulting in a deadlock. Fix this by releasing dev_lock before calling pm_runtime_resume_and_get() and reacquiring it after the call completes. Also acquire dev_lock in the resume callback to keep the locking consistent. Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260211204644.722758-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 4 ++-- drivers/accel/amdxdna/aie2_pci.c | 7 +++---- drivers/accel/amdxdna/aie2_pm.c | 2 +- drivers/accel/amdxdna/amdxdna_ctx.c | 19 +++++++------------ drivers/accel/amdxdna/amdxdna_pm.c | 2 ++ drivers/accel/amdxdna/amdxdna_pm.h | 11 +++++++++++ 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index e13be7608462..8d79fafd889a 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -629,7 +629,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) goto free_entity; } - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto free_col_list; @@ -760,7 +760,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size if (!hwctx->cus) return -ENOMEM; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto free_cus; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 2a51b2658bfc..07e369507818 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -451,7 +451,6 @@ static int aie2_hw_suspend(struct amdxdna_dev *xdna) { struct amdxdna_client *client; - guard(mutex)(&xdna->dev_lock); list_for_each_entry(client, &xdna->client_list, node) aie2_hwctx_suspend(client); @@ -951,7 +950,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; @@ -1044,7 +1043,7 @@ static int aie2_get_array(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; @@ -1134,7 +1133,7 @@ static int aie2_set_state(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c index 579b8be13b18..29bd4403a94d 100644 --- a/drivers/accel/amdxdna/aie2_pm.c +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -31,7 +31,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { int ret; - ret = amdxdna_pm_resume_get(ndev->xdna); + ret = amdxdna_pm_resume_get_locked(ndev->xdna); if (ret) return ret; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 59fa3800b9d3..5173456bbb61 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -266,9 +266,9 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr struct amdxdna_drm_config_hwctx *args = data; struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_hwctx *hwctx; - int ret, idx; u32 buf_size; void *buf; + int ret; u64 val; if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) @@ -310,20 +310,17 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr return -EINVAL; } - mutex_lock(&xdna->dev_lock); - idx = srcu_read_lock(&client->hwctx_srcu); + guard(mutex)(&xdna->dev_lock); hwctx = xa_load(&client->hwctx_xa, args->handle); if (!hwctx) { XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); ret = -EINVAL; - goto unlock_srcu; + goto free_buf; } ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); -unlock_srcu: - srcu_read_unlock(&client->hwctx_srcu, idx); - mutex_unlock(&xdna->dev_lock); +free_buf: kfree(buf); return ret; } @@ -334,7 +331,7 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) struct amdxdna_hwctx *hwctx; struct amdxdna_gem_obj *abo; struct drm_gem_object *gobj; - int ret, idx; + int ret; if (!xdna->dev_info->ops->hwctx_sync_debug_bo) return -EOPNOTSUPP; @@ -345,17 +342,15 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) abo = to_xdna_obj(gobj); guard(mutex)(&xdna->dev_lock); - idx = srcu_read_lock(&client->hwctx_srcu); hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx); if (!hwctx) { ret = -EINVAL; - goto unlock_srcu; + goto put_obj; } ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl); -unlock_srcu: - srcu_read_unlock(&client->hwctx_srcu, idx); +put_obj: drm_gem_object_put(gobj); return ret; } diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c index d024d480521c..b1fafddd7ad5 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.c +++ b/drivers/accel/amdxdna/amdxdna_pm.c @@ -16,6 +16,7 @@ int amdxdna_pm_suspend(struct device *dev) struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; + guard(mutex)(&xdna->dev_lock); if (xdna->dev_info->ops->suspend) ret = xdna->dev_info->ops->suspend(xdna); @@ -28,6 +29,7 @@ int amdxdna_pm_resume(struct device *dev) struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; + guard(mutex)(&xdna->dev_lock); if (xdna->dev_info->ops->resume) ret = xdna->dev_info->ops->resume(xdna); diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h index 77b2d6e45570..3d26b973e0e3 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.h +++ b/drivers/accel/amdxdna/amdxdna_pm.h @@ -15,4 +15,15 @@ void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna); void amdxdna_pm_init(struct amdxdna_dev *xdna); void amdxdna_pm_fini(struct amdxdna_dev *xdna); +static inline int amdxdna_pm_resume_get_locked(struct amdxdna_dev *xdna) +{ + int ret; + + mutex_unlock(&xdna->dev_lock); + ret = amdxdna_pm_resume_get(xdna); + mutex_lock(&xdna->dev_lock); + + return ret; +} + #endif /* _AMDXDNA_PM_H_ */ From fdb65acfe655f844ae1e88696b9656d3ef5bb8fb Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 11 Feb 2026 12:47:16 -0800 Subject: [PATCH 184/828] accel/amdxdna: Fix suspend failure after enabling turbo mode Enabling turbo mode disables hardware clock gating. Suspend requires hardware clock gating to be re-enabled, otherwise suspend will fail. Fix this by calling aie2_runtime_cfg() from aie2_hw_stop() to re-enable clock gating during suspend. Also ensure that firmware is initialized in aie2_hw_start() before modifying clock-gating settings during resume. Fixes: f4d7b8a6bc8c ("accel/amdxdna: Enhance power management settings") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260211204716.722788-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_pci.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 07e369507818..4b3e6bb97bd2 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -323,6 +323,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) return; } + aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL); aie2_mgmt_fw_fini(ndev); xdna_mailbox_stop_channel(ndev->mgmt_chann); xdna_mailbox_destroy_channel(ndev->mgmt_chann); @@ -406,18 +407,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto stop_psp; } - ret = aie2_pm_init(ndev); - if (ret) { - XDNA_ERR(xdna, "failed to init pm, ret %d", ret); - goto destroy_mgmt_chann; - } - ret = aie2_mgmt_fw_init(ndev); if (ret) { XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); goto destroy_mgmt_chann; } + ret = aie2_pm_init(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to init pm, ret %d", ret); + goto destroy_mgmt_chann; + } + ret = aie2_mgmt_fw_query(ndev); if (ret) { XDNA_ERR(xdna, "failed to query fw, ret %d", ret); From 07efce5a6611af6714ea3ef65694e0c8dd7e44f5 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 11 Feb 2026 12:53:41 -0800 Subject: [PATCH 185/828] accel/amdxdna: Fix command hang on suspended hardware context When a hardware context is suspended, the job scheduler is stopped. If a command is submitted while the context is suspended, the job is queued in the scheduler but aie2_sched_job_run() is never invoked to restart the hardware context. As a result, the command hangs. Fix this by modifying the hardware context suspend routine to keep the job scheduler running so that queued jobs can trigger context restart properly. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260211205341.722982-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 8d79fafd889a..25845bd5e507 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -53,6 +53,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct { drm_sched_stop(&hwctx->priv->sched, bad_job); aie2_destroy_context(xdna->dev_handle, hwctx); + drm_sched_start(&hwctx->priv->sched, 0); } static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) @@ -80,7 +81,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw } out: - drm_sched_start(&hwctx->priv->sched, 0); XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); return ret; } @@ -297,19 +297,23 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int ret; - if (!hwctx->priv->mbox_chann) + ret = amdxdna_pm_resume_get(hwctx->client->xdna); + if (ret) return NULL; - if (!mmget_not_zero(job->mm)) + if (!hwctx->priv->mbox_chann) { + amdxdna_pm_suspend_put(hwctx->client->xdna); + return NULL; + } + + if (!mmget_not_zero(job->mm)) { + amdxdna_pm_suspend_put(hwctx->client->xdna); return ERR_PTR(-ESRCH); + } kref_get(&job->refcnt); fence = dma_fence_get(job->fence); - ret = amdxdna_pm_resume_get(hwctx->client->xdna); - if (ret) - goto out; - if (job->drv_cmd) { switch (job->drv_cmd->opcode) { case SYNC_DEBUG_BO: From 1110a949675ebd56b3f0286e664ea543f745801c Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 17 Feb 2026 10:54:15 -0800 Subject: [PATCH 186/828] accel/amdxdna: Fix out-of-bounds memset in command slot handling The remaining space in a command slot may be smaller than the size of the command header. Clearing the command header with memset() before verifying the available slot space can result in an out-of-bounds write and memory corruption. Fix this by moving the memset() call after the size validation. Fixes: 3d32eb7a5ecf ("accel/amdxdna: Fix cu_idx being cleared by memset() during command setup") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260217185415.1781908-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 5d80c5837745..277a27bce850 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -699,11 +699,11 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz u32 cmd_len; void *cmd; - memset(npu_slot, 0, sizeof(*npu_slot)); cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); if (*size < sizeof(*npu_slot) + cmd_len) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -724,7 +724,6 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*sn); if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -733,6 +732,7 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -756,7 +756,6 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -765,6 +764,7 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -792,7 +792,6 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -801,6 +800,7 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_ELF; npu_slot->inst_buf_addr = pd->inst_buf; npu_slot->save_buf_addr = pd->save_buf; From 03808abb1d868aed7478a11a82e5bb4b3f1ca6d6 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 17 Feb 2026 11:28:15 -0800 Subject: [PATCH 187/828] accel/amdxdna: Prevent ubuf size overflow The ubuf size calculation may overflow, resulting in an undersized allocation and possible memory corruption. Use check_add_overflow() helpers to validate the size calculation before allocation. Fixes: bd72d4acda10 ("accel/amdxdna: Support user space allocated buffer") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260217192815.1784689-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_ubuf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c index b509f10b155c..fb71d6e3f44d 100644 --- a/drivers/accel/amdxdna/amdxdna_ubuf.c +++ b/drivers/accel/amdxdna/amdxdna_ubuf.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -176,7 +177,10 @@ struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, goto free_ent; } - exp_info.size += va_ent[i].len; + if (check_add_overflow(exp_info.size, va_ent[i].len, &exp_info.size)) { + ret = -EINVAL; + goto free_ent; + } } ubuf->nr_pages = exp_info.size >> PAGE_SHIFT; From 901ec3470994006bc8dd02399e16b675566c3416 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 19 Feb 2026 13:19:46 -0800 Subject: [PATCH 188/828] accel/amdxdna: Validate command buffer payload count The count field in the command header is used to determine the valid payload size. Verify that the valid payload does not exceed the remaining buffer space. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260219211946.1920485-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_ctx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 5173456bbb61..263d36072540 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -104,7 +104,10 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) if (size) { count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header); - if (unlikely(count <= num_masks)) { + if (unlikely(count <= num_masks || + count * sizeof(u32) + + offsetof(struct amdxdna_cmd, data[0]) > + abo->mem.size)) { *size = 0; return NULL; } From 551d44200152cb26f75d2ef990aeb6185b7e37fd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Feb 2026 09:33:08 -0800 Subject: [PATCH 189/828] default_gfp(): avoid using the "newfangled" __VA_OPT__ trick The default_gfp() helper that I added is not wrong, but it turns out that it causes unnecessary headaches for 'sparse' which doesn't support the use of __VA_OPT__ (introduced in C++20 and C23, and supported by gcc and clang for a long time). We do already use __VA_OPT__ in some other cases in the kernel (drm/xe and btrfs), but it has been fairly limited. Now it triggers for pretty much everything, and sparse ends up not working at all. We can use the traditional gcc ',##__VA_ARGS__' syntax instead: it may not be the "C standard" way and is slightly less natural in this context, but it is the traditional model for this and avoids the sparse problem. Reported-and-tested-by: Ricardo Ribalda Reported-and-tested-by: Richard Fitzgerald Reported-by: Ben Dooks Fixes: e19e1b480ac7 ("add default_gfp() helper macro and use it in the new *alloc_obj() helpers") Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2b30a0529d48..90536b2bc42e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -14,8 +14,8 @@ struct vm_area_struct; struct mempolicy; /* Helper macro to avoid gfp flags if they are the default one */ -#define __default_gfp(a,...) a -#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL) +#define __default_gfp(a,b,...) b +#define default_gfp(...) __default_gfp(,##__VA_ARGS__,GFP_KERNEL) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) From a7b4bc094fbaa7dc7b7b91ae33549bbd7eefaac1 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Wed, 7 Jan 2026 13:22:57 +0100 Subject: [PATCH 190/828] module: Remove duplicate freeing of lockdep classes In the error path of load_module(), under the free_module label, the code calls lockdep_free_key_range() to release lock classes associated with the MOD_DATA, MOD_RODATA and MOD_RO_AFTER_INIT module regions, and subsequently invokes module_deallocate(). Since commit ac3b43283923 ("module: replace module_layout with module_memory"), the module_deallocate() function calls free_mod_mem(), which releases the lock classes as well and considers all module regions. Attempting to free these classes twice is unnecessary. Remove the redundant code in load_module(). Fixes: ac3b43283923 ("module: replace module_layout with module_memory") Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Reviewed-by: Aaron Tomlin Acked-by: Song Liu Acked-by: Peter Zijlstra (Intel) Signed-off-by: Sami Tolvanen --- kernel/module/main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 2bac4c7cd019..eec6c9cf8dbb 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3544,12 +3544,6 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_unlock(&module_mutex); free_module: mod_stat_bump_invalid(info, flags); - /* Free lock-classes; relies on the preceding sync_rcu() */ - for_class_mod_mem_type(type, core_data) { - lockdep_free_key_range(mod->mem[type].base, - mod->mem[type].size); - } - module_memory_restore_rox(mod); module_deallocate(mod, info); free_copy: From 8d597ba6ec18dae2eec143d4e1c9d81441ca0dda Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Thu, 5 Feb 2026 15:37:08 +0100 Subject: [PATCH 191/828] module: Fix the modversions and signing submenus The module Kconfig file contains a set of options related to "Module versioning support" (depends on MODVERSIONS) and "Module signature verification" (depends on MODULE_SIG). The Kconfig tool automatically creates submenus when an entry for a symbol is followed by consecutive items that all depend on the symbol. However, this functionality doesn't work for the mentioned module options. The MODVERSIONS options are interleaved with ASM_MODVERSIONS, which has no 'depends on MODVERSIONS' but instead uses 'default HAVE_ASM_MODVERSIONS && MODVERSIONS'. Similarly, the MODULE_SIG options are interleaved by a comment warning not to forget signing modules with scripts/sign-file, which uses the condition 'depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL'. The result is that the options are confusingly shown when using a menuconfig tool, as follows: [*] Module versioning support Module versioning implementation (genksyms (from source code)) ---> [ ] Extended Module Versioning Support [*] Basic Module Versioning Support [*] Source checksum for all modules [*] Module signature verification [ ] Require modules to be validly signed [ ] Automatically sign all modules Hash algorithm to sign modules (SHA-256) ---> Fix the issue by using if/endif to group related options together in kernel/module/Kconfig, similarly to how the MODULE_DEBUG options are already grouped. Note that the signing-related options depend on 'MODULE_SIG || IMA_APPRAISE_MODSIG', with the exception of MODULE_SIG_FORCE, which is valid only for MODULE_SIG and is therefore kept separately. For consistency, do the same for the MODULE_COMPRESS entries. The options are then properly placed into submenus, as follows: [*] Module versioning support Module versioning implementation (genksyms (from source code)) ---> [ ] Extended Module Versioning Support [*] Basic Module Versioning Support [*] Source checksum for all modules [*] Module signature verification [ ] Require modules to be validly signed [ ] Automatically sign all modules Hash algorithm to sign modules (SHA-256) ---> Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Signed-off-by: Sami Tolvanen --- kernel/module/Kconfig | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index be74917802ad..43b1bb01fd27 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -169,9 +169,10 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +if MODVERSIONS + choice prompt "Module versioning implementation" - depends on MODVERSIONS help Select the tool used to calculate symbol versions for modules. @@ -206,7 +207,7 @@ endchoice config ASM_MODVERSIONS bool - default HAVE_ASM_MODVERSIONS && MODVERSIONS + default HAVE_ASM_MODVERSIONS help This enables module versioning for exported symbols also from assembly. This can be enabled only when the target architecture @@ -214,7 +215,6 @@ config ASM_MODVERSIONS config EXTENDED_MODVERSIONS bool "Extended Module Versioning Support" - depends on MODVERSIONS help This enables extended MODVERSIONs support, allowing long symbol names to be versioned. @@ -224,7 +224,6 @@ config EXTENDED_MODVERSIONS config BASIC_MODVERSIONS bool "Basic Module Versioning Support" - depends on MODVERSIONS default y help This enables basic MODVERSIONS support, allowing older tools or @@ -237,6 +236,8 @@ config BASIC_MODVERSIONS This is enabled by default when MODVERSIONS are enabled. If unsure, say Y. +endif # MODVERSIONS + config MODULE_SRCVERSION_ALL bool "Source checksum for all modules" help @@ -277,10 +278,11 @@ config MODULE_SIG_FORCE Reject unsigned modules or signed modules for which we don't have a key. Without this, such modules will simply taint the kernel. +if MODULE_SIG || IMA_APPRAISE_MODSIG + config MODULE_SIG_ALL bool "Automatically sign all modules" default y - depends on MODULE_SIG || IMA_APPRAISE_MODSIG help Sign all modules during make modules_install. Without this option, modules must be signed manually, using the scripts/sign-file tool. @@ -290,7 +292,6 @@ comment "Do not forget to sign required modules with scripts/sign-file" choice prompt "Hash algorithm to sign modules" - depends on MODULE_SIG || IMA_APPRAISE_MODSIG default MODULE_SIG_SHA512 help This determines which sort of hashing algorithm will be used during @@ -327,7 +328,6 @@ endchoice config MODULE_SIG_HASH string - depends on MODULE_SIG || IMA_APPRAISE_MODSIG default "sha256" if MODULE_SIG_SHA256 default "sha384" if MODULE_SIG_SHA384 default "sha512" if MODULE_SIG_SHA512 @@ -335,6 +335,8 @@ config MODULE_SIG_HASH default "sha3-384" if MODULE_SIG_SHA3_384 default "sha3-512" if MODULE_SIG_SHA3_512 +endif # MODULE_SIG || IMA_APPRAISE_MODSIG + config MODULE_COMPRESS bool "Module compression" help @@ -350,9 +352,10 @@ config MODULE_COMPRESS If unsure, say N. +if MODULE_COMPRESS + choice prompt "Module compression type" - depends on MODULE_COMPRESS help Choose the supported algorithm for module compression. @@ -379,7 +382,6 @@ endchoice config MODULE_COMPRESS_ALL bool "Automatically compress all modules" default y - depends on MODULE_COMPRESS help Compress all modules during 'make modules_install'. @@ -389,7 +391,6 @@ config MODULE_COMPRESS_ALL config MODULE_DECOMPRESS bool "Support in-kernel module decompression" - depends on MODULE_COMPRESS select ZLIB_INFLATE if MODULE_COMPRESS_GZIP select XZ_DEC if MODULE_COMPRESS_XZ select ZSTD_DECOMPRESS if MODULE_COMPRESS_ZSTD @@ -400,6 +401,8 @@ config MODULE_DECOMPRESS If unsure, say N. +endif # MODULE_COMPRESS + config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS bool "Allow loading of modules with missing namespace imports" help From 1f0638604f65dae9fc8b6ce937907daf5f0132d0 Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Sun, 22 Feb 2026 16:25:22 +0800 Subject: [PATCH 192/828] selftests/sched_ext: Fix unused-result warning for read() The read() call in run_test() triggers a warn_unused_result compiler warning, which breaks the build under -Werror. Check the return value of read() and exit the child process on failure to satisfy the compiler and handle pipe read errors. Reviewed-by: Andrea Righi Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/init_enable_count.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c index 82c71653977b..44577e30e764 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.c @@ -57,7 +57,8 @@ static enum scx_test_status run_test(bool global) char buf; close(pipe_fds[1]); - read(pipe_fds[0], &buf, 1); + if (read(pipe_fds[0], &buf, 1) < 0) + exit(1); close(pipe_fds[0]); exit(0); } From 0c36a6f6f0eb071379b7995ef571c18c1235adf2 Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Sun, 22 Feb 2026 21:08:25 +0800 Subject: [PATCH 193/828] tools/sched_ext: scx_central: Remove unused '-p' option The '-p' option is defined in getopt() but not handled in the switch statement or documented in the help text. Providing '-p' currently triggers the default error path. Remove it to sync the optstring with the actual implementation. Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- tools/sched_ext/scx_central.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index 2a805f1d6c8f..710fa03376e2 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -66,7 +66,7 @@ restart: assert(skel->rodata->nr_cpu_ids > 0); assert(skel->rodata->nr_cpu_ids <= INT32_MAX); - while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) { + while ((opt = getopt(argc, argv, "s:c:vh")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000; From cbb297323dbed4f6087b5f189585413801d1d45b Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Sun, 22 Feb 2026 21:08:26 +0800 Subject: [PATCH 194/828] tools/sched_ext: scx_sdt: Remove unused '-f' option The '-f' option is defined in getopt() but not handled in the switch statement or documented in the help text. Providing '-f' currently triggers the default error path. Remove it to sync the optstring with the actual implementation. Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- tools/sched_ext/scx_sdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sched_ext/scx_sdt.c b/tools/sched_ext/scx_sdt.c index d8ca9aa316a5..a36405d8df30 100644 --- a/tools/sched_ext/scx_sdt.c +++ b/tools/sched_ext/scx_sdt.c @@ -54,7 +54,7 @@ restart: optind = 1; skel = SCX_OPS_OPEN(sdt_ops, scx_sdt); - while ((opt = getopt(argc, argv, "fvh")) != -1) { + while ((opt = getopt(argc, argv, "vh")) != -1) { switch (opt) { case 'v': verbose = true; From 075e70d13edfcb309c8fdc1f444fbd1f834ca1d4 Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Mon, 23 Feb 2026 02:47:09 +0800 Subject: [PATCH 195/828] selftests/sched_ext: Remove duplicated unistd.h include in rt_stall.c The header is included twice in rt_stall.c. Remove the redundant inclusion to clean up the code. Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/rt_stall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c index ab772e336f86..81ea9b4883e5 100644 --- a/tools/testing/selftests/sched_ext/rt_stall.c +++ b/tools/testing/selftests/sched_ext/rt_stall.c @@ -15,7 +15,6 @@ #include #include #include -#include #include "rt_stall.bpf.skel.h" #include "scx_test.h" #include "../kselftest.h" From e7e222ad73d93fe54d6e6e3a15253a0ecf081a1b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 Feb 2026 17:31:23 -0700 Subject: [PATCH 196/828] cxl: Move devm_cxl_add_nvdimm_bridge() to cxl_pmem.ko Moving the symbol devm_cxl_add_nvdimm_bridge() to drivers/cxl/cxl_pmem.c, so that cxl_pmem can export a symbol that gives cxl_acpi a depedency on cxl_pmem kernel module. This is a prepatory patch to resolve the issue of a race for nvdimm_bus object that is created during cxl_acpi_probe(). No functional changes besides moving code. Suggested-by: Dan Williams Acked-by: Ira Weiny Tested-by: Alison Schofield Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20260205001633.1813643-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pmem.c | 13 +++---------- drivers/cxl/cxl.h | 2 ++ drivers/cxl/pmem.c | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 3c6e76721522..b652e3486038 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -115,15 +115,8 @@ static void unregister_nvb(void *_cxl_nvb) device_unregister(&cxl_nvb->dev); } -/** - * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology - * @host: platform firmware root device - * @port: CXL port at the root of a CXL topology - * - * Return: bridge device that can host cxl_nvdimm objects - */ -struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, - struct cxl_port *port) +struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; struct device *dev; @@ -155,7 +148,7 @@ err: put_device(dev); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL"); +EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_nvdimm_bridge, "cxl_pmem"); static void cxl_nvdimm_release(struct device *dev) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 04c673e7cdb0..f5850800f400 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -920,6 +920,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev); struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); +struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 6a97e4e490b6..c67b30b516bf 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -13,6 +13,20 @@ static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); +/** + * __devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology + * @host: platform firmware root device + * @port: CXL port at the root of a CXL topology + * + * Return: bridge device that can host cxl_nvdimm objects + */ +struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port) +{ + return __devm_cxl_add_nvdimm_bridge(host, port); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL"); + static void clear_exclusive(void *mds) { clear_exclusive_cxl_commands(mds, exclusive_cmds); From 43d37df67f7770d8d261fdcb64ecc8c314e91303 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 14:30:59 -0800 Subject: [PATCH 197/828] drm/xe/wa: Steer RMW of MCR registers while building default LRC When generating the default LRC, if a register is not masked, we apply any save-restore programming necessary via a read-modify-write sequence that will ensure we only update the relevant bits/fields without clobbering the rest of the register. However some of the registers that need to be updated might be MCR registers which require steering to a non-terminated instance to ensure we can read back a valid, non-zero value. The steering of reads originating from a command streamer is controlled by register CS_MMIO_GROUP_INSTANCE_SELECT. Emit additional MI_LRI commands to update the steering before any RMW of an MCR register to ensure the reads are performed properly. Note that needing to perform a RMW of an MCR register while building the default LRC is pretty rare. Most of the MCR registers that are part of an engine's LRCs are also masked registers, so no MCR is necessary. Fixes: f2f90989ccff ("drm/xe: Avoid reading RMW registers in emit_wa_job") Cc: Michal Wajdeczko Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260206223058.387014-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit 6c2e331c915ba9e774aa847921262805feb00863) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 6 +++ drivers/gpu/drm/xe/xe_gt.c | 66 +++++++++++++++++++----- 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 68172b0248a6..dc5a4fafa70c 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -96,6 +96,12 @@ #define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13) #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) + +#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc) +#define SELECTIVE_READ_ADDRESSING REG_BIT(30) +#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23) +#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9d090d0f2438..df6d04704823 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return ret; } +/* Dwords required to emit a RMW of a register */ +#define EMIT_RMW_DW 20 + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { - struct xe_reg_sr *sr = &q->hwe->reg_lrc; + struct xe_hw_engine *hwe = q->hwe; + struct xe_reg_sr *sr = &hwe->reg_lrc; struct xe_reg_sr_entry *entry; - int count_rmw = 0, count = 0, ret; + int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret; unsigned long idx; struct xe_bb *bb; size_t bb_len = 0; @@ -224,6 +228,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) ++count; + else if (entry->reg.mcr) + ++count_rmw_mcr; else ++count_rmw; } @@ -231,17 +237,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (count) bb_len += count * 2 + 1; - if (count_rmw) - bb_len += count_rmw * 20 + 7; + /* + * RMW of MCR registers is the same as a normal RMW, except an + * additional LRI (3 dwords) is required per register to steer the read + * to a nom-terminated instance. + * + * We could probably shorten the batch slightly by eliding the + * steering for consecutive MCR registers that have the same + * group/instance target, but it's not worth the extra complexity to do + * so. + */ + bb_len += count_rmw * EMIT_RMW_DW; + bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3); - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * After doing all RMW, we need 7 trailing dwords to clean up, + * plus an additional 3 dwords to reset steering if any of the + * registers were MCR. + */ + if (count_rmw || count_rmw_mcr) + bb_len += 7 + (count_rmw_mcr ? 3 : 0); + + if (hwe->class == XE_ENGINE_CLASS_RENDER) /* * Big enough to emit all of the context's 3DSTATE via * xe_lrc_emit_hwe_state_instructions() */ - bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32); - xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len); bb = xe_bb_new(gt, bb_len, false); if (IS_ERR(bb)) @@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } - if (count_rmw) { - /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ - + if (count_rmw || count_rmw_mcr) { xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; + if (entry->reg.mcr) { + struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw }; + u8 group, instance; + + xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr; + *cs++ = SELECTIVE_READ_ADDRESSING | + REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) | + REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance); + } + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; *cs++ = entry->reg.addr; *cs++ = CS_GPR_REG(0, 0).addr; @@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = CS_GPR_REG(0, 0).addr; *cs++ = entry->reg.addr; - xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", - entry->reg.addr, entry->clr_bits, entry->set_bits); + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n", + entry->reg.addr, entry->clr_bits, entry->set_bits, + entry->reg.mcr ? " (MCR)" : ""); } /* reset used GPR */ @@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = 0; *cs++ = CS_GPR_REG(0, 2).addr; *cs++ = 0; + + /* reset steering */ + if (count_rmw_mcr) { + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr; + *cs++ = 0; + } } cs = xe_lrc_emit_hwe_state_instructions(q, cs); From 7dff99b354601dd01829e1511711846e04340a69 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Feb 2026 11:18:48 -0800 Subject: [PATCH 198/828] Remove WARN_ALL_UNSEEDED_RANDOM kernel config option This config option goes way back - it used to be an internal debug option to random.c (at that point called DEBUG_RANDOM_BOOT), then was renamed and exposed as a config option as CONFIG_WARN_UNSEEDED_RANDOM, and then further renamed to the current CONFIG_WARN_ALL_UNSEEDED_RANDOM. It was all done with the best of intentions: the more limited rate-limited reports were reporting some cases, but if you wanted to see all the gory details, you'd enable this "ALL" option. However, it turns out - perhaps not surprisingly - that when people don't care about and fix the first rate-limited cases, they most certainly don't care about any others either, and so warning about all of them isn't actually helping anything. And the non-ratelimited reporting causes problems, where well-meaning people enable debug options, but the excessive flood of messages that nobody cares about will hide actual real information when things go wrong. I just got a kernel bug report (which had nothing to do with randomness) where two thirds of the the truncated dmesg was just variations of random: get_random_u32 called from __get_random_u32_below+0x10/0x70 with crng_init=0 and in the process early boot messages had been lost (in addition to making the messages that _hadn't_ been lost harder to read). The proper way to find these things for the hypothetical developer that cares - if such a person exists - is almost certainly with boot time tracing. That gives you the option to get call graphs etc too, which is likely a requirement for fixing any problems anyway. See Documentation/trace/boottime-trace.rst for that option. And if we for some reason do want to re-introduce actual printing of these things, it will need to have some uniqueness filtering rather than this "just print it all" model. Fixes: cc1e127bfa95 ("random: remove ratelimiting for in-kernel unseeded randomness") Acked-by: Jason Donenfeld Signed-off-by: Linus Torvalds --- drivers/char/random.c | 12 +----------- kernel/configs/debug.config | 1 - lib/Kconfig.debug | 27 --------------------------- 3 files changed, 1 insertion(+), 39 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index dcd002e1b890..7ff4d29911fd 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -96,8 +96,7 @@ static ATOMIC_NOTIFIER_HEAD(random_ready_notifier); /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); -static int ratelimit_disable __read_mostly = - IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); +static int ratelimit_disable __read_mostly = 0; module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); @@ -168,12 +167,6 @@ int __cold execute_with_initialized_rng(struct notifier_block *nb) return ret; } -#define warn_unseeded_randomness() \ - if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ - __func__, (void *)_RET_IP_, crng_init) - - /********************************************************************* * * Fast key erasure RNG, the "crng". @@ -434,7 +427,6 @@ static void _get_random_bytes(void *buf, size_t len) */ void get_random_bytes(void *buf, size_t len) { - warn_unseeded_randomness(); _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); @@ -523,8 +515,6 @@ type get_random_ ##type(void) \ struct batch_ ##type *batch; \ unsigned long next_gen; \ \ - warn_unseeded_randomness(); \ - \ if (!crng_ready()) { \ _get_random_bytes(&ret, sizeof(ret)); \ return ret; \ diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 774702591d26..307c97ac5fa9 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -29,7 +29,6 @@ CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_UBSAN_ALIGNMENT is not set # CONFIG_UBSAN_DIV_ZERO is not set # CONFIG_UBSAN_TRAP is not set -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS_ALLOW_ALL=y CONFIG_DEBUG_IRQFLAGS=y diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4e2dfbbd3d78..318df4c75454 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1766,33 +1766,6 @@ config STACKTRACE It is also used by various kernel debugging features that require stack trace generation. -config WARN_ALL_UNSEEDED_RANDOM - bool "Warn for all uses of unseeded randomness" - default n - help - Some parts of the kernel contain bugs relating to their use of - cryptographically secure random numbers before it's actually possible - to generate those numbers securely. This setting ensures that these - flaws don't go unnoticed, by enabling a message, should this ever - occur. This will allow people with obscure setups to know when things - are going wrong, so that they might contact developers about fixing - it. - - Unfortunately, on some models of some architectures getting - a fully seeded CRNG is extremely difficult, and so this can - result in dmesg getting spammed for a surprisingly long - time. This is really bad from a security perspective, and - so architecture maintainers really need to do what they can - to get the CRNG seeded sooner after the system is booted. - However, since users cannot do anything actionable to - address this, by default this option is disabled. - - Say Y here if you want to receive warnings for all uses of - unseeded randomness. This will be of use primarily for - those developers interested in improving the security of - Linux kernels running on their architecture (or - subarchitecture). - config DEBUG_KOBJECT bool "kobject debugging" depends on DEBUG_KERNEL From f9d69d5e7bde2295eb7488a56f094ac8f5383b92 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Tue, 30 Dec 2025 10:32:08 -0800 Subject: [PATCH 199/828] module: Fix kernel panic when a symbol st_shndx is out of bounds The module loader doesn't check for bounds of the ELF section index in simplify_symbols(): for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) { const char *name = info->strtab + sym[i].st_name; switch (sym[i].st_shndx) { case SHN_COMMON: [...] default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == info->index.pcpu) secbase = (unsigned long)mod_percpu(mod); else /** HERE --> **/ secbase = info->sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; break; } } A symbol with an out-of-bounds st_shndx value, for example 0xffff (known as SHN_XINDEX or SHN_HIRESERVE), may cause a kernel panic: BUG: unable to handle page fault for address: ... RIP: 0010:simplify_symbols+0x2b2/0x480 ... Kernel panic - not syncing: Fatal exception This can happen when module ELF is legitimately using SHN_XINDEX or when it is corrupted. Add a bounds check in simplify_symbols() to validate that st_shndx is within the valid range before using it. This issue was discovered due to a bug in llvm-objcopy, see relevant discussion for details [1]. [1] https://lore.kernel.org/linux-modules/20251224005752.201911-1-ihor.solodrai@linux.dev/ Signed-off-by: Ihor Solodrai Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Signed-off-by: Sami Tolvanen --- kernel/module/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/module/main.c b/kernel/module/main.c index eec6c9cf8dbb..c3ce106c70af 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1568,6 +1568,13 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) break; default: + if (sym[i].st_shndx >= info->hdr->e_shnum) { + pr_err("%s: Symbol %s has an invalid section index %u (max %u)\n", + mod->name, name, sym[i].st_shndx, info->hdr->e_shnum - 1); + ret = -ENOEXEC; + break; + } + /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == info->index.pcpu) secbase = (unsigned long)mod_percpu(mod); From 16cb1a64dce56708a1684bf755ad52fb52c41f87 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Feb 2026 13:12:00 +0100 Subject: [PATCH 200/828] RDMA/uverbs: select CONFIG_DMA_SHARED_BUFFER The addition of dmabuf support in uverbs means that it is no longer possible to build infiniband support if that is disabled: arm-linux-gnueabi-ld: drivers/infiniband/core/ib_core_uverbs.o: in function `rdma_user_mmap_entry_remove.part.0': ib_core_uverbs.c:(.text+0x508): undefined reference to `dma_buf_move_notify' (dma_buf_move_notify): Unknown destination type (ARM/Thumb) in drivers/infiniband/core/ib_core_uverbs.o ib_core_uverbs.c:(.text+0x518): undefined reference to `dma_resv_wait_timeout' (dma_resv_wait_timeout): Unknown destination type (ARM/Thumb) in drivers/infiniband/core/ib_core_uverbs.o Select this from Kconfig, as we do for the other users. Fixes: 0ac6f4056c4a ("RDMA/uverbs: Add DMABUF object type and operations") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260216121213.2088910-1-arnd@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 794b9778816b..78ac2ff5befd 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -6,6 +6,7 @@ menuconfig INFINIBAND depends on INET depends on m || IPV6 != m depends on !ALPHA + select DMA_SHARED_BUFFER select IRQ_POLL select DIMLIB help From 7accb1c4321acb617faf934af59d928b0b047e2b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 3 Feb 2026 15:16:16 -0500 Subject: [PATCH 201/828] Bluetooth: L2CAP: Fix invalid response to L2CAP_ECRED_RECONF_REQ This fixes responding with an invalid result caused by checking the wrong size of CID which should have been (cmd_len - sizeof(*req)) and on top of it the wrong result was use L2CAP_CR_LE_INVALID_PARAMS which is invalid/reserved for reconf when running test like L2CAP/ECFC/BI-03-C: > ACL Data RX: Handle 64 flags 0x02 dlen 14 LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6 MTU: 64 MPS: 64 Source CID: 64 < ACL Data TX: Handle 64 flags 0x00 dlen 10 LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2 ! Result: Reserved (0x000c) Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003) Fiix L2CAP/ECFC/BI-04-C which expects L2CAP_RECONF_INVALID_MPS (0x0002) when more than one channel gets its MPS reduced: > ACL Data RX: Handle 64 flags 0x02 dlen 16 LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 8 MTU: 264 MPS: 99 Source CID: 64 ! Source CID: 65 < ACL Data TX: Handle 64 flags 0x00 dlen 10 LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2 ! Result: Reconfiguration successful (0x0000) Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002) Fix L2CAP/ECFC/BI-05-C when SCID is invalid (85 unconnected): > ACL Data RX: Handle 64 flags 0x02 dlen 14 LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6 MTU: 65 MPS: 64 ! Source CID: 85 < ACL Data TX: Handle 64 flags 0x00 dlen 10 LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2 ! Result: Reconfiguration successful (0x0000) Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003) Fix L2CAP/ECFC/BI-06-C when MPS < L2CAP_ECRED_MIN_MPS (64): > ACL Data RX: Handle 64 flags 0x02 dlen 14 LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6 MTU: 672 ! MPS: 63 Source CID: 64 < ACL Data TX: Handle 64 flags 0x00 dlen 10 LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2 ! Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002) Result: Reconfiguration failed - other unacceptable parameters (0x0004) Fix L2CAP/ECFC/BI-07-C when MPS reduced for more than one channel: > ACL Data RX: Handle 64 flags 0x02 dlen 16 LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 3 len 8 MTU: 84 ! MPS: 71 Source CID: 64 ! Source CID: 65 < ACL Data TX: Handle 64 flags 0x00 dlen 10 LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2 ! Result: Reconfiguration successful (0x0000) Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002) Link: https://github.com/bluez/bluez/issues/1865 Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 2 + net/bluetooth/l2cap_core.c | 71 ++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index ec3af01e4db9..6f9cf7a05986 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -493,6 +493,8 @@ struct l2cap_ecred_reconf_req { #define L2CAP_RECONF_SUCCESS 0x0000 #define L2CAP_RECONF_INVALID_MTU 0x0001 #define L2CAP_RECONF_INVALID_MPS 0x0002 +#define L2CAP_RECONF_INVALID_CID 0x0003 +#define L2CAP_RECONF_INVALID_PARAMS 0x0004 struct l2cap_ecred_reconf_rsp { __le16 result; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b628b0fa39b2..81038458be0c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5310,14 +5310,14 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, struct l2cap_ecred_reconf_req *req = (void *) data; struct l2cap_ecred_reconf_rsp rsp; u16 mtu, mps, result; - struct l2cap_chan *chan; + struct l2cap_chan *chan[L2CAP_ECRED_MAX_CID] = {}; int i, num_scid; if (!enable_ecred) return -EINVAL; - if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { - result = L2CAP_CR_LE_INVALID_PARAMS; + if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { + result = L2CAP_RECONF_INVALID_CID; goto respond; } @@ -5327,42 +5327,69 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, BT_DBG("mtu %u mps %u", mtu, mps); if (mtu < L2CAP_ECRED_MIN_MTU) { - result = L2CAP_RECONF_INVALID_MTU; + result = L2CAP_RECONF_INVALID_PARAMS; goto respond; } if (mps < L2CAP_ECRED_MIN_MPS) { - result = L2CAP_RECONF_INVALID_MPS; + result = L2CAP_RECONF_INVALID_PARAMS; goto respond; } cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); + + if (num_scid > L2CAP_ECRED_MAX_CID) { + result = L2CAP_RECONF_INVALID_PARAMS; + goto respond; + } + result = L2CAP_RECONF_SUCCESS; + /* Check if each SCID, MTU and MPS are valid */ for (i = 0; i < num_scid; i++) { u16 scid; scid = __le16_to_cpu(req->scid[i]); - if (!scid) - return -EPROTO; - - chan = __l2cap_get_chan_by_dcid(conn, scid); - if (!chan) - continue; - - /* If the MTU value is decreased for any of the included - * channels, then the receiver shall disconnect all - * included channels. - */ - if (chan->omtu > mtu) { - BT_ERR("chan %p decreased MTU %u -> %u", chan, - chan->omtu, mtu); - result = L2CAP_RECONF_INVALID_MTU; + if (!scid) { + result = L2CAP_RECONF_INVALID_CID; + goto respond; } - chan->omtu = mtu; - chan->remote_mps = mps; + chan[i] = __l2cap_get_chan_by_dcid(conn, scid); + if (!chan[i]) { + result = L2CAP_RECONF_INVALID_CID; + goto respond; + } + + /* The MTU field shall be greater than or equal to the greatest + * current MTU size of these channels. + */ + if (chan[i]->omtu > mtu) { + BT_ERR("chan %p decreased MTU %u -> %u", chan[i], + chan[i]->omtu, mtu); + result = L2CAP_RECONF_INVALID_MTU; + goto respond; + } + + /* If more than one channel is being configured, the MPS field + * shall be greater than or equal to the current MPS size of + * each of these channels. If only one channel is being + * configured, the MPS field may be less than the current MPS + * of that channel. + */ + if (chan[i]->remote_mps >= mps && i) { + BT_ERR("chan %p decreased MPS %u -> %u", chan[i], + chan[i]->remote_mps, mps); + result = L2CAP_RECONF_INVALID_MPS; + goto respond; + } + } + + /* Commit the new MTU and MPS values after checking they are valid */ + for (i = 0; i < num_scid; i++) { + chan[i]->omtu = mtu; + chan[i]->remote_mps = mps; } respond: From c28d2bff70444a85b3b86aaf241ece9408c7858c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Feb 2026 15:11:34 -0500 Subject: [PATCH 202/828] Bluetooth: L2CAP: Fix result of L2CAP_ECRED_CONN_RSP when MTU is too short Test L2CAP/ECFC/BV-26-C expect the response to L2CAP_ECRED_CONN_REQ with and MTU value < L2CAP_ECRED_MIN_MTU (64) to be L2CAP_CR_LE_INVALID_PARAMS rather than L2CAP_CR_LE_UNACCEPT_PARAMS. Also fix not including the correct number of CIDs in the response since the spec requires all CIDs being rejected to be included in the response. Link: https://github.com/bluez/bluez/issues/1868 Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 6 +++--- net/bluetooth/l2cap_core.c | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 6f9cf7a05986..010f1a8fd15f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -284,9 +284,9 @@ struct l2cap_conn_rsp { #define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 -#define L2CAP_CR_LE_SCID_IN_USE 0X000A -#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B -#define L2CAP_CR_LE_INVALID_PARAMS 0X000C +#define L2CAP_CR_LE_SCID_IN_USE 0x000A +#define L2CAP_CR_LE_UNACCEPT_PARAMS 0x000B +#define L2CAP_CR_LE_INVALID_PARAMS 0x000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 81038458be0c..390d25909104 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5051,13 +5051,15 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, struct l2cap_chan *chan, *pchan; u16 mtu, mps; __le16 psm; - u8 result, len = 0; + u8 result, rsp_len = 0; int i, num_scid; bool defer = false; if (!enable_ecred) return -EINVAL; + memset(pdu, 0, sizeof(*pdu)); + if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; @@ -5066,6 +5068,9 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); + /* Always respond with the same number of scids as in the request */ + rsp_len = cmd_len; + if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; @@ -5075,7 +5080,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, mps = __le16_to_cpu(req->mps); if (mtu < L2CAP_ECRED_MIN_MTU || mps < L2CAP_ECRED_MIN_MPS) { - result = L2CAP_CR_LE_UNACCEPT_PARAMS; + result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } @@ -5095,8 +5100,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); - memset(pdu, 0, sizeof(*pdu)); - /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -5121,7 +5124,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("scid[%d] 0x%4.4x", i, scid); pdu->dcid[i] = 0x0000; - len += sizeof(*pdu->dcid); /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { @@ -5188,7 +5190,7 @@ response: return 0; l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, - sizeof(*pdu) + len, pdu); + sizeof(*pdu) + rsp_len, pdu); return 0; } From 21e4271e65094172aadd5beb8caea95dd0fbf6d7 Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Wed, 11 Feb 2026 15:03:35 -0300 Subject: [PATCH 203/828] Bluetooth: purge error queues in socket destructors When TX timestamping is enabled via SO_TIMESTAMPING, SKBs may be queued into sk_error_queue and will stay there until consumed. If userspace never gets to read the timestamps, or if the controller is removed unexpectedly, these SKBs will leak. Fix by adding skb_queue_purge() calls for sk_error_queue in affected bluetooth destructors. RFCOMM does not currently use sk_error_queue. Fixes: 134f4b39df7b ("Bluetooth: add support for skb TX SND/COMPLETION timestamping") Reported-by: syzbot+7ff4013eabad1407b70a@syzkaller.appspotmail.com Closes: https://syzbot.org/bug?extid=7ff4013eabad1407b70a Cc: stable@vger.kernel.org Signed-off-by: Heitor Alves de Siqueira Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sock.c | 1 + net/bluetooth/iso.c | 1 + net/bluetooth/l2cap_sock.c | 1 + net/bluetooth/sco.c | 1 + 4 files changed, 4 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4e7bf63af9c5..0290dea081f6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -2166,6 +2166,7 @@ static void hci_sock_destruct(struct sock *sk) mgmt_cleanup(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static const struct proto_ops hci_sock_ops = { diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 1459ab161fd2..a38d3774176d 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -746,6 +746,7 @@ static void iso_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void iso_sock_cleanup_listen(struct sock *parent) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 3ba3ce7eaa98..62ceda979f39 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1817,6 +1817,7 @@ static void l2cap_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void l2cap_skb_msg_name(struct sk_buff *skb, void *msg_name, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 87ba90336e80..cccfaf560317 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -470,6 +470,7 @@ static void sco_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void sco_sock_cleanup_listen(struct sock *parent) From 5c4e9a8b18457ad28b57069ef0f14661e3192b2e Mon Sep 17 00:00:00 2001 From: Jinwang Li Date: Thu, 5 Feb 2026 14:26:00 +0800 Subject: [PATCH 204/828] Bluetooth: hci_qca: Cleanup on all setup failures The setup process previously combined error handling and retry gating under one condition. As a result, the final failed attempt exited without performing cleanup. Update the failure path to always perform power and port cleanup on setup failure, and reopen the port only when retrying. Fixes: 9e80587aba4c ("Bluetooth: hci_qca: Enhance retry logic in qca_setup") Signed-off-by: Jinwang Li Reviewed-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index c511546f793e..c6de2602b49d 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2046,19 +2046,23 @@ retry: } out: - if (ret && retries < MAX_INIT_RETRIES) { - bt_dev_warn(hdev, "Retry BT power ON:%d", retries); + if (ret) { qca_power_shutdown(hu); - if (hu->serdev) { - serdev_device_close(hu->serdev); - ret = serdev_device_open(hu->serdev); - if (ret) { - bt_dev_err(hdev, "failed to open port"); - return ret; + + if (retries < MAX_INIT_RETRIES) { + bt_dev_warn(hdev, "Retry BT power ON:%d", retries); + if (hu->serdev) { + serdev_device_close(hu->serdev); + ret = serdev_device_open(hu->serdev); + if (ret) { + bt_dev_err(hdev, "failed to open port"); + return ret; + } } + retries++; + goto retry; } - retries++; - goto retry; + return ret; } /* Setup bdaddr */ From 05761c2c2b5bfec85c47f60c903c461e9b56cf87 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 11 Feb 2026 15:18:03 -0500 Subject: [PATCH 205/828] Bluetooth: L2CAP: Fix response to L2CAP_ECRED_CONN_REQ Similar to 03dba9cea72f ("Bluetooth: L2CAP: Fix not responding with L2CAP_CR_LE_ENCRYPTION") the result code L2CAP_CR_LE_ENCRYPTION shall be used when BT_SECURITY_MEDIUM is set since that means security mode 2 which mean it doesn't require authentication which results in qualification test L2CAP/ECFC/BV-32-C failing. Link: https://github.com/bluez/bluez/issues/1871 Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 390d25909104..9452c6179acb 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5112,7 +5112,8 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { - result = L2CAP_CR_LE_AUTHENTICATION; + result = pchan->sec_level == BT_SECURITY_MEDIUM ? + L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION; goto unlock; } From f9a1767ce3a34bc33c3d33473f65dc13a380e379 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:11 -0500 Subject: [PATCH 206/828] cgroup/cpuset: Fix incorrect change to effective_xcpus in partition_xcpus_del() The effective_xcpus of a cpuset can contain offline CPUs. In partition_xcpus_del(), the xcpus parameter is incorrectly used as a temporary cpumask to mask out offline CPUs. As xcpus can be the effective_xcpus of a cpuset, this can result in unexpected changes in that cpumask. Fix this problem by not making any changes to the xcpus parameter. Fixes: 11e5f407b64a ("cgroup/cpuset: Keep track of CPUs in isolated partitions") Reviewed-by: Chen Ridong Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index c43efef7df71..a366ef84f982 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1221,8 +1221,8 @@ static void partition_xcpus_del(int old_prs, struct cpuset *parent, isolated_cpus_update(old_prs, parent->partition_root_state, xcpus); - cpumask_and(xcpus, xcpus, cpu_active_mask); cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); + cpumask_and(parent->effective_cpus, parent->effective_cpus, cpu_active_mask); } /* From 68230aac8b9aad243626fbaf3ca170012c17fec5 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:12 -0500 Subject: [PATCH 207/828] cgroup/cpuset: Fix incorrect use of cpuset_update_tasks_cpumask() in update_cpumasks_hier() Commit e2ffe502ba45 ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2") incorrectly changed the 2nd parameter of cpuset_update_tasks_cpumask() from tmp->new_cpus to cp->effective_cpus. This second parameter is just a temporary cpumask for internal use. The cpuset_update_tasks_cpumask() function was originally called update_tasks_cpumask() before commit 381b53c3b549 ("cgroup/cpuset: rename functions shared between v1 and v2"). This mistake can incorrectly change the effective_cpus of the cpuset when it is the top_cpuset or in arm64 architecture where task_cpu_possible_mask() may differ from cpu_possible_mask. So far top_cpuset hasn't been passed to update_cpumasks_hier() yet, but arm64 arch can still be impacted. Fix it by reverting the incorrect change. Fixes: e2ffe502ba45 ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a366ef84f982..8dfffe2dc9ed 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2157,7 +2157,7 @@ get_css: WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); - cpuset_update_tasks_cpumask(cp, cp->effective_cpus); + cpuset_update_tasks_cpumask(cp, tmp->new_cpus); /* * On default hierarchy, inherit the CS_SCHED_LOAD_BALANCE From 17b1860034c769c9f7669ae2612e91ef8fdde769 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:13 -0500 Subject: [PATCH 208/828] cgroup/cpuset: Clarify exclusion rules for cpuset internal variables Clarify the locking rules associated with file level internal variables inside the cpuset code. There is no functional change. Reviewed-by: Chen Ridong Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 105 ++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 8dfffe2dc9ed..165e2967025b 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -61,6 +61,58 @@ static const char * const perr_strings[] = { [PERR_REMOTE] = "Have remote partition underneath", }; +/* + * CPUSET Locking Convention + * ------------------------- + * + * Below are the three global locks guarding cpuset structures in lock + * acquisition order: + * - cpu_hotplug_lock (cpus_read_lock/cpus_write_lock) + * - cpuset_mutex + * - callback_lock (raw spinlock) + * + * A task must hold all the three locks to modify externally visible or + * used fields of cpusets, though some of the internally used cpuset fields + * and internal variables can be modified without holding callback_lock. If only + * reliable read access of the externally used fields are needed, a task can + * hold either cpuset_mutex or callback_lock which are exposed to other + * external subsystems. + * + * If a task holds cpu_hotplug_lock and cpuset_mutex, it blocks others, + * ensuring that it is the only task able to also acquire callback_lock and + * be able to modify cpusets. It can perform various checks on the cpuset + * structure first, knowing nothing will change. It can also allocate memory + * without holding callback_lock. While it is performing these checks, various + * callback routines can briefly acquire callback_lock to query cpusets. Once + * it is ready to make the changes, it takes callback_lock, blocking everyone + * else. + * + * Calls to the kernel memory allocator cannot be made while holding + * callback_lock which is a spinlock, as the memory allocator may sleep or + * call back into cpuset code and acquire callback_lock. + * + * Now, the task_struct fields mems_allowed and mempolicy may be changed + * by other task, we use alloc_lock in the task_struct fields to protect + * them. + * + * The cpuset_common_seq_show() handlers only hold callback_lock across + * small pieces of code, such as when reading out possibly multi-word + * cpumasks and nodemasks. + */ + +static DEFINE_MUTEX(cpuset_mutex); + +/* + * File level internal variables below follow one of the following exclusion + * rules. + * + * RWCS: Read/write-able by holding either cpus_write_lock (and optionally + * cpuset_mutex) or both cpus_read_lock and cpuset_mutex. + * + * CSCB: Readable by holding either cpuset_mutex or callback_lock. Writable + * by holding both cpuset_mutex and callback_lock. + */ + /* * For local partitions, update to subpartitions_cpus & isolated_cpus is done * in update_parent_effective_cpumask(). For remote partitions, it is done in @@ -70,19 +122,18 @@ static const char * const perr_strings[] = { * Exclusive CPUs distributed out to local or remote sub-partitions of * top_cpuset */ -static cpumask_var_t subpartitions_cpus; +static cpumask_var_t subpartitions_cpus; /* RWCS */ /* - * Exclusive CPUs in isolated partitions + * Exclusive CPUs in isolated partitions (shown in cpuset.cpus.isolated) */ -static cpumask_var_t isolated_cpus; +static cpumask_var_t isolated_cpus; /* CSCB */ /* - * isolated_cpus updating flag (protected by cpuset_mutex) - * Set if isolated_cpus is going to be updated in the current - * cpuset_mutex crtical section. + * Set if isolated_cpus is being updated in the current cpuset_mutex + * critical section. */ -static bool isolated_cpus_updating; +static bool isolated_cpus_updating; /* RWCS */ /* * A flag to force sched domain rebuild at the end of an operation. @@ -98,7 +149,7 @@ static bool isolated_cpus_updating; * Note that update_relax_domain_level() in cpuset-v1.c can still call * rebuild_sched_domains_locked() directly without using this flag. */ -static bool force_sd_rebuild; +static bool force_sd_rebuild; /* RWCS */ /* * Partition root states: @@ -218,42 +269,6 @@ struct cpuset top_cpuset = { .partition_root_state = PRS_ROOT, }; -/* - * There are two global locks guarding cpuset structures - cpuset_mutex and - * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel - * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset - * structures. Note that cpuset_mutex needs to be a mutex as it is used in - * paths that rely on priority inheritance (e.g. scheduler - on RT) for - * correctness. - * - * A task must hold both locks to modify cpusets. If a task holds - * cpuset_mutex, it blocks others, ensuring that it is the only task able to - * also acquire callback_lock and be able to modify cpusets. It can perform - * various checks on the cpuset structure first, knowing nothing will change. - * It can also allocate memory while just holding cpuset_mutex. While it is - * performing these checks, various callback routines can briefly acquire - * callback_lock to query cpusets. Once it is ready to make the changes, it - * takes callback_lock, blocking everyone else. - * - * Calls to the kernel memory allocator can not be made while holding - * callback_lock, as that would risk double tripping on callback_lock - * from one of the callbacks into the cpuset code from within - * __alloc_pages(). - * - * If a task is only holding callback_lock, then it has read-only - * access to cpusets. - * - * Now, the task_struct fields mems_allowed and mempolicy may be changed - * by other task, we use alloc_lock in the task_struct fields to protect - * them. - * - * The cpuset_common_seq_show() handlers only hold callback_lock across - * small pieces of code, such as when reading out possibly multi-word - * cpumasks and nodemasks. - */ - -static DEFINE_MUTEX(cpuset_mutex); - /** * cpuset_lock - Acquire the global cpuset mutex * @@ -1163,6 +1178,8 @@ static void reset_partition_data(struct cpuset *cs) static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus) { WARN_ON_ONCE(old_prs == new_prs); + lockdep_assert_held(&callback_lock); + lockdep_assert_held(&cpuset_mutex); if (new_prs == PRS_ISOLATED) cpumask_or(isolated_cpus, isolated_cpus, xcpus); else From 14713ed9e9291813849018f32edbf5f6de362088 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:14 -0500 Subject: [PATCH 209/828] cgroup/cpuset: Set isolated_cpus_updating only if isolated_cpus is changed As cpuset is updating HK_TYPE_DOMAIN housekeeping mask when there is a change in the set of isolated CPUs, making this change is now more costly than before. Right now, the isolated_cpus_updating flag can be set even if there is no real change in isolated_cpus. Put in additional checks to make sure that isolated_cpus_updating is set only if there is a real change in isolated_cpus. Reviewed-by: Chen Ridong Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 165e2967025b..edc67c6aa553 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1180,11 +1180,15 @@ static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus WARN_ON_ONCE(old_prs == new_prs); lockdep_assert_held(&callback_lock); lockdep_assert_held(&cpuset_mutex); - if (new_prs == PRS_ISOLATED) + if (new_prs == PRS_ISOLATED) { + if (cpumask_subset(xcpus, isolated_cpus)) + return; cpumask_or(isolated_cpus, isolated_cpus, xcpus); - else + } else { + if (!cpumask_intersects(xcpus, isolated_cpus)) + return; cpumask_andnot(isolated_cpus, isolated_cpus, xcpus); - + } isolated_cpus_updating = true; } From 5e6aac573cc4cc8e62aebf880b824e219a5ff557 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:15 -0500 Subject: [PATCH 210/828] kselftest/cgroup: Simplify test_cpuset_prs.sh by removing "S+" command The "S+" command is used in the test matrix to enable the cpuset controller. However this can be done automatically and we never use the "S-" command to disable cpuset controller. Simplify the test matrix and reduce clutter by removing the command and doing that automatically. There is no functional change to the test cases. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- .../selftests/cgroup/test_cpuset_prs.sh | 213 +++++++++--------- 1 file changed, 104 insertions(+), 109 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 5dff3ad53867..ef16f5610d0b 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -196,7 +196,6 @@ test_add_proc() # P = set cpus.partition (0:member, 1:root, 2:isolated) # C = add cpu-list to cpuset.cpus # X = add cpu-list to cpuset.cpus.exclusive -# S

= use prefix in subtree_control # T = put a task into cgroup # CX = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O= = Write to CPU online file of @@ -209,44 +208,43 @@ test_add_proc() # sched-debug matching which includes offline CPUs and single-CPU partitions # while the second one is for matching cpuset.cpus.isolated. # -SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1" +SETUP_A123_PARTITIONS="C1-3:P1 C2-3:P1 C3:P1" TEST_MATRIX=( # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- - " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1" + " C0-1 . . C2-3 . C4-5 . . 0 A2:0-1" " C0-1 . . C2-3 P1 . . . 0 " - " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 " - " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 " - " C0-1:S+ . . C2-3 . . . P1 0 " - " C0-1:P1 . . C2-3 S+ C1 . . 0 " - " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 " - " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 " + " C0-1 . . C2-3 P1 C0-1:P1 . . 0 " + " C0-1 . . C2-3 P1 C1:P1 . . 0 " + " C0-1 . . C2-3 . . . P1 0 " + " C0-1:P1 . . C2-3 . C1 . . 0 " + " C0-1:P1 . . C2-3 . C1:P1 . . 0 " + " C0-1:P1 . . C2-3 . C1:P1 . P1 0 " " C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5" - " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + " C0-3:P1 C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + " C0-3:P1 C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + " C2-3:P1 C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + " C2-3:P1 C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + " C2-3:P1 C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + " C0-1 . . C2-3 . C4-5 . O2=0 0 A1:0-1|B1:3" + " C0-3:P1 C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + " C0-3:P1 C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + " C0-3:P1 C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + " C0-3:P1 C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + " C2-3:P1 C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + " C2-3:P1 C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + " C2-3:P1 C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + " C2-3:P1 C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" @@ -264,88 +262,87 @@ TEST_MATRIX=( # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2" - " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2" + " C0-3 C1-3 C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3 C1-3 C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3 X1-3 X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3 X1-3 X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + " C0-3 C1-3 C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ A1:P0|A2:P1|A3:P2|B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ A1:P0|A2:P1|A3:P0|B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + " C0-3 C1-3 C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ A1:P0|A2:P2|A3:P1 2-4|2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ A1:P0|A2:P2|A3:P1 2" - " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ + " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ A1:P0|A2:P-2|A3:P-1 ." - " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ + " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3 C1-3 C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." + " C0-3 C1-3 C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" + " C0-3 C1-3 C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." - " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" + " C0-3 C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." + " C0-3 C1-3:X3:P2 . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" + " C1-3:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" # Local partition invalidation tests - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" + " C0-5:P2 C4-5:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:P2 C4-5:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ A1:P0|A3:P-2 ." - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ A1:P0|A3:P-2 ." - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ A1:P0|A3:P2 3" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ A1:P0|A3:P2 3" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \ . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ A1:P0|A3:P-2" @@ -356,37 +353,37 @@ TEST_MATRIX=( # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + " C2-3:P1 C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1 C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + " C2-3:P1 C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" + " C0-1:P1 C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" + " C0-1:P1 . . C2-3 . C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" + " C0-1 C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" + " C0-1 C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1 C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" # A non-exclusive cpuset.cpus change will not invalidate its siblings partition. " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:3 A1:P1|B1:P0" @@ -398,23 +395,23 @@ TEST_MATRIX=( # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1 P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1 P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1 P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3" + " C1-4:X3 C1:X3 . . . C . . 0 A2:1-4|XA2:3" # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive # but creating a local partition out of it is not allowed. Similarly and change # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will # invalidate it. - " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + " CX1-4 CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ A1:P0|A2:P2:B1:P1 2-4" - " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + " CX1-4 CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ A1:P0|A2:P2:B1:P-1 2-4" - " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + " CX1-4 CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ A1:P0|A2:P2:B1:P-1 2-4" # When multiple partitions with conflicting cpuset.cpus are created, the @@ -426,14 +423,14 @@ TEST_MATRIX=( " C1-3:X1-3 . . C4-5 . . . C1-2 0 A1:1-3|B1:1-2" # cpuset.cpus can become empty with task in it as it inherits parent's effective CPUs - " C1-3:S+ C2 . . . T:C . . 0 A1:1-3|A2:1-3" + " C1-3 C2 . . . T:C . . 0 A1:1-3|A2:1-3" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5" @@ -465,31 +462,31 @@ REMOTE_TEST_MATRIX=( # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS # ------ ------ ------- ------- ------- ------- ----- ------ -------- - " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \ + " X1-3 X4-6 X1-2 X3 X4-5 X6 \ . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ c11:P2|c12:P2|c21:P2|c22:P2 1-6" - " CX1-4:S+ . X1-2:P2 C3 . . \ + " CX1-4 . X1-2:P2 C3 . . \ . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ p1:P0|c11:P2|c12:P0 1-2" - " CX1-4:S+ . X1-2:P2 . . . \ + " CX1-4 . X1-2:P2 . . . \ X2-4 . . . . . p1:1,3-4|c11:2 \ p1:P0|c11:P2 2" - " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \ + " CX1-5 . X1-2:P2 X3-5:P1 . . \ X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \ p1:P0|c11:P2|c12:P1 2" - " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + " CX1-4 . X1-2:P2 X3-4:P1 . . \ . . X2 . . . p1:1|c11:2|c12:3-4 \ p1:P0|c11:P2|c12:P1 2" # p1 as member, will get its effective CPUs from its parent rtest - " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + " CX1-4 . X1-2:P2 X3-4:P1 . . \ . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ p1:P0|c11:P2|c12:P1 1" - " CX1-4:S+ X5-6:P1:S+ . . . . \ - . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + " CX1-4 X5-6:P1 . . . . \ + . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ 1-2,4-6|1-2,5-6" # c12 whose cpuset.cpus CPUs are all granted to c11 will become invalid partition - " C1-5:P1:S+ . C1-4:P1 C2-3 . . \ + " C1-5:P1 . C1-4:P1 C2-3 . . \ . . . P1 . . p1:5|c11:1-4|c12:5 \ p1:P1|c11:P1|c12:P-1" ) @@ -530,7 +527,6 @@ set_ctrl_state() CGRP=$1 STATE=$2 SHOWERR=${3} - CTRL=${CTRL:=$CONTROLLER} HASERR=0 REDIRECT="2> $TMPMSG" [[ -z "$STATE" || "$STATE" = '.' ]] && return 0 @@ -540,15 +536,16 @@ set_ctrl_state() for CMD in $(echo $STATE | sed -e "s/:/ /g") do TFILE=$CGRP/cgroup.procs - SFILE=$CGRP/cgroup.subtree_control PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - case $CMD in - S*) PREFIX=${CMD#?} - COMM="echo ${PREFIX}${CTRL} > $SFILE" + + # Enable cpuset controller if not enabled yet + [[ -f $CFILE ]] || { + COMM="echo +cpuset > $CGRP/../cgroup.subtree_control" eval $COMM $REDIRECT - ;; + } + case $CMD in X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" @@ -947,7 +944,6 @@ check_test_results() run_state_test() { TEST=$1 - CONTROLLER=cpuset CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 @@ -1003,7 +999,6 @@ run_state_test() run_remote_state_test() { TEST=$1 - CONTROLLER=cpuset [[ -d rtest ]] || mkdir rtest cd rtest echo +cpuset > cgroup.subtree_control From 3bfe47967191f42d17510713b31a47d9284b8c5a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:16 -0500 Subject: [PATCH 211/828] cgroup/cpuset: Move housekeeping_update()/rebuild_sched_domains() together With the latest changes in sched/isolation.c, rebuild_sched_domains*() requires the HK_TYPE_DOMAIN housekeeping cpumask to be properly updated first, if needed, before the sched domains can be rebuilt. So the two naturally fit together. Do that by creating a new update_hk_sched_domains() helper to house both actions. The name of the isolated_cpus_updating flag to control the call to housekeeping_update() is now outdated. So change it to update_housekeeping to better reflect its purpose. Also move the call to update_hk_sched_domains() to the end of cpuset and hotplug operations before releasing the cpuset_mutex. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 51 ++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index edc67c6aa553..14b07a283a2c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -130,10 +130,9 @@ static cpumask_var_t subpartitions_cpus; /* RWCS */ static cpumask_var_t isolated_cpus; /* CSCB */ /* - * Set if isolated_cpus is being updated in the current cpuset_mutex - * critical section. + * Set if housekeeping cpumasks are to be updated. */ -static bool isolated_cpus_updating; /* RWCS */ +static bool update_housekeeping; /* RWCS */ /* * A flag to force sched domain rebuild at the end of an operation. @@ -1189,7 +1188,7 @@ static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus return; cpumask_andnot(isolated_cpus, isolated_cpus, xcpus); } - isolated_cpus_updating = true; + update_housekeeping = true; } /* @@ -1307,22 +1306,22 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus) } /* - * update_isolation_cpumasks - Update external isolation related CPU masks + * update_hk_sched_domains - Update HK cpumasks & rebuild sched domains * - * The following external CPU masks will be updated if necessary: - * - workqueue unbound cpumask + * Update housekeeping cpumasks and rebuild sched domains if necessary. + * This should be called at the end of cpuset or hotplug actions. */ -static void update_isolation_cpumasks(void) +static void update_hk_sched_domains(void) { - int ret; - - if (!isolated_cpus_updating) - return; - - ret = housekeeping_update(isolated_cpus); - WARN_ON_ONCE(ret < 0); - - isolated_cpus_updating = false; + if (update_housekeeping) { + /* Updating HK cpumasks implies rebuild sched domains */ + WARN_ON_ONCE(housekeeping_update(isolated_cpus)); + update_housekeeping = false; + force_sd_rebuild = true; + } + /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */ + if (force_sd_rebuild) + rebuild_sched_domains_locked(); } /** @@ -1473,7 +1472,6 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, cs->remote_partition = true; cpumask_copy(cs->effective_xcpus, tmp->new_cpus); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); cpuset_force_rebuild(); cs->prs_err = 0; @@ -1518,7 +1516,6 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) compute_excpus(cs, cs->effective_xcpus); reset_partition_data(cs); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); cpuset_force_rebuild(); /* @@ -1589,7 +1586,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus, if (xcpus) cpumask_copy(cs->exclusive_cpus, xcpus); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); if (adding || deleting) cpuset_force_rebuild(); @@ -1933,7 +1929,6 @@ write_error: partition_xcpus_add(new_prs, parent, tmp->delmask); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); if ((old_prs != new_prs) && (cmd == partcmd_update)) update_partition_exclusive_flag(cs, new_prs); @@ -2901,7 +2896,6 @@ out: else if (isolcpus_updated) isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); /* Force update if switching back to member & update effective_xcpus */ update_cpumasks_hier(cs, &tmpmask, !new_prs); @@ -3191,9 +3185,8 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of, } free_cpuset(trialcs); - if (force_sd_rebuild) - rebuild_sched_domains_locked(); out_unlock: + update_hk_sched_domains(); cpuset_full_unlock(); if (of_cft(of)->private == FILE_MEMLIST) schedule_flush_migrate_mm(); @@ -3301,6 +3294,7 @@ static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf, cpuset_full_lock(); if (is_cpuset_online(cs)) retval = update_prstate(cs, val); + update_hk_sched_domains(); cpuset_full_unlock(); return retval ?: nbytes; } @@ -3475,6 +3469,7 @@ static void cpuset_css_killed(struct cgroup_subsys_state *css) /* Reset valid partition back to member */ if (is_partition_valid(cs)) update_prstate(cs, PRS_MEMBER); + update_hk_sched_domains(); cpuset_full_unlock(); } @@ -3882,10 +3877,12 @@ static void cpuset_handle_hotplug(void) rcu_read_unlock(); } - /* rebuild sched domains if necessary */ - if (force_sd_rebuild) - rebuild_sched_domains_cpuslocked(); + if (update_housekeeping || force_sd_rebuild) { + mutex_lock(&cpuset_mutex); + update_hk_sched_domains(); + mutex_unlock(&cpuset_mutex); + } free_tmpmasks(ptmp); } From 6df415aa46ec10d607da5063d88492a7c7762074 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:17 -0500 Subject: [PATCH 212/828] cgroup/cpuset: Defer housekeeping_update() calls from CPU hotplug to workqueue The cpuset_handle_hotplug() may need to invoke housekeeping_update(), for instance, when an isolated partition is invalidated because its last active CPU has been put offline. As we are going to enable dynamic update to the nozh_full housekeeping cpumask (HK_TYPE_KERNEL_NOISE) soon with the help of CPU hotplug, allowing the CPU hotplug path to call into housekeeping_update() directly from update_isolation_cpumasks() will likely cause deadlock. So we have to defer any call to housekeeping_update() after the CPU hotplug operation has finished. This is now done via the workqueue where the update_hk_sched_domains() function will be invoked via the hk_sd_workfn(). An concurrent cpuset control file write may have executed the required update_hk_sched_domains() function before the work function is called. So the work function call may become a no-op when it is invoked. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 31 ++++++++++++++++--- .../selftests/cgroup/test_cpuset_prs.sh | 11 ++++++- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 14b07a283a2c..aa915e9b588f 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1324,6 +1324,16 @@ static void update_hk_sched_domains(void) rebuild_sched_domains_locked(); } +/* + * Work function to invoke update_hk_sched_domains() + */ +static void hk_sd_workfn(struct work_struct *work) +{ + cpuset_full_lock(); + update_hk_sched_domains(); + cpuset_full_unlock(); +} + /** * rm_siblings_excl_cpus - Remove exclusive CPUs that are used by sibling cpusets * @parent: Parent cpuset containing all siblings @@ -3796,6 +3806,7 @@ unlock: */ static void cpuset_handle_hotplug(void) { + static DECLARE_WORK(hk_sd_work, hk_sd_workfn); static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; @@ -3878,11 +3889,21 @@ static void cpuset_handle_hotplug(void) } - if (update_housekeeping || force_sd_rebuild) { - mutex_lock(&cpuset_mutex); - update_hk_sched_domains(); - mutex_unlock(&cpuset_mutex); - } + /* + * Queue a work to call housekeeping_update() & rebuild_sched_domains() + * There will be a slight delay before the HK_TYPE_DOMAIN housekeeping + * cpumask can correctly reflect what is in isolated_cpus. + * + * We rely on WORK_STRUCT_PENDING_BIT to not requeue a work item that + * is still pending. Before the pending bit is cleared, the work data + * is copied out and work item dequeued. So it is possible to queue + * the work again before the hk_sd_workfn() is invoked to process the + * previously queued work. Since hk_sd_workfn() doesn't use the work + * item at all, this is not a problem. + */ + if (update_housekeeping || force_sd_rebuild) + queue_work(system_unbound_wq, &hk_sd_work); + free_tmpmasks(ptmp); } diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index ef16f5610d0b..a56f4153c64d 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -245,6 +245,9 @@ TEST_MATRIX=( " C2-3:P1 C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" " C2-3:P1 C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" " C2-3:P1 C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + " C2-3:P1 C3:P2 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-2" + " C1-3:P1 C3:P2 . . . T:O3=0 . . 0 A1:1-2|A2:1-2 A1:P1|A2:P-2 3|" + " C1-3:P1 C3:P2 . . . T:O3=0 O3=1 . 0 A1:1-2|A2:3 A1:P1|A2:P2 3" "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" @@ -761,7 +764,7 @@ check_cgroup_states() # only CPUs in isolated partitions as well as those that are isolated at # boot time. # -# $1 - expected isolated cpu list(s) {,} +# $1 - expected isolated cpu list(s) {|} # - expected sched/domains value # - cpuset.cpus.isolated value = if not defined # @@ -770,6 +773,7 @@ check_isolcpus() EXPECTED_ISOLCPUS=$1 ISCPUS=${CGROUP2}/cpuset.cpus.isolated ISOLCPUS=$(cat $ISCPUS) + HKICPUS=$(cat /sys/devices/system/cpu/isolated) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains if [[ $EXPECTED_ISOLCPUS = . ]] @@ -807,6 +811,11 @@ check_isolcpus() ISOLCPUS= EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN + # + # The inverse of HK_TYPE_DOMAIN cpumask in $HKICPUS should match $ISOLCPUS + # + [[ "$ISOLCPUS" != "$HKICPUS" ]] && return 1 + # # Use the sched domain in debugfs to check isolated CPUs, if available # From a84097e625f2b9e7f273161c004f34b7be63b348 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Feb 2026 13:54:18 -0500 Subject: [PATCH 213/828] cgroup/cpuset: Call housekeeping_update() without holding cpus_read_lock The current cpuset partition code is able to dynamically update the sched domains of a running system and the corresponding HK_TYPE_DOMAIN housekeeping cpumask to perform what is essentially the "isolcpus=domain,..." boot command line feature at run time. The housekeeping cpumask update requires flushing a number of different workqueues which may not be safe with cpus_read_lock() held as the workqueue flushing code may acquire cpus_read_lock() or acquiring locks which have locking dependency with cpus_read_lock() down the chain. Below is an example of such circular locking problem. ====================================================== WARNING: possible circular locking dependency detected 6.18.0-test+ #2 Tainted: G S ------------------------------------------------------ test_cpuset_prs/10971 is trying to acquire lock: ffff888112ba4958 ((wq_completion)sync_wq){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x7a/0x180 but task is already holding lock: ffffffffae47f450 (cpuset_mutex){+.+.}-{4:4}, at: cpuset_partition_write+0x85/0x130 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (cpuset_mutex){+.+.}-{4:4}: -> #3 (cpu_hotplug_lock){++++}-{0:0}: -> #2 (rtnl_mutex){+.+.}-{4:4}: -> #1 ((work_completion)(&arg.work)){+.+.}-{0:0}: -> #0 ((wq_completion)sync_wq){+.+.}-{0:0}: Chain exists of: (wq_completion)sync_wq --> cpu_hotplug_lock --> cpuset_mutex 5 locks held by test_cpuset_prs/10971: #0: ffff88816810e440 (sb_writers#7){.+.+}-{0:0}, at: ksys_write+0xf9/0x1d0 #1: ffff8891ab620890 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0x260/0x5f0 #2: ffff8890a78b83e8 (kn->active#187){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x2b6/0x5f0 #3: ffffffffadf32900 (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x77/0x130 #4: ffffffffae47f450 (cpuset_mutex){+.+.}-{4:4}, at: cpuset_partition_write+0x85/0x130 Call Trace: : touch_wq_lockdep_map+0x93/0x180 __flush_workqueue+0x111/0x10b0 housekeeping_update+0x12d/0x2d0 update_parent_effective_cpumask+0x595/0x2440 update_prstate+0x89d/0xce0 cpuset_partition_write+0xc5/0x130 cgroup_file_write+0x1a5/0x680 kernfs_fop_write_iter+0x3df/0x5f0 vfs_write+0x525/0xfd0 ksys_write+0xf9/0x1d0 do_syscall_64+0x95/0x520 entry_SYSCALL_64_after_hwframe+0x76/0x7e To avoid such a circular locking dependency problem, we have to call housekeeping_update() without holding the cpus_read_lock() and cpuset_mutex. The current set of wq's flushed by housekeeping_update() may not have work functions that call cpus_read_lock() directly, but we are likely to extend the list of wq's that are flushed in the future. Moreover, the current set of work functions may hold locks that may have cpu_hotplug_lock down the dependency chain. So housekeeping_update() is now called after releasing cpus_read_lock and cpuset_mutex at the end of a cpuset operation. These two locks are then re-acquired later before calling rebuild_sched_domains_locked(). To enable mutual exclusion between the housekeeping_update() call and other cpuset control file write actions, a new top level cpuset_top_mutex is introduced. This new mutex will be acquired first to allow sharing variables used by both code paths. However, cpuset update from CPU hotplug can still happen in parallel with the housekeeping_update() call, though that should be rare in production environment. As cpus_read_lock() is now no longer held when tmigr_isolated_exclude_cpumask() is called, it needs to acquire it directly. The lockdep_is_cpuset_held() is also updated to return true if either cpuset_top_mutex or cpuset_mutex is held. Fixes: 03ff73510169 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 47 +++++++++++++++++++++++++++++++---- kernel/sched/isolation.c | 4 +-- kernel/time/timer_migration.c | 4 +-- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index aa915e9b588f..aa45351c6f38 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -65,14 +65,28 @@ static const char * const perr_strings[] = { * CPUSET Locking Convention * ------------------------- * - * Below are the three global locks guarding cpuset structures in lock + * Below are the four global/local locks guarding cpuset structures in lock * acquisition order: + * - cpuset_top_mutex * - cpu_hotplug_lock (cpus_read_lock/cpus_write_lock) * - cpuset_mutex * - callback_lock (raw spinlock) * - * A task must hold all the three locks to modify externally visible or - * used fields of cpusets, though some of the internally used cpuset fields + * As cpuset will now indirectly flush a number of different workqueues in + * housekeeping_update() to update housekeeping cpumasks when the set of + * isolated CPUs is going to be changed, it may be vulnerable to deadlock + * if we hold cpus_read_lock while calling into housekeeping_update(). + * + * The first cpuset_top_mutex will be held except when calling into + * cpuset_handle_hotplug() from the CPU hotplug code where cpus_write_lock + * and cpuset_mutex will be held instead. The main purpose of this mutex + * is to prevent regular cpuset control file write actions from interfering + * with the call to housekeeping_update(), though CPU hotplug operation can + * still happen in parallel. This mutex also provides protection for some + * internal variables. + * + * A task must hold all the remaining three locks to modify externally visible + * or used fields of cpusets, though some of the internally used cpuset fields * and internal variables can be modified without holding callback_lock. If only * reliable read access of the externally used fields are needed, a task can * hold either cpuset_mutex or callback_lock which are exposed to other @@ -100,6 +114,7 @@ static const char * const perr_strings[] = { * cpumasks and nodemasks. */ +static DEFINE_MUTEX(cpuset_top_mutex); static DEFINE_MUTEX(cpuset_mutex); /* @@ -111,6 +126,8 @@ static DEFINE_MUTEX(cpuset_mutex); * * CSCB: Readable by holding either cpuset_mutex or callback_lock. Writable * by holding both cpuset_mutex and callback_lock. + * + * T: Read/write-able by holding the cpuset_top_mutex. */ /* @@ -134,6 +151,11 @@ static cpumask_var_t isolated_cpus; /* CSCB */ */ static bool update_housekeeping; /* RWCS */ +/* + * Copy of isolated_cpus to be passed to housekeeping_update() + */ +static cpumask_var_t isolated_hk_cpus; /* T */ + /* * A flag to force sched domain rebuild at the end of an operation. * It can be set in @@ -297,6 +319,7 @@ void lockdep_assert_cpuset_lock_held(void) */ void cpuset_full_lock(void) { + mutex_lock(&cpuset_top_mutex); cpus_read_lock(); mutex_lock(&cpuset_mutex); } @@ -305,12 +328,14 @@ void cpuset_full_unlock(void) { mutex_unlock(&cpuset_mutex); cpus_read_unlock(); + mutex_unlock(&cpuset_top_mutex); } #ifdef CONFIG_LOCKDEP bool lockdep_is_cpuset_held(void) { - return lockdep_is_held(&cpuset_mutex); + return lockdep_is_held(&cpuset_mutex) || + lockdep_is_held(&cpuset_top_mutex); } #endif @@ -1315,9 +1340,20 @@ static void update_hk_sched_domains(void) { if (update_housekeeping) { /* Updating HK cpumasks implies rebuild sched domains */ - WARN_ON_ONCE(housekeeping_update(isolated_cpus)); update_housekeeping = false; force_sd_rebuild = true; + cpumask_copy(isolated_hk_cpus, isolated_cpus); + + /* + * housekeeping_update() is now called without holding + * cpus_read_lock and cpuset_mutex. Only cpuset_top_mutex + * is still being held for mutual exclusion. + */ + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); + WARN_ON_ONCE(housekeeping_update(isolated_hk_cpus)); + cpus_read_lock(); + mutex_lock(&cpuset_mutex); } /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */ if (force_sd_rebuild) @@ -3635,6 +3671,7 @@ int __init cpuset_init(void) BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&isolated_cpus, GFP_KERNEL)); + BUG_ON(!zalloc_cpumask_var(&isolated_hk_cpus, GFP_KERNEL)); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 3b725d39c06e..ef152d401fe2 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -123,8 +123,6 @@ int housekeeping_update(struct cpumask *isol_mask) struct cpumask *trial, *old = NULL; int err; - lockdep_assert_cpus_held(); - trial = kmalloc(cpumask_size(), GFP_KERNEL); if (!trial) return -ENOMEM; @@ -136,7 +134,7 @@ int housekeeping_update(struct cpumask *isol_mask) } if (!housekeeping.flags) - static_branch_enable_cpuslocked(&housekeeping_overridden); + static_branch_enable(&housekeeping_overridden); if (housekeeping.flags & HK_FLAG_DOMAIN) old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN); diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 6da9cd562b20..83428aa03aef 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -1559,8 +1559,6 @@ int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask) cpumask_var_t cpumask __free(free_cpumask_var) = CPUMASK_VAR_NULL; int cpu; - lockdep_assert_cpus_held(); - if (!works) return -ENOMEM; if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) @@ -1570,6 +1568,7 @@ int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask) * First set previously isolated CPUs as available (unisolate). * This cpumask contains only CPUs that switched to available now. */ + guard(cpus_read_lock)(); cpumask_andnot(cpumask, cpu_online_mask, exclude_cpumask); cpumask_andnot(cpumask, cpumask, tmigr_available_cpumask); @@ -1626,7 +1625,6 @@ static int __init tmigr_init_isolation(void) cpumask_andnot(cpumask, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)); /* Protect against RCU torture hotplug testing */ - guard(cpus_read_lock)(); return tmigr_isolated_exclude_cpumask(cpumask); } late_initcall(tmigr_init_isolation); From 7cff9a40c6b0f72ccefdaf0ffe03cfac30348f51 Mon Sep 17 00:00:00 2001 From: Mariusz Skamra Date: Thu, 12 Feb 2026 14:46:46 +0100 Subject: [PATCH 214/828] Bluetooth: Fix CIS host feature condition This fixes the condition for sending the LE Set Host Feature command. The command is sent to indicate host support for Connected Isochronous Streams in this case. It has been observed that the system could not initialize BIS-only capable controllers because the controllers do not support the command. As per Core v6.2 | Vol 4, Part E, Table 3.1 the command shall be supported if CIS Central or CIS Peripheral is supported; otherwise, the command is optional. Fixes: 709788b154ca ("Bluetooth: hci_core: Fix using {cis,bis}_capable for current settings") Cc: stable@vger.kernel.org Signed-off-by: Mariusz Skamra Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index f04a90bce4a9..0b0dc0965f5a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4592,7 +4592,7 @@ static int hci_le_set_host_features_sync(struct hci_dev *hdev) { int err; - if (iso_capable(hdev)) { + if (cis_capable(hdev)) { /* Connected Isochronous Channels (Host Support) */ err = hci_le_set_host_feature_sync(hdev, 32, (iso_enabled(hdev) ? 0x01 : From a8d1d73c81d1e70d2aa49fdaf59d933bb783ffe5 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 17 Feb 2026 13:29:43 -0500 Subject: [PATCH 215/828] Bluetooth: L2CAP: Fix not checking output MTU is acceptable on L2CAP_ECRED_CONN_REQ Upon receiving L2CAP_ECRED_CONN_REQ the given MTU shall be checked against the suggested MTU of the listening socket as that is required by the likes of PTS L2CAP/ECFC/BV-27-C test which expects L2CAP_CR_LE_UNACCEPT_PARAMS if the MTU is lowers than socket omtu. In order to be able to set chan->omtu the code now allows setting setsockopt(BT_SNDMTU), but it is only allowed when connection has not been stablished since there is no procedure to reconfigure the output MTU. Link: https://github.com/bluez/bluez/issues/1895 Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 8 ++++++++ net/bluetooth/l2cap_sock.c | 15 +++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9452c6179acb..90676ca0e92b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5117,6 +5117,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, goto unlock; } + /* Check if the listening channel has set an output MTU then the + * requested MTU shall be less than or equal to that value. + */ + if (pchan->omtu && mtu < pchan->omtu) { + result = L2CAP_CR_LE_UNACCEPT_PARAMS; + goto unlock; + } + result = L2CAP_CR_LE_SUCCESS; for (i = 0; i < num_scid; i++) { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 62ceda979f39..477b4d145459 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1029,10 +1029,17 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - /* Setting is not supported as it's the remote side that - * decides this. - */ - err = -EPERM; + /* Only allow setting output MTU when not connected */ + if (sk->sk_state == BT_CONNECTED) { + err = -EISCONN; + break; + } + + err = copy_safe_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) + break; + + chan->omtu = mtu; break; case BT_RCVMTU: From 138d7eca445ef37a0333425d269ee59900ca1104 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 13 Feb 2026 13:33:33 -0500 Subject: [PATCH 216/828] Bluetooth: L2CAP: Fix missing key size check for L2CAP_LE_CONN_REQ This adds a check for encryption key size upon receiving L2CAP_LE_CONN_REQ which is required by L2CAP/LE/CFC/BV-15-C which expects L2CAP_CR_LE_BAD_KEY_SIZE. Link: https://lore.kernel.org/linux-bluetooth/5782243.rdbgypaU67@n9w6sw14/ Fixes: 27e2d4c8d28b ("Bluetooth: Add basic LE L2CAP connect request receiving support") Signed-off-by: Luiz Augusto von Dentz Tested-by: Christian Eggers --- net/bluetooth/l2cap_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 90676ca0e92b..2dcc5bb907b8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4916,6 +4916,13 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response_unlock; } + /* Check if Key Size is sufficient for the security level */ + if (!l2cap_check_enc_key_size(conn->hcon, pchan)) { + result = L2CAP_CR_LE_BAD_KEY_SIZE; + chan = NULL; + goto response_unlock; + } + /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { result = L2CAP_CR_LE_INVALID_SCID; From 1bfd7575092420ba5a0b944953c95b74a5646ff8 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 19 Feb 2026 23:35:18 +0000 Subject: [PATCH 217/828] drm/xe/sync: Cleanup partially initialized sync on parse failure xe_sync_entry_parse() can allocate references (syncobj, fence, chain fence, or user fence) before hitting a later failure path. Several of those paths returned directly, leaving partially initialized state and leaking refs. Route these error paths through a common free_sync label and call xe_sync_entry_cleanup(sync) before returning the error. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260219233516.2938172-5-shuicheng.lin@intel.com (cherry picked from commit f939bdd9207a5d1fc55cced5459858480686ce22) Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index eb136390dafd..ebf6c96d7a41 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -146,8 +146,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (!signal) { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } } break; @@ -167,17 +169,21 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (signal) { sync->chain_fence = dma_fence_chain_alloc(); - if (!sync->chain_fence) - return -ENOMEM; + if (!sync->chain_fence) { + err = -ENOMEM; + goto free_sync; + } } else { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } err = dma_fence_chain_find_seqno(&sync->fence, sync_in.timeline_value); if (err) - return err; + goto free_sync; } break; @@ -216,6 +222,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, sync->timeline_value = sync_in.timeline_value; return 0; + +free_sync: + xe_sync_entry_cleanup(sync); + return err; } ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO); From 0879c3f04f67e2a1677c25dcc24669ce21eb6a6c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 19 Feb 2026 23:35:19 +0000 Subject: [PATCH 218/828] drm/xe/sync: Fix user fence leak on alloc failure When dma_fence_chain_alloc() fails, properly release the user fence reference to prevent a memory leak. Fixes: 0995c2fc39b0 ("drm/xe: Enforce correct user fence signaling order using") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260219233516.2938172-6-shuicheng.lin@intel.com (cherry picked from commit a5d5634cde48a9fcd68c8504aa07f89f175074a0) Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index ebf6c96d7a41..24d6d9af20d6 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -206,8 +206,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); sync->ufence_chain_fence = dma_fence_chain_alloc(); - if (!sync->ufence_chain_fence) - return -ENOMEM; + if (!sync->ufence_chain_fence) { + err = -ENOMEM; + goto free_sync; + } sync->ufence_syncobj = ufence_syncobj; } From 96a7b71c4438d3b72d6c95e3efdc9e8e8aee6b78 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 23 Feb 2026 13:43:45 -0800 Subject: [PATCH 219/828] ubd: Use pointer-to-pointers for io_thread_req arrays Having an unbounded array for irq_req_buffer and io_req_buffer doesn't provide any bounds safety, and confuses the needed allocation type, which is returning a pointer to pointers. Instead of the implicit cast, switch the variable types. Reported-by: Nathan Chancellor Reported-by: Guenter Roeck Closes: https://lore.kernel.org/all/b04b6c13-7d0e-4a89-9e68-b572b6c686ac@roeck-us.net Fixes: 69050f8d6d07 ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types") Acked-by: Richard Weinberger Link: https://patch.msgid.link/20260223214341.work.846-kees@kernel.org Signed-off-by: Kees Cook --- arch/um/drivers/ubd_kern.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 012b2bcaa8a0..20fc33300a95 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -69,11 +69,11 @@ struct io_thread_req { }; -static struct io_thread_req * (*irq_req_buffer)[]; +static struct io_thread_req **irq_req_buffer; static struct io_thread_req *irq_remainder; static int irq_remainder_size; -static struct io_thread_req * (*io_req_buffer)[]; +static struct io_thread_req **io_req_buffer; static struct io_thread_req *io_remainder; static int io_remainder_size; @@ -398,7 +398,7 @@ static int thread_fd = -1; static int bulk_req_safe_read( int fd, - struct io_thread_req * (*request_buffer)[], + struct io_thread_req **request_buffer, struct io_thread_req **remainder, int *remainder_size, int max_recs @@ -465,7 +465,7 @@ static irqreturn_t ubd_intr(int irq, void *dev) &irq_remainder, &irq_remainder_size, UBD_REQ_BUFFER_SIZE)) >= 0) { for (i = 0; i < len / sizeof(struct io_thread_req *); i++) - ubd_end_request((*irq_req_buffer)[i]); + ubd_end_request(irq_req_buffer[i]); } if (len < 0 && len != -EAGAIN) @@ -1512,7 +1512,7 @@ void *io_thread(void *arg) } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - struct io_thread_req *req = (*io_req_buffer)[count]; + struct io_thread_req *req = io_req_buffer[count]; int i; io_count++; From f709859f331b8fbd7fede5769d668008fc5ba789 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 23 Feb 2026 11:23:12 -0700 Subject: [PATCH 220/828] init/Kconfig: Adjust fixed clang version for __builtin_counted_by_ref Commit d39a1d7486d9 ("compiler_types: Disable __builtin_counted_by_ref for Clang") used 22.0.0 as the fixed version for a compiler crash but the fix was only merged in main (23.0.0) and release/22.x (22.1.0). With the current fixed version number, prerelease or Android LLVM 22 builds will still be able to hit the compiler crash when building the kernel. This can be particularly disruptive when bisecting LLVM. Use 21.1.0 as the fixed version number to ensure the fix for this crash is always present. Fixes: d39a1d7486d9 ("compiler_types: Disable __builtin_counted_by_ref for Clang") Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20260223-fix-clang-version-builtin-counted-by-ref-v1-1-3ea478a24f0a@kernel.org Signed-off-by: Kees Cook --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index c25869cf59c1..b55deae9256c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -153,7 +153,7 @@ config CC_HAS_COUNTED_BY_PTR config CC_HAS_BROKEN_COUNTED_BY_REF bool # https://github.com/llvm/llvm-project/issues/182575 - default y if CC_IS_CLANG && CLANG_VERSION < 220000 + default y if CC_IS_CLANG && CLANG_VERSION < 220100 config CC_HAS_MULTIDIMENSIONAL_NONSTRING def_bool $(success,echo 'char tag[][4] __attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror) From eddb98ad9364b4e778768785d46cfab04ce52100 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Feb 2026 13:43:00 +0900 Subject: [PATCH 221/828] ata: libata-eh: correctly handle deferred qc timeouts A deferred qc may timeout while waiting for the device queue to drain to be submitted. In such case, since the qc is not active, ata_scsi_cmd_error_handler() ends up calling scsi_eh_finish_cmd(), which frees the qc. But as the port deferred_qc field still references this finished/freed qc, the deferred qc work may eventually attempt to call ata_qc_issue() against this invalid qc, leading to errors such as reported by UBSAN (syzbot run): UBSAN: shift-out-of-bounds in drivers/ata/libata-core.c:5166:24 shift exponent 4210818301 is too large for 64-bit type 'long long unsigned int' ... Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120 ubsan_epilogue+0xa/0x30 lib/ubsan.c:233 __ubsan_handle_shift_out_of_bounds+0x279/0x2a0 lib/ubsan.c:494 ata_qc_issue.cold+0x38/0x9f drivers/ata/libata-core.c:5166 ata_scsi_deferred_qc_work+0x154/0x1f0 drivers/ata/libata-scsi.c:1679 process_one_work+0x9d7/0x1920 kernel/workqueue.c:3275 process_scheduled_works kernel/workqueue.c:3358 [inline] worker_thread+0x5da/0xe40 kernel/workqueue.c:3439 kthread+0x370/0x450 kernel/kthread.c:467 ret_from_fork+0x754/0xd80 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Fix this by checking if the qc of a timed out SCSI command is a deferred one, and in such case, clear the port deferred_qc field and finish the SCSI command with DID_TIME_OUT. Reported-by: syzbot+1f77b8ca15336fff21ff@syzkaller.appspotmail.com Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Igor Pylypiv --- drivers/ata/libata-eh.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 72a22b6c9682..b373cceb95d2 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -640,12 +640,28 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, set_host_byte(scmd, DID_OK); ata_qc_for_each_raw(ap, qc, i) { - if (qc->flags & ATA_QCFLAG_ACTIVE && - qc->scsicmd == scmd) + if (qc->scsicmd != scmd) + continue; + if ((qc->flags & ATA_QCFLAG_ACTIVE) || + qc == ap->deferred_qc) break; } - if (i < ATA_MAX_QUEUE) { + if (qc == ap->deferred_qc) { + /* + * This is a deferred command that timed out while + * waiting for the command queue to drain. Since the qc + * is not active yet (deferred_qc is still set, so the + * deferred qc work has not issued the command yet), + * simply signal the timeout by finishing the SCSI + * command and clear the deferred qc to prevent the + * deferred qc work from issuing this qc. + */ + WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); + ap->deferred_qc = NULL; + set_host_byte(scmd, DID_TIME_OUT); + scsi_eh_finish_cmd(scmd, &ap->eh_done_q); + } else if (i < ATA_MAX_QUEUE) { /* the scmd has an associated qc */ if (!(qc->flags & ATA_QCFLAG_EH)) { /* which hasn't failed yet, timeout */ From 55db009926634b20955bd8abbee921adbc8d2cb4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Feb 2026 12:09:12 +0900 Subject: [PATCH 222/828] ata: libata-core: fix cancellation of a port deferred qc work cancel_work_sync() is a sleeping function so it cannot be called with the spin lock of a port being held. Move the call to this function in ata_port_detach() after EH completes, with the port lock released, together with other work cancellation calls. Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Igor Pylypiv --- drivers/ata/libata-core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 11909417f017..ccbf320524da 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6269,10 +6269,6 @@ static void ata_port_detach(struct ata_port *ap) } } - /* Make sure the deferred qc work finished. */ - cancel_work_sync(&ap->deferred_qc_work); - WARN_ON(ap->deferred_qc); - /* Tell EH to disable all devices */ ap->pflags |= ATA_PFLAG_UNLOADING; ata_port_schedule_eh(ap); @@ -6283,9 +6279,11 @@ static void ata_port_detach(struct ata_port *ap) /* wait till EH commits suicide */ ata_port_wait_eh(ap); - /* it better be dead now */ + /* It better be dead now and not have any remaining deferred qc. */ WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); + WARN_ON(ap->deferred_qc); + cancel_work_sync(&ap->deferred_qc_work); cancel_delayed_work_sync(&ap->hotplug_task); cancel_delayed_work_sync(&ap->scsi_rescan_task); From 41e09ec73d7431dffda01b1e7208a272a5c90fb9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Feb 2026 17:18:58 -0800 Subject: [PATCH 223/828] MAINTAINERS: include all of framer under pef2256 The "framer" infrastructure only has one driver - pef2256 and is not covered by any MAINTAINERS entry of its own. This leads to author not being CCed on patches. Let's include all of framer/ under the pef2256 entry. We can split it in the very unlikely event of another driver appearing. Link: https://lore.kernel.org/aZefB5f3EAkQQM1m@google.com Acked-by: Herve Codina Link: https://patch.msgid.link/20260221011858.3403605-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c00b5f5d4203..e1a6c6cc2b7c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14410,9 +14410,9 @@ LANTIQ PEF2256 DRIVER M: Herve Codina S: Maintained F: Documentation/devicetree/bindings/net/lantiq,pef2256.yaml -F: drivers/net/wan/framer/pef2256/ +F: drivers/net/wan/framer/ F: drivers/pinctrl/pinctrl-pef2256.c -F: include/linux/framer/pef2256.h +F: include/linux/framer/ LASI 53c700 driver for PARISC M: "James E.J. Bottomley" From 8a8a9fac9efa6423fd74938b940cb7d731780718 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Feb 2026 22:26:05 +0000 Subject: [PATCH 224/828] net: do not pass flow_id to set_rps_cpu() Blamed commit made the assumption that the RPS table for each receive queue would have the same size, and that it would not change. Compute flow_id in set_rps_cpu(), do not assume we can use the value computed by get_rps_cpu(). Otherwise we risk out-of-bound access and/or crashes. Fixes: 48aa30443e52 ("net: Cache hash and flow_id to avoid recalculation") Signed-off-by: Eric Dumazet Cc: Krishna Kumar Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260220222605.3468081-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 096b3ff13f6b..f3426385f1ba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4992,8 +4992,7 @@ static bool rps_flow_is_active(struct rps_dev_flow *rflow, static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, - struct rps_dev_flow *rflow, u16 next_cpu, u32 hash, - u32 flow_id) + struct rps_dev_flow *rflow, u16 next_cpu, u32 hash) { if (next_cpu < nr_cpu_ids) { u32 head; @@ -5004,6 +5003,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *tmp_rflow; unsigned int tmp_cpu; u16 rxq_index; + u32 flow_id; int rc; /* Should we steer this flow to a different hardware queue? */ @@ -5019,6 +5019,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (!flow_table) goto out; + flow_id = rfs_slot(hash, flow_table); tmp_rflow = &flow_table->flows[flow_id]; tmp_cpu = READ_ONCE(tmp_rflow->cpu); @@ -5066,7 +5067,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow_table *flow_table; struct rps_map *map; int cpu = -1; - u32 flow_id; u32 tcpu; u32 hash; @@ -5113,8 +5113,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - flow_id = rfs_slot(hash, flow_table); - rflow = &flow_table->flows[flow_id]; + rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; tcpu = rflow->cpu; /* @@ -5133,8 +5132,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; - rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash, - flow_id); + rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash); } if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { From 7bb09315f93dce6acc54bf59e5a95ba7365c2be4 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Fri, 20 Feb 2026 18:40:36 +0900 Subject: [PATCH 225/828] tls: Fix race condition in tls_sw_cancel_work_tx() This issue was discovered during a code audit. After cancel_delayed_work_sync() is called from tls_sk_proto_close(), tx_work_handler() can still be scheduled from paths such as the Delayed ACK handler or ksoftirqd. As a result, the tx_work_handler() worker may dereference a freed TLS object. The following is a simple race scenario: cpu0 cpu1 tls_sk_proto_close() tls_sw_cancel_work_tx() tls_write_space() tls_sw_write_space() if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask)) set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask); cancel_delayed_work_sync(&ctx->tx_work.work); schedule_delayed_work(&tx_ctx->tx_work.work, 0); To prevent this race condition, cancel_delayed_work_sync() is replaced with disable_delayed_work_sync(). Fixes: f87e62d45e51 ("net/tls: remove close callback sock unlock/lock around TX work flush") Signed-off-by: Hyunwoo Kim Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/aZgsFO6nfylfvLE7@v4bel Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9937d4c810f2..b1fa62de9dab 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2533,7 +2533,7 @@ void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask); set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask); - cancel_delayed_work_sync(&ctx->tx_work.work); + disable_delayed_work_sync(&ctx->tx_work.work); } void tls_sw_release_resources_tx(struct sock *sk) From fb868db5f4bccd7a78219313ab2917429f715cea Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Fri, 20 Feb 2026 13:53:24 -0800 Subject: [PATCH 226/828] gve: fix incorrect buffer cleanup in gve_tx_clean_pending_packets for QPL In DQ-QPL mode, gve_tx_clean_pending_packets() incorrectly uses the RDA buffer cleanup path. It iterates num_bufs times and attempts to unmap entries in the dma array. This leads to two issues: 1. The dma array shares storage with tx_qpl_buf_ids (union). Interpreting buffer IDs as DMA addresses results in attempting to unmap incorrect memory locations. 2. num_bufs in QPL mode (counting 2K chunks) can significantly exceed the size of the dma array, causing out-of-bounds access warnings (trace below is how we noticed this issue). UBSAN: array-index-out-of-bounds in drivers/net/ethernet/drivers/net/ethernet/google/gve/gve_tx_dqo.c:178:5 index 18 is out of range for type 'dma_addr_t[18]' (aka 'unsigned long long[18]') Workqueue: gve gve_service_task [gve] Call Trace: dump_stack_lvl+0x33/0xa0 __ubsan_handle_out_of_bounds+0xdc/0x110 gve_tx_stop_ring_dqo+0x182/0x200 [gve] gve_close+0x1be/0x450 [gve] gve_reset+0x99/0x120 [gve] gve_service_task+0x61/0x100 [gve] process_scheduled_works+0x1e9/0x380 Fix this by properly checking for QPL mode and delegating to gve_free_tx_qpl_bufs() to reclaim the buffers. Cc: stable@vger.kernel.org Fixes: a6fb8d5a8b69 ("gve: Tx path for DQO-QPL") Signed-off-by: Ankit Garg Reviewed-by: Jordan Rhee Reviewed-by: Harshitha Ramamurthy Signed-off-by: Joshua Washington Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260220215324.1631350-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 54 +++++++++----------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 28e85730f785..b57e8f13cb51 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -167,6 +167,25 @@ gve_free_pending_packet(struct gve_tx_ring *tx, } } +static void gve_unmap_packet(struct device *dev, + struct gve_tx_pending_packet_dqo *pkt) +{ + int i; + + if (!pkt->num_bufs) + return; + + /* SKB linear portion is guaranteed to be mapped */ + dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), + dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); + for (i = 1; i < pkt->num_bufs; i++) { + netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE, 0); + } + pkt->num_bufs = 0; +} + /* gve_tx_free_desc - Cleans up all pending tx requests and buffers. */ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) @@ -176,21 +195,12 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) for (i = 0; i < tx->dqo.num_pending_packets; i++) { struct gve_tx_pending_packet_dqo *cur_state = &tx->dqo.pending_packets[i]; - int j; - for (j = 0; j < cur_state->num_bufs; j++) { - if (j == 0) { - dma_unmap_single(tx->dev, - dma_unmap_addr(cur_state, dma[j]), - dma_unmap_len(cur_state, len[j]), - DMA_TO_DEVICE); - } else { - dma_unmap_page(tx->dev, - dma_unmap_addr(cur_state, dma[j]), - dma_unmap_len(cur_state, len[j]), - DMA_TO_DEVICE); - } - } + if (tx->dqo.qpl) + gve_free_tx_qpl_bufs(tx, cur_state); + else + gve_unmap_packet(tx->dev, cur_state); + if (cur_state->skb) { dev_consume_skb_any(cur_state->skb); cur_state->skb = NULL; @@ -1157,22 +1167,6 @@ static void remove_from_list(struct gve_tx_ring *tx, } } -static void gve_unmap_packet(struct device *dev, - struct gve_tx_pending_packet_dqo *pkt) -{ - int i; - - /* SKB linear portion is guaranteed to be mapped */ - dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), - dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); - for (i = 1; i < pkt->num_bufs; i++) { - netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), - DMA_TO_DEVICE, 0); - } - pkt->num_bufs = 0; -} - /* Completion types and expected behavior: * No Miss compl + Packet compl = Packet completed normally. * Miss compl + Re-inject compl = Packet completed normally. From ca220141fa8ebae09765a242076b2b77338106b0 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Thu, 19 Feb 2026 09:42:51 +0800 Subject: [PATCH 227/828] kcm: fix zero-frag skb in frag_list on partial sendmsg error Syzkaller reported a warning in kcm_write_msgs() when processing a message with a zero-fragment skb in the frag_list. When kcm_sendmsg() fills MAX_SKB_FRAGS fragments in the current skb, it allocates a new skb (tskb) and links it into the frag_list before copying data. If the copy subsequently fails (e.g. -EFAULT from user memory), tskb remains in the frag_list with zero fragments: head skb (msg being assembled, NOT yet in sk_write_queue) +-----------+ | frags[17] | (MAX_SKB_FRAGS, all filled with data) | frag_list-+--> tskb +-----------+ +----------+ | frags[0] | (empty! copy failed before filling) +----------+ For SOCK_SEQPACKET with partial data already copied, the error path saves this message via partial_message for later completion. For SOCK_SEQPACKET, sock_write_iter() automatically sets MSG_EOR, so a subsequent zero-length write(fd, NULL, 0) completes the message and queues it to sk_write_queue. kcm_write_msgs() then walks the frag_list and hits: WARN_ON(!skb_shinfo(skb)->nr_frags) TCP has a similar pattern where skbs are enqueued before data copy and cleaned up on failure via tcp_remove_empty_skb(). KCM was missing the equivalent cleanup. Fix this by tracking the predecessor skb (frag_prev) when allocating a new frag_list entry. On error, if the tail skb has zero frags, use frag_prev to unlink and free it in O(1) without walking the singly-linked frag_list. frag_prev is safe to dereference because the entire message chain is only held locally (or in kcm->seq_skb) and is not added to sk_write_queue until MSG_EOR, so the send path cannot free it underneath us. Also change the WARN_ON to WARN_ON_ONCE to avoid flooding the log if the condition is somehow hit repeatedly. There are currently no KCM selftests in the kernel tree; a simple reproducer is available at [1]. [1] https://gist.github.com/mrpre/a94d431c757e8d6f168f4dd1a3749daa Reported-by: syzbot+52624bdfbf2746d37d70@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000269a1405a12fdc77@google.com/T/ Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260219014256.370092-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/kcm/kcmsock.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 5dd7e0509a48..3912e75079f5 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -628,7 +628,7 @@ retry: skb = txm->frag_skb; } - if (WARN_ON(!skb_shinfo(skb)->nr_frags) || + if (WARN_ON_ONCE(!skb_shinfo(skb)->nr_frags) || WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) { ret = -EINVAL; goto out; @@ -749,7 +749,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); - struct sk_buff *skb = NULL, *head = NULL; + struct sk_buff *skb = NULL, *head = NULL, *frag_prev = NULL; size_t copy, copied = 0; long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); int eor = (sock->type == SOCK_DGRAM) ? @@ -824,6 +824,7 @@ start: else skb->next = tskb; + frag_prev = skb; skb = tskb; skb->ip_summed = CHECKSUM_UNNECESSARY; continue; @@ -933,6 +934,22 @@ partial_message: out_error: kcm_push(kcm); + /* When MAX_SKB_FRAGS was reached, a new skb was allocated and + * linked into the frag_list before data copy. If the copy + * subsequently failed, this skb has zero frags. Remove it from + * the frag_list to prevent kcm_write_msgs from later hitting + * WARN_ON(!skb_shinfo(skb)->nr_frags). + */ + if (frag_prev && !skb_shinfo(skb)->nr_frags) { + if (head == frag_prev) + skb_shinfo(head)->frag_list = NULL; + else + frag_prev->next = NULL; + kfree_skb(skb); + /* Update skb as it may be saved in partial_message via goto */ + skb = frag_prev; + } + if (sock->type == SOCK_SEQPACKET) { /* Wrote some bytes before encountering an * error, return partial success. From 4cfe066a82cdf9e83e48b16000f55280efc98325 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 20 Feb 2026 16:57:54 +0100 Subject: [PATCH 228/828] dpll: zl3073x: fix REF_PHASE_OFFSET_COMP register width for some chip IDs The REF_PHASE_OFFSET_COMP register is 48-bit wide on most zl3073x chip variants, but only 32-bit wide on chip IDs 0x0E30, 0x0E93..0x0E97 and 0x1F60. The driver unconditionally uses 48-bit read/write operations, which on 32-bit variants causes reading 2 bytes past the register boundary (corrupting the value) and writing 2 bytes into the adjacent register. Fix this by storing the chip ID in the device structure during probe and adding a helper to detect the affected variants. Use the correct register width for read/write operations and the matching sign extension bit (31 vs 47) when interpreting the phase compensation value. Fixes: 6287262f761e ("dpll: zl3073x: Add support to adjust phase") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260220155755.448185-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 1 + drivers/dpll/zl3073x/core.h | 28 ++++++++++++++++++++++++++++ drivers/dpll/zl3073x/dpll.c | 7 +++++-- drivers/dpll/zl3073x/ref.c | 25 ++++++++++++++++++++----- drivers/dpll/zl3073x/regs.h | 1 + 5 files changed, 55 insertions(+), 7 deletions(-) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 63bd97181b9e..8c5ee28e02f4 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -1026,6 +1026,7 @@ int zl3073x_dev_probe(struct zl3073x_dev *zldev, "Unknown or non-match chip ID: 0x%0x\n", id); } + zldev->chip_id = id; /* Read revision, firmware version and custom config version */ rc = zl3073x_read_u16(zldev, ZL_REG_REVISION, &revision); diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index dddfcacea5c0..fd2af3c62a7d 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -35,6 +35,7 @@ struct zl3073x_dpll; * @dev: pointer to device * @regmap: regmap to access device registers * @multiop_lock: to serialize multiple register operations + * @chip_id: chip ID read from hardware * @ref: array of input references' invariants * @out: array of outs' invariants * @synth: array of synths' invariants @@ -48,6 +49,7 @@ struct zl3073x_dev { struct device *dev; struct regmap *regmap; struct mutex multiop_lock; + u16 chip_id; /* Invariants */ struct zl3073x_ref ref[ZL3073X_NUM_REFS]; @@ -144,6 +146,32 @@ int zl3073x_write_hwreg_seq(struct zl3073x_dev *zldev, int zl3073x_ref_phase_offsets_update(struct zl3073x_dev *zldev, int channel); +/** + * zl3073x_dev_is_ref_phase_comp_32bit - check ref phase comp register size + * @zldev: pointer to zl3073x device + * + * Some chip IDs have a 32-bit wide ref_phase_offset_comp register instead + * of the default 48-bit. + * + * Return: true if the register is 32-bit, false if 48-bit + */ +static inline bool +zl3073x_dev_is_ref_phase_comp_32bit(struct zl3073x_dev *zldev) +{ + switch (zldev->chip_id) { + case 0x0E30: + case 0x0E93: + case 0x0E94: + case 0x0E95: + case 0x0E96: + case 0x0E97: + case 0x1F60: + return true; + default: + return false; + } +} + static inline bool zl3073x_is_n_pin(u8 id) { diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 78edc36b17fb..8ffbede117c6 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -475,8 +475,11 @@ zl3073x_dpll_input_pin_phase_adjust_get(const struct dpll_pin *dpll_pin, ref_id = zl3073x_input_pin_ref_get(pin->id); ref = zl3073x_ref_state_get(zldev, ref_id); - /* Perform sign extension for 48bit signed value */ - phase_comp = sign_extend64(ref->phase_comp, 47); + /* Perform sign extension based on register width */ + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) + phase_comp = sign_extend64(ref->phase_comp, 31); + else + phase_comp = sign_extend64(ref->phase_comp, 47); /* Reverse two's complement negation applied during set and convert * to 32bit signed int diff --git a/drivers/dpll/zl3073x/ref.c b/drivers/dpll/zl3073x/ref.c index aa2de13effa8..6b65e6103999 100644 --- a/drivers/dpll/zl3073x/ref.c +++ b/drivers/dpll/zl3073x/ref.c @@ -121,8 +121,16 @@ int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index) return rc; /* Read phase compensation register */ - rc = zl3073x_read_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, - &ref->phase_comp); + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) { + u32 val; + + rc = zl3073x_read_u32(zldev, ZL_REG_REF_PHASE_OFFSET_COMP_32, + &val); + ref->phase_comp = val; + } else { + rc = zl3073x_read_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, + &ref->phase_comp); + } if (rc) return rc; @@ -179,9 +187,16 @@ int zl3073x_ref_state_set(struct zl3073x_dev *zldev, u8 index, if (!rc && dref->sync_ctrl != ref->sync_ctrl) rc = zl3073x_write_u8(zldev, ZL_REG_REF_SYNC_CTRL, ref->sync_ctrl); - if (!rc && dref->phase_comp != ref->phase_comp) - rc = zl3073x_write_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, - ref->phase_comp); + if (!rc && dref->phase_comp != ref->phase_comp) { + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) + rc = zl3073x_write_u32(zldev, + ZL_REG_REF_PHASE_OFFSET_COMP_32, + ref->phase_comp); + else + rc = zl3073x_write_u48(zldev, + ZL_REG_REF_PHASE_OFFSET_COMP, + ref->phase_comp); + } if (rc) return rc; diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index d837bee72b17..5573d7188406 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h @@ -194,6 +194,7 @@ #define ZL_REF_CONFIG_DIFF_EN BIT(2) #define ZL_REG_REF_PHASE_OFFSET_COMP ZL_REG(10, 0x28, 6) +#define ZL_REG_REF_PHASE_OFFSET_COMP_32 ZL_REG(10, 0x28, 4) #define ZL_REG_REF_SYNC_CTRL ZL_REG(10, 0x2e, 1) #define ZL_REF_SYNC_CTRL_MODE GENMASK(2, 0) From 3aa677625c8fad39989496c51bcff3872c1f16f1 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Fri, 20 Feb 2026 05:05:41 +0000 Subject: [PATCH 229/828] tipc: fix duplicate publication key in tipc_service_insert_publ() TIPC uses named table to store TIPC services represented by type and instance. Each time an application calls TIPC API bind() to bind a type/instance to a socket, an entry is created and inserted into the named table. It looks like this: named table: key1, entry1 (type, instance ...) key2, entry2 (type, instance ...) In the above table, each entry represents a route for sending data from one socket to the other. For all publications originated from the same node, the key is UNIQUE to identify each entry. It is calculated by this formula: key = socket portid + number of bindings + 1 (1) where: - socket portid: unique and calculated by using linux kernel function get_random_u32_below(). So, the value is randomized. - number of bindings: the number of times a type/instance pair is bound to a socket. This number is linearly increased, starting from 0. While the socket portid is unique and randomized by linux kernel, the linear increment of "number of bindings" in formula (1) makes "key" not unique anymore. For example: - Socket 1 is created with its associated port number 20062001. Type 1000, instance 1 is bound to socket 1: key1: 20062001 + 0 + 1 = 20062002 Then, bind() is called a second time on Socket 1 to by the same type 1000, instance 1: key2: 20062001 + 1 + 1 = 20062003 Named table: key1 (20062002), entry1 (1000, 1 ...) key2 (20062003), entry2 (1000, 1 ...) - Socket 2 is created with its associated port number 20062002. Type 1000, instance 1 is bound to socket 2: key3: 20062002 + 0 + 1 = 20062003 TIPC looks up the named table and finds out that key2 with the same value already exists and rejects the insertion into the named table. This leads to failure of bind() call from application on Socket 2 with error message EINVAL "Invalid argument". This commit fixes this issue by adding more port id checking to make sure that the key is unique to publications originated from the same port id and node. Fixes: 218527fe27ad ("tipc: replace name table service range array with rb tree") Signed-off-by: Tung Nguyen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260220050541.237962-1-tung.quang.nguyen@est.tech Signed-off-by: Jakub Kicinski --- net/tipc/name_table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index e74940eab3a4..7f42fb6a8481 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -348,7 +348,8 @@ static bool tipc_service_insert_publ(struct net *net, /* Return if the publication already exists */ list_for_each_entry(_p, &sr->all_publ, all_publ) { - if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) { + if (_p->key == key && _p->sk.ref == p->sk.ref && + (!_p->sk.node || _p->sk.node == node)) { pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n", p->sr.type, p->sr.lower, p->sr.upper, node, p->sk.ref, key); @@ -388,7 +389,8 @@ static struct publication *tipc_service_remove_publ(struct service_range *r, u32 node = sk->node; list_for_each_entry(p, &r->all_publ, all_publ) { - if (p->key != key || (node && node != p->sk.node)) + if (p->key != key || p->sk.ref != sk->ref || + (node && node != p->sk.node)) continue; list_del(&p->all_publ); list_del(&p->local_publ); From 63c49efc987afefc6b9bb7de083eb8748e0b1789 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:17 -0800 Subject: [PATCH 230/828] selftests/bpf: Add simple strscpy() implementation Replace bpf_strlcpy() in bpf_util.h with a sized_strscpy(), which is a simplified sized_strscpy() from the kernel (lib/string.c [1]). It: * takes a count (destination size) parameter * guarantees NULL-termination * returns the number of characters copied or -E2BIG Re-define strscpy macro similar to in-kernel implementation [2]: allow the count parameter to be optional. Add #ifdef-s to tools/include/linux/args.h, as they may be defined in other system headers (for example, __CONCAT in sys/cdefs.h). Fixup the single existing bpf_strlcpy() call in cgroup_helpers.c [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/string.c?h=v6.19#n113 [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/string.h?h=v6.19#n91 Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-2-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/include/linux/args.h | 4 ++ tools/testing/selftests/bpf/bpf_util.h | 45 ++++++++++++++------ tools/testing/selftests/bpf/cgroup_helpers.c | 2 +- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h index 2e8e65d975c7..14b268f2389a 100644 --- a/tools/include/linux/args.h +++ b/tools/include/linux/args.h @@ -22,7 +22,11 @@ #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) /* Concatenate two parameters, but allow them to be expanded beforehand. */ +#ifndef __CONCAT #define __CONCAT(a, b) a ## b +#endif +#ifndef CONCATENATE #define CONCATENATE(a, b) __CONCAT(a, b) +#endif #endif /* _LINUX_ARGS_H */ diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 4bc2d25f33e1..6cb56501a505 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -8,6 +8,7 @@ #include #include #include /* libbpf_num_possible_cpus */ +#include static inline unsigned int bpf_num_possible_cpus(void) { @@ -21,25 +22,43 @@ static inline unsigned int bpf_num_possible_cpus(void) return possible_cpus; } -/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst - * is zero-terminated string no matter what (unless sz == 0, in which case - * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs - * in what is returned. Given this is internal helper, it's trivial to extend - * this, when necessary. Use this instead of strncpy inside libbpf source code. +/* + * Simplified strscpy() implementation. The kernel one is in lib/string.c */ -static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) +static inline ssize_t sized_strscpy(char *dest, const char *src, size_t count) { - size_t i; + long res = 0; - if (sz == 0) - return; + if (count == 0) + return -E2BIG; - sz--; - for (i = 0; i < sz && src[i]; i++) - dst[i] = src[i]; - dst[i] = '\0'; + while (count > 1) { + char c; + + c = src[res]; + dest[res] = c; + if (!c) + return res; + res++; + count--; + } + + /* Force NUL-termination. */ + dest[res] = '\0'; + + /* Return E2BIG if the source didn't stop */ + return src[res] ? -E2BIG : res; } +#define __strscpy0(dst, src, ...) \ + sized_strscpy(dst, src, sizeof(dst)) +#define __strscpy1(dst, src, size) \ + sized_strscpy(dst, src, size) + +#undef strscpy /* Redefine the placeholder from tools/include/linux/string.h */ +#define strscpy(dst, src, ...) \ + CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) + #define __bpf_percpu_val_align __attribute__((__aligned__(8))) #define BPF_DECLARE_PERCPU(type, name) \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 20cede4db3ce..45cd0b479fe3 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -86,7 +86,7 @@ static int __enable_controllers(const char *cgroup_path, const char *controllers enable[len] = 0; close(fd); } else { - bpf_strlcpy(enable, controllers, sizeof(enable)); + strscpy(enable, controllers); } snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); From 6a087ac23f5b6619033ad08e60c355b00bb8ce51 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:18 -0800 Subject: [PATCH 231/828] selftests/bpf: Replace strcpy() calls with strscpy() strcpy() does not perform bounds checking and is considered deprecated [1]. Replace strcpy() calls with strscpy() defined in bpf_util.h. [1] https://docs.kernel.org/process/deprecated.html#strcpy Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-3-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.c | 2 +- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 2 +- tools/testing/selftests/bpf/prog_tests/setget_sockopt.c | 2 +- tools/testing/selftests/bpf/prog_tests/sockopt_sk.c | 2 +- tools/testing/selftests/bpf/prog_tests/test_veristat.c | 4 ++-- tools/testing/selftests/bpf/xdp_features.c | 3 ++- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 0a6a5561bed3..5374b7e16d53 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -432,7 +432,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, memset(addr, 0, sizeof(*sun)); sun->sun_family = family; sun->sun_path[0] = 0; - strcpy(sun->sun_path + 1, addr_str); + strscpy(sun->sun_path + 1, addr_str, sizeof(sun->sun_path) - 1); if (len) *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index b7d1b52309d0..f829b6f09bc9 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -281,7 +281,7 @@ static void test_dctcp_fallback(void) dctcp_skel = bpf_dctcp__open(); if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel")) return; - strcpy(dctcp_skel->rodata->fallback_cc, "cubic"); + strscpy(dctcp_skel->rodata->fallback_cc, "cubic"); if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load")) goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index e4dac529d424..77fe1bfb7504 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c @@ -212,7 +212,7 @@ void test_setget_sockopt(void) if (!ASSERT_OK_PTR(skel, "open skel")) goto done; - strcpy(skel->rodata->veth, "binddevtest1"); + strscpy(skel->rodata->veth, "binddevtest1"); skel->rodata->veth_ifindex = if_nametoindex("binddevtest1"); if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex")) goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index ba6b3ec1156a..53637431ec5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -142,7 +142,7 @@ static int getsetsockopt(void) /* TCP_CONGESTION can extend the string */ - strcpy(buf.cc, "nv"); + strscpy(buf.cc, "nv"); err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv")); if (err) { log_err("Failed to call setsockopt(TCP_CONGESTION)"); diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index b38c16b4247f..9aff08ac55c0 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -24,9 +24,9 @@ static struct fixture *init_fixture(void) /* for no_alu32 and cpuv4 veristat is in parent folder */ if (access("./veristat", F_OK) == 0) - strcpy(fix->veristat, "./veristat"); + strscpy(fix->veristat, "./veristat"); else if (access("../veristat", F_OK) == 0) - strcpy(fix->veristat, "../veristat"); + strscpy(fix->veristat, "../veristat"); else PRINT_FAIL("Can't find veristat binary"); diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index 595c79141cf3..a27ed663967c 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -16,6 +16,7 @@ #include +#include "bpf_util.h" #include "xdp_features.skel.h" #include "xdp_features.h" @@ -212,7 +213,7 @@ static void set_env_default(void) env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT; env.feature.action = -EINVAL; env.ifindex = -ENODEV; - strcpy(env.ifname, "unknown"); + strscpy(env.ifname, "unknown"); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT, &env.dut_ctrl_addr, NULL); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT, From 5f3518d77419255f8b12bb23c8ec22acbeb6bc5b Mon Sep 17 00:00:00 2001 From: Julius Lehmann Date: Sat, 14 Feb 2026 20:34:21 +0100 Subject: [PATCH 232/828] HID: magicmouse: fix battery reporting for Apple Magic Trackpad 2 Battery reporting does not work for the Apple Magic Trackpad 2 if it is connected via USB. The current hid descriptor fixup code checks for a hid descriptor length of exactly 83 bytes. If the hid descriptor is larger, which is the case for newer apple mice, the fixup is not applied. This fix checks for hid descriptor sizes greater/equal 83 bytes which applies the fixup for newer devices as well. Signed-off-by: Julius Lehmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 17908d52c027..9eadf3252d0d 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -990,7 +990,7 @@ static const __u8 *magicmouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, */ if ((is_usb_magicmouse2(hdev->vendor, hdev->product) || is_usb_magictrackpad2(hdev->vendor, hdev->product)) && - *rsize == 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) { + *rsize >= 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) { hid_info(hdev, "fixing up magicmouse battery report descriptor\n"); *rsize = *rsize - 1; From a2e70a89fa58133521b2deae4427d35776bda935 Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Tue, 17 Feb 2026 13:51:51 +0200 Subject: [PATCH 233/828] HID: multitouch: new class MT_CLS_EGALAX_P80H84 Fixes: f9e82295eec1 ("HID: multitouch: add eGalaxTouch P80H84 support") Signed-off-by: Ian Ray Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7daa8f6d8187..8052b35bfd7d 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -233,6 +233,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); #define MT_CLS_SMART_TECH 0x0113 #define MT_CLS_APPLE_TOUCHBAR 0x0114 #define MT_CLS_YOGABOOK9I 0x0115 +#define MT_CLS_EGALAX_P80H84 0x0116 #define MT_CLS_SIS 0x0457 #define MT_DEFAULT_MAXCONTACT 10 @@ -438,6 +439,11 @@ static const struct mt_class mt_classes[] = { MT_QUIRK_YOGABOOK9I, .export_all_inputs = true }, + { .name = MT_CLS_EGALAX_P80H84, + .quirks = MT_QUIRK_ALWAYS_VALID | + MT_QUIRK_IGNORE_DUPLICATES | + MT_QUIRK_CONTACT_CNT_ACCURATE, + }, { } }; @@ -2215,8 +2221,9 @@ static const struct hid_device_id mt_devices[] = { { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C000) }, - { .driver_data = MT_CLS_EGALAX, - MT_USB_DEVICE(USB_VENDOR_ID_DWAV, + { .driver_data = MT_CLS_EGALAX_P80H84, + HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C002) }, /* Elan devices */ From b4f4fd947a2a6df7cf1f530dd1028d9f3849eff5 Mon Sep 17 00:00:00 2001 From: Alex Tran Date: Sat, 31 Jan 2026 15:57:43 -0800 Subject: [PATCH 234/828] selftests: hid: tests: test_wacom_generic: add tests for display devices and opaque devices Verify Wacom devices set INPUT_PROP_DIRECT on display devices and INPUT_PROP_POINTER on opaque devices. Verify INPUT_PROP_POINTER is not set on display devices and INPUT_PROP_DIRECT is not set on opaque devices. Moved test_prop_pointer into TestOpaqueTablet. Created a DirectTabletTest mixin class for test_prop_direct that can be inherited by display tablet test classes.Used DirectTabletTest for TestDTH2452Tablet case. Signed-off-by: Alex Tran Tested-by: Erin Skomra Reviewed-by: Erin Skomra Signed-off-by: Jiri Kosina --- .../selftests/hid/tests/test_wacom_generic.py | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index 2d6d04f0ff80..3903f479b15b 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -598,18 +598,6 @@ class BaseTest: if unit_set: assert required[usage].contains(field) - def test_prop_direct(self): - """ - Todo: Verify that INPUT_PROP_DIRECT is set on display devices. - """ - pass - - def test_prop_pointer(self): - """ - Todo: Verify that INPUT_PROP_POINTER is set on opaque devices. - """ - pass - class PenTabletTest(BaseTest.TestTablet): def assertName(self, uhdev): @@ -677,6 +665,15 @@ class TestOpaqueTablet(PenTabletTest): uhdev.event(130, 240, pressure=0), [], auto_syn=False, strict=True ) + def test_prop_pointer(self): + """ + Verify that INPUT_PROP_POINTER is set and INPUT_PROP_DIRECT + is not set on opaque devices. + """ + evdev = self.uhdev.get_evdev() + assert libevdev.INPUT_PROP_POINTER in evdev.properties + assert libevdev.INPUT_PROP_DIRECT not in evdev.properties + class TestOpaqueCTLTablet(TestOpaqueTablet): def create_device(self): @@ -862,7 +859,18 @@ class TestPTHX60_Pen(TestOpaqueCTLTablet): ) -class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest): +class DirectTabletTest(): + def test_prop_direct(self): + """ + Verify that INPUT_PROP_DIRECT is set and INPUT_PROP_POINTER + is not set on display devices. + """ + evdev = self.uhdev.get_evdev() + assert libevdev.INPUT_PROP_DIRECT in evdev.properties + assert libevdev.INPUT_PROP_POINTER not in evdev.properties + + +class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest, DirectTabletTest): ContactIds = namedtuple("ContactIds", "contact_id, tracking_id, slot_num") def create_device(self): From 9af0feae8016ba58ad7ff784a903404986b395b1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 27 Jan 2026 10:38:39 +0100 Subject: [PATCH 235/828] RDMA/core: Fix stale RoCE GIDs during netdev events at registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RoCE GID entries become stale when netdev properties change during the IB device registration window. This is reproducible with a udev rule that sets a MAC address when a VF netdev appears: ACTION=="add", SUBSYSTEM=="net", KERNEL=="eth4", \ RUN+="/sbin/ip link set eth4 address 88:22:33:44:55:66" After VF creation, show_gids displays GIDs derived from the original random MAC rather than the configured one. The root cause is a race between netdev event processing and device registration: CPU 0 (driver) CPU 1 (udev/workqueue) ────────────── ────────────────────── ib_register_device() ib_cache_setup_one() gid_table_setup_one() _gid_table_setup_one() ← GID table allocated rdma_roce_rescan_device() ← GIDs populated with OLD MAC ip link set eth4 addr NEW_MAC NETDEV_CHANGEADDR queued netdevice_event_work_handler() ib_enum_all_roce_netdevs() ← Iterates DEVICE_REGISTERED ← Device NOT marked yet, SKIP! enable_device_and_get() xa_set_mark(DEVICE_REGISTERED) ← Too late, event was lost The netdev event handler uses ib_enum_all_roce_netdevs() which only iterates devices marked DEVICE_REGISTERED. However, this mark is set late in the registration process, after the GID cache is already populated. Events arriving in this window are silently dropped. Fix this by introducing a new xarray mark DEVICE_GID_UPDATES that is set immediately after the GID table is allocated and initialized. Use the new mark in ib_enum_all_roce_netdevs() function to iterate devices instead of DEVICE_REGISTERED. This is safe because: - After _gid_table_setup_one(), all required structures exist (port_data, immutable, cache.gid) - The GID table mutex serializes concurrent access between the initial rescan and event handlers - Event handlers correctly update stale GIDs even when racing with rescan - The mark is cleared in ib_cache_cleanup_one() before teardown This also fixes similar races for IP address events (inetaddr_event, inet6addr_event) which use the same enumeration path. Fixes: 0df91bb67334 ("RDMA/devices: Use xarray to store the client_data") Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260127093839.126291-1-jiri@resnulli.us Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cache.c | 13 +++++++++++ drivers/infiniband/core/core_priv.h | 3 +++ drivers/infiniband/core/device.c | 34 ++++++++++++++++++++++++++++- 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index b415d4aad04f..ee4a2bc68fb2 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -926,6 +926,13 @@ static int gid_table_setup_one(struct ib_device *ib_dev) if (err) return err; + /* + * Mark the device as ready for GID cache updates. This allows netdev + * event handlers to update the GID cache even before the device is + * fully registered. + */ + ib_device_enable_gid_updates(ib_dev); + rdma_roce_rescan_device(ib_dev); return err; @@ -1637,6 +1644,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { + /* + * Clear the GID updates mark first to prevent event handlers from + * accessing the device while it's being torn down. + */ + ib_device_disable_gid_updates(device); + /* The cleanup function waits for all in-progress workqueue * elements and cleans up the GID cache. This function should be * called after the device was removed from the devices list and diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 05102769a918..a2c36666e6fc 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -100,6 +100,9 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, roce_netdev_callback cb, void *cookie); +void ib_device_enable_gid_updates(struct ib_device *device); +void ib_device_disable_gid_updates(struct ib_device *device); + typedef int (*nldev_callback)(struct ib_device *device, struct sk_buff *skb, struct netlink_callback *cb, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 1b5f1ee0a557..558b73940d66 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -93,6 +93,7 @@ static struct workqueue_struct *ib_unreg_wq; static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); static DECLARE_RWSEM(devices_rwsem); #define DEVICE_REGISTERED XA_MARK_1 +#define DEVICE_GID_UPDATES XA_MARK_2 static u32 highest_client_id; #define CLIENT_REGISTERED XA_MARK_1 @@ -2412,11 +2413,42 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, unsigned long index; down_read(&devices_rwsem); - xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) + xa_for_each_marked(&devices, index, dev, DEVICE_GID_UPDATES) ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); up_read(&devices_rwsem); } +/** + * ib_device_enable_gid_updates - Mark device as ready for GID cache updates + * @device: Device to mark + * + * Called after GID table is allocated and initialized. After this mark is set, + * netdevice event handlers can update the device's GID cache. This allows + * events that arrive during device registration to be processed, avoiding + * stale GID entries when netdev properties change during the device + * registration process. + */ +void ib_device_enable_gid_updates(struct ib_device *device) +{ + down_write(&devices_rwsem); + xa_set_mark(&devices, device->index, DEVICE_GID_UPDATES); + up_write(&devices_rwsem); +} + +/** + * ib_device_disable_gid_updates - Clear the GID updates mark + * @device: Device to unmark + * + * Called before GID table cleanup to prevent event handlers from accessing + * the device while it's being torn down. + */ +void ib_device_disable_gid_updates(struct ib_device *device) +{ + down_write(&devices_rwsem); + xa_clear_mark(&devices, device->index, DEVICE_GID_UPDATES); + up_write(&devices_rwsem); +} + /* * ib_enum_all_devs - enumerate all ib_devices * @cb: Callback to call for each found ib_device From 7a23af417d9dd57b4382356b2e7442e5d2bf5bea Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Wed, 18 Feb 2026 09:12:45 +0000 Subject: [PATCH 236/828] RDMA/bng_re: Remove unnessary validity checks Fix below smatch warning: drivers/infiniband/hw/bng_re/bng_dev.c:113 bng_re_net_ring_free() warn: variable dereferenced before check 'rdev' (see line 107) current driver has unnessary validity checks. So, removing these unnessary validity checks. Fixes: 4f830cd8d7fe ("RDMA/bng_re: Add infrastructure for enabling Firmware channel") Fixes: 745065770c2d ("RDMA/bng_re: Register and get the resources from bnge driver") Fixes: 04e031ff6e60 ("RDMA/bng_re: Initialize the Firmware and Hardware") Fixes: d0da769c19d0 ("RDMA/bng_re: Add Auxiliary interface") Reported-by: Simon Horman Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202601010413.sWadrQel-lkp@intel.com/ Signed-off-by: Siva Reddy Kallam Link: https://patch.msgid.link/20260218091246.1764808-2-siva.kallam@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bng_re/bng_dev.c | 27 ++++---------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c index cf3bf08e5f26..b8df807c3a64 100644 --- a/drivers/infiniband/hw/bng_re/bng_dev.c +++ b/drivers/infiniband/hw/bng_re/bng_dev.c @@ -54,9 +54,6 @@ static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev) { struct bng_re_chip_ctx *chip_ctx; - if (!rdev->chip_ctx) - return; - kfree(rdev->dev_attr); rdev->dev_attr = NULL; @@ -124,12 +121,6 @@ static int bng_re_net_ring_free(struct bng_re_dev *rdev, struct bnge_fw_msg fw_msg = {}; int rc = -EINVAL; - if (!rdev) - return rc; - - if (!aux_dev) - return rc; - bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE); req.ring_type = type; req.ring_id = cpu_to_le16(fw_ring_id); @@ -150,10 +141,7 @@ static int bng_re_net_ring_alloc(struct bng_re_dev *rdev, struct hwrm_ring_alloc_input req = {}; struct hwrm_ring_alloc_output resp; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; - - if (!aux_dev) - return rc; + int rc; bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC); req.enables = 0; @@ -184,10 +172,7 @@ static int bng_re_stats_ctx_free(struct bng_re_dev *rdev) struct hwrm_stat_ctx_free_input req = {}; struct hwrm_stat_ctx_free_output resp = {}; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; - - if (!aux_dev) - return rc; + int rc; bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE); req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id); @@ -208,13 +193,10 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev) struct hwrm_stat_ctx_alloc_output resp = {}; struct hwrm_stat_ctx_alloc_input req = {}; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; + int rc; stats->fw_id = BNGE_INVALID_STATS_CTX_ID; - if (!aux_dev) - return rc; - bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(stats->dma_map); @@ -486,8 +468,7 @@ static void bng_re_remove(struct auxiliary_device *adev) rdev = dev_info->rdev; - if (rdev) - bng_re_remove_device(rdev, adev); + bng_re_remove_device(rdev, adev); kfree(dev_info); } From 3d2e5d12a2eef0ca8a629a422aa593673235c77c Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Wed, 18 Feb 2026 09:12:46 +0000 Subject: [PATCH 237/828] RDMA/bng_re: Unwind bng_re_dev_init properly Fix below smatch warning: drivers/infiniband/hw/bng_re/bng_dev.c:270 bng_re_dev_init() warn: missing unwind goto? Current bng_re_dev_init function is not having clear unwinding code. So, added proper unwinding with ladder. Fixes: 4f830cd8d7fe ("RDMA/bng_re: Add infrastructure for enabling Firmware channel") Reported-by: Simon Horman Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202601010413.sWadrQel-lkp@intel.com/ Signed-off-by: Siva Reddy Kallam Link: https://patch.msgid.link/20260218091246.1764808-3-siva.kallam@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bng_re/bng_dev.c | 29 +++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c index b8df807c3a64..d34b5f88cd40 100644 --- a/drivers/infiniband/hw/bng_re/bng_dev.c +++ b/drivers/infiniband/hw/bng_re/bng_dev.c @@ -285,7 +285,7 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to register with netedev: %#x\n", rc); - return -EINVAL; + goto reg_netdev_fail; } set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); @@ -294,19 +294,16 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) ibdev_err(&rdev->ibdev, "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", rdev->aux_dev->auxr_info->msix_requested); - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - return -EINVAL; + rc = -EINVAL; + goto msix_ctx_fail; } ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", rdev->aux_dev->auxr_info->msix_requested); rc = bng_re_setup_chip_ctx(rdev); if (rc) { - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); - return -EINVAL; + goto msix_ctx_fail; } bng_re_query_hwrm_version(rdev); @@ -315,16 +312,14 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); - goto fail; + goto alloc_fw_chl_fail; } /* Allocate nq record memory */ rdev->nqr = kzalloc_obj(*rdev->nqr); if (!rdev->nqr) { - bng_re_destroy_chip_ctx(rdev); - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - return -ENOMEM; + rc = -ENOMEM; + goto nq_alloc_fail; } rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested; @@ -393,9 +388,15 @@ disable_rcfw: free_ring: bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); free_rcfw: + kfree(rdev->nqr); +nq_alloc_fail: bng_re_free_rcfw_channel(&rdev->rcfw); -fail: - bng_re_dev_uninit(rdev); +alloc_fw_chl_fail: + bng_re_destroy_chip_ctx(rdev); +msix_ctx_fail: + bnge_unregister_dev(rdev->aux_dev); + clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); +reg_netdev_fail: return rc; } From 017c1792525064a723971f0216e6ef86a8c7af11 Mon Sep 17 00:00:00 2001 From: Vahagn Vardanian Date: Mon, 23 Feb 2026 00:00:00 +0000 Subject: [PATCH 238/828] wifi: mac80211: fix NULL pointer dereference in mesh_rx_csa_frame() In mesh_rx_csa_frame(), elems->mesh_chansw_params_ie is dereferenced at lines 1638 and 1642 without a prior NULL check: ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl; ... pre_value = le16_to_cpu(elems->mesh_chansw_params_ie->mesh_pre_value); The mesh_matches_local() check above only validates the Mesh ID, Mesh Configuration, and Supported Rates IEs. It does not verify the presence of the Mesh Channel Switch Parameters IE (element ID 118). When a received CSA action frame omits that IE, ieee802_11_parse_elems() leaves elems->mesh_chansw_params_ie as NULL, and the unconditional dereference causes a kernel NULL pointer dereference. A remote mesh peer with an established peer link (PLINK_ESTAB) can trigger this by sending a crafted SPECTRUM_MGMT/CHL_SWITCH action frame that includes a matching Mesh ID and Mesh Configuration IE but omits the Mesh Channel Switch Parameters IE. No authentication beyond the default open mesh peering is required. Crash confirmed on kernel 6.17.0-5-generic via mac80211_hwsim: BUG: kernel NULL pointer dereference, address: 0000000000000000 Oops: Oops: 0000 [#1] SMP NOPTI RIP: 0010:ieee80211_mesh_rx_queued_mgmt+0x143/0x2a0 [mac80211] CR2: 0000000000000000 Fix by adding a NULL check for mesh_chansw_params_ie after mesh_matches_local() returns, consistent with how other optional IEs are guarded throughout the mesh code. The bug has been present since v3.13 (released 2014-01-19). Fixes: 8f2535b92d68 ("mac80211: process the CSA frame for mesh accordingly") Cc: stable@vger.kernel.org Signed-off-by: Vahagn Vardanian Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 68901f1def0d..129e814abe76 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1636,6 +1636,9 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, if (!mesh_matches_local(sdata, elems)) goto free; + if (!elems->mesh_chansw_params_ie) + goto free; + ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) fwd_csa = false; From 021fd0f87004e949cba6f27199b9925bc24ec3a0 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Thu, 19 Feb 2026 08:57:38 +0100 Subject: [PATCH 239/828] net/rds: fix recursive lock in rds_tcp_conn_slots_available syzbot reported a recursive lock warning in rds_tcp_get_peer_sport() as it calls inet6_getname() which acquires the socket lock that was already held by __release_sock(). kworker/u8:6/2985 is trying to acquire lock: ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1709 [inline] ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: inet6_getname+0x15d/0x650 net/ipv6/af_inet6.c:533 but task is already holding lock: ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1709 [inline] ffff88807a07aa20 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_sock_set_cork+0x2c/0x2e0 net/ipv4/tcp.c:3694 lock_sock_nested+0x48/0x100 net/core/sock.c:3780 lock_sock include/net/sock.h:1709 [inline] inet6_getname+0x15d/0x650 net/ipv6/af_inet6.c:533 rds_tcp_get_peer_sport net/rds/tcp_listen.c:70 [inline] rds_tcp_conn_slots_available+0x288/0x470 net/rds/tcp_listen.c:149 rds_recv_hs_exthdrs+0x60f/0x7c0 net/rds/recv.c:265 rds_recv_incoming+0x9f6/0x12d0 net/rds/recv.c:389 rds_tcp_data_recv+0x7f1/0xa40 net/rds/tcp_recv.c:243 __tcp_read_sock+0x196/0x970 net/ipv4/tcp.c:1702 rds_tcp_read_sock net/rds/tcp_recv.c:277 [inline] rds_tcp_data_ready+0x369/0x950 net/rds/tcp_recv.c:331 tcp_rcv_established+0x19e9/0x2670 net/ipv4/tcp_input.c:6675 tcp_v6_do_rcv+0x8eb/0x1ba0 net/ipv6/tcp_ipv6.c:1609 sk_backlog_rcv include/net/sock.h:1185 [inline] __release_sock+0x1b8/0x3a0 net/core/sock.c:3213 Reading from the socket struct directly is safe from possible paths. For rds_tcp_accept_one(), the socket has just been accepted and is not yet exposed to concurrent access. For rds_tcp_conn_slots_available(), direct access avoids the recursive deadlock seen during backlog processing where the socket lock is already held from the __release_sock(). However, rds_tcp_conn_slots_available() is also called from the normal softirq path via tcp_data_ready() where the lock is not held. This is also safe because inet_dport is a stable 16 bits field. A READ_ONCE() annotation as the value might be accessed lockless in a concurrent access context. Note that it is also safe to call rds_tcp_conn_slots_available() from rds_conn_shutdown() because the fan-out is disabled. Fixes: 9d27a0fb122f ("net/rds: Trigger rds_send_ping() more than once") Reported-by: syzbot+5efae91f60932839f0a5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5efae91f60932839f0a5 Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Allison Henderson Link: https://patch.msgid.link/20260219075738.4403-1-fmancera@suse.de Signed-off-by: Paolo Abeni --- net/rds/connection.c | 3 +++ net/rds/tcp_listen.c | 26 ++++---------------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 185f73b01694..a542f94c0214 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -455,6 +455,9 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rcu_read_unlock(); } + /* we do not hold the socket lock here but it is safe because + * fan-out is disabled when calling conn_slots_available() + */ if (conn->c_trans->conn_slots_available) conn->c_trans->conn_slots_available(conn, false); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index b4ab68a1da6d..08a506aa7ce7 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -59,30 +59,12 @@ void rds_tcp_keepalive(struct socket *sock) static int rds_tcp_get_peer_sport(struct socket *sock) { - union { - struct sockaddr_storage storage; - struct sockaddr addr; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - } saddr; - int sport; + struct sock *sk = sock->sk; - if (kernel_getpeername(sock, &saddr.addr) >= 0) { - switch (saddr.addr.sa_family) { - case AF_INET: - sport = ntohs(saddr.sin.sin_port); - break; - case AF_INET6: - sport = ntohs(saddr.sin6.sin6_port); - break; - default: - sport = -1; - } - } else { - sport = -1; - } + if (!sk) + return -1; - return sport; + return ntohs(READ_ONCE(inet_sk(sk)->inet_dport)); } /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the From fdcfce93073d990ed4b71752e31ad1c1d6e9d58b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 23 Feb 2026 20:59:33 +0100 Subject: [PATCH 240/828] eventpoll: Fix integer overflow in ep_loop_check_proc() If a recursive call to ep_loop_check_proc() hits the `result = INT_MAX`, an integer overflow will occur in the calling ep_loop_check_proc() at `result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1)`, breaking the recursion depth check. Fix it by using a different placeholder value that can't lead to an overflow. Reported-by: Guenter Roeck Fixes: f2e467a48287 ("eventpoll: Fix semi-unbounded recursion") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Link: https://patch.msgid.link/20260223-epoll-int-overflow-v1-1-452f35132224@google.com Signed-off-by: Christian Brauner --- fs/eventpoll.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 6c36d9dc6926..d20917b03161 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2061,7 +2061,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, * @ep: the &struct eventpoll to be currently checked. * @depth: Current depth of the path being checked. * - * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep. + * Return: depth of the subtree, or a value bigger than EP_MAX_NESTS if we found + * a loop or went too deep. */ static int ep_loop_check_proc(struct eventpoll *ep, int depth) { @@ -2080,7 +2081,7 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) struct eventpoll *ep_tovisit; ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) - result = INT_MAX; + result = EP_MAX_NESTS+1; else result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1); if (result > EP_MAX_NESTS) From bae8a5d2e759da2e0cba33ab2080deee96a09373 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 19 Feb 2026 20:46:37 +0800 Subject: [PATCH 241/828] net: wan: farsync: Fix use-after-free bugs caused by unfinished tasklets When the FarSync T-series card is being detached, the fst_card_info is deallocated in fst_remove_one(). However, the fst_tx_task or fst_int_task may still be running or pending, leading to use-after-free bugs when the already freed fst_card_info is accessed in fst_process_tx_work_q() or fst_process_int_work_q(). A typical race condition is depicted below: CPU 0 (cleanup) | CPU 1 (tasklet) | fst_start_xmit() fst_remove_one() | tasklet_schedule() unregister_hdlc_device()| | fst_process_tx_work_q() //handler kfree(card) //free | do_bottom_half_tx() | card-> //use The following KASAN trace was captured: ================================================================== BUG: KASAN: slab-use-after-free in do_bottom_half_tx+0xb88/0xd00 Read of size 4 at addr ffff88800aad101c by task ksoftirqd/3/32 ... Call Trace: dump_stack_lvl+0x55/0x70 print_report+0xcb/0x5d0 ? do_bottom_half_tx+0xb88/0xd00 kasan_report+0xb8/0xf0 ? do_bottom_half_tx+0xb88/0xd00 do_bottom_half_tx+0xb88/0xd00 ? _raw_spin_lock_irqsave+0x85/0xe0 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? __pfx___hrtimer_run_queues+0x10/0x10 fst_process_tx_work_q+0x67/0x90 tasklet_action_common+0x1fa/0x720 ? hrtimer_interrupt+0x31f/0x780 handle_softirqs+0x176/0x530 __irq_exit_rcu+0xab/0xe0 sysvec_apic_timer_interrupt+0x70/0x80 ... Allocated by task 41 on cpu 3 at 72.330843s: kasan_save_stack+0x24/0x50 kasan_save_track+0x17/0x60 __kasan_kmalloc+0x7f/0x90 fst_add_one+0x1a5/0x1cd0 local_pci_probe+0xdd/0x190 pci_device_probe+0x341/0x480 really_probe+0x1c6/0x6a0 __driver_probe_device+0x248/0x310 driver_probe_device+0x48/0x210 __device_attach_driver+0x160/0x320 bus_for_each_drv+0x101/0x190 __device_attach+0x198/0x3a0 device_initial_probe+0x78/0xa0 pci_bus_add_device+0x81/0xc0 pci_bus_add_devices+0x7e/0x190 enable_slot+0x9b9/0x1130 acpiphp_check_bridge.part.0+0x2e1/0x460 acpiphp_hotplug_notify+0x36c/0x3c0 acpi_device_hotplug+0x203/0xb10 acpi_hotplug_work_fn+0x59/0x80 ... Freed by task 41 on cpu 1 at 75.138639s: kasan_save_stack+0x24/0x50 kasan_save_track+0x17/0x60 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x43/0x70 kfree+0x135/0x410 fst_remove_one+0x2ca/0x540 pci_device_remove+0xa6/0x1d0 device_release_driver_internal+0x364/0x530 pci_stop_bus_device+0x105/0x150 pci_stop_and_remove_bus_device+0xd/0x20 disable_slot+0x116/0x260 acpiphp_disable_and_eject_slot+0x4b/0x190 acpiphp_hotplug_notify+0x230/0x3c0 acpi_device_hotplug+0x203/0xb10 acpi_hotplug_work_fn+0x59/0x80 ... The buggy address belongs to the object at ffff88800aad1000 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 28 bytes inside of freed 1024-byte region The buggy address belongs to the physical page: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xaad0 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x100000000000040(head|node=0|zone=1) page_type: f5(slab) raw: 0100000000000040 ffff888007042dc0 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000 head: 0100000000000040 ffff888007042dc0 dead000000000122 0000000000000000 head: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000 head: 0100000000000003 ffffea00002ab401 00000000ffffffff 00000000ffffffff head: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88800aad0f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88800aad0f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88800aad1000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88800aad1080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88800aad1100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fix this by ensuring that both fst_tx_task and fst_int_task are properly canceled before the fst_card_info is released. Add tasklet_kill() in fst_remove_one() to synchronize with any pending or running tasklets. Since unregister_hdlc_device() stops data transmission and reception, and fst_disable_intr() prevents further interrupts, it is appropriate to place tasklet_kill() after these calls. The bugs were identified through static analysis. To reproduce the issue and validate the fix, a FarSync T-series card was simulated in QEMU and delays(e.g., mdelay()) were introduced within the tasklet handler to increase the likelihood of triggering the race condition. Fixes: 2f623aaf9f31 ("net: farsync: Fix kmemleak when rmmods farsync") Signed-off-by: Duoming Zhou Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20260219124637.72578-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- drivers/net/wan/farsync.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 5b01642ca44e..6b2d1e63855e 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2550,6 +2550,8 @@ fst_remove_one(struct pci_dev *pdev) fst_disable_intr(card); free_irq(card->irq, card); + tasklet_kill(&fst_tx_task); + tasklet_kill(&fst_int_task); iounmap(card->ctlmem); iounmap(card->mem); From 82aec772fca2223bc5774bd9af486fd95766e578 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Feb 2026 11:50:21 -0800 Subject: [PATCH 242/828] netconsole: avoid OOB reads, msg is not nul-terminated msg passed to netconsole from the console subsystem is not guaranteed to be nul-terminated. Before recent commit 7eab73b18630 ("netconsole: convert to NBCON console infrastructure") the message would be placed in printk_shared_pbufs, a static global buffer, so KASAN had harder time catching OOB accesses. Now we see: printk: console [netcon_ext0] enabled BUG: KASAN: slab-out-of-bounds in string+0x1f7/0x240 Read of size 1 at addr ffff88813b6d4c00 by task pr/netcon_ext0/594 CPU: 65 UID: 0 PID: 594 Comm: pr/netcon_ext0 Not tainted 6.19.0-11754-g4246fd6547c9 Call Trace: kasan_report+0xe4/0x120 string+0x1f7/0x240 vsnprintf+0x655/0xba0 scnprintf+0xba/0x120 netconsole_write+0x3fe/0xa10 nbcon_emit_next_record+0x46e/0x860 nbcon_kthread_func+0x623/0x750 Allocated by task 1: nbcon_alloc+0x1ea/0x450 register_console+0x26b/0xe10 init_netconsole+0xbb0/0xda0 The buggy address belongs to the object at ffff88813b6d4000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 0 bytes to the right of allocated 3072-byte region [ffff88813b6d4000, ffff88813b6d4c00) Fixes: c62c0a17f9b7 ("netconsole: Append kernel version to message") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260219195021.2099699-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/netconsole.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index d144787b2947..1b6a4135ec08 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1679,7 +1679,8 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt, if (release_len) { release = init_utsname()->release; - scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); + scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%.*s", release, + msg_len, msg); msg_len += release_len; } else { memcpy(nt->buf, msg, msg_len); From 7a648d598cb8e8c62af3f0e020a25820a3f3a9a7 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Sat, 14 Feb 2026 23:14:51 +0800 Subject: [PATCH 243/828] pinctrl: pinconf-generic: Fix memory leak in pinconf_generic_parse_dt_config() In pinconf_generic_parse_dt_config(), if parse_dt_cfg() fails, it returns directly. This bypasses the cleanup logic and results in a memory leak of the cfg buffer. Fix this by jumping to the out label on failure, ensuring kfree(cfg) is called before returning. Fixes: 90a18c512884 ("pinctrl: pinconf-generic: Handle string values for generic properties") Signed-off-by: Felix Gu Reviewed-by: Antonio Borneo Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf-generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 94b1d057197c..2b030bd0e6ad 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -351,13 +351,13 @@ int pinconf_generic_parse_dt_config(struct device_node *np, ret = parse_dt_cfg(np, dt_params, ARRAY_SIZE(dt_params), cfg, &ncfg); if (ret) - return ret; + goto out; if (pctldev && pctldev->desc->num_custom_params && pctldev->desc->custom_params) { ret = parse_dt_cfg(np, pctldev->desc->custom_params, pctldev->desc->num_custom_params, cfg, &ncfg); if (ret) - return ret; + goto out; } /* no configs found at all */ From a48150d05190b41b5eec19f74b751a75a15a456a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 16 Feb 2026 09:58:39 +0100 Subject: [PATCH 244/828] pinctrl: amdisp: Make amdisp_pinctrl_ops variable static File-scope 'amdisp_pinctrl_ops' is not used outside of this unit, so make it static to silence sparse warning: pinctrl-amdisp.c:83:26: warning: symbol 'amdisp_pinctrl_ops' was not declared. Should it be static? Signed-off-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amdisp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amdisp.c b/drivers/pinctrl/pinctrl-amdisp.c index efbf40c776ea..e0874cc086a7 100644 --- a/drivers/pinctrl/pinctrl-amdisp.c +++ b/drivers/pinctrl/pinctrl-amdisp.c @@ -80,7 +80,7 @@ static int amdisp_get_group_pins(struct pinctrl_dev *pctldev, return 0; } -const struct pinctrl_ops amdisp_pinctrl_ops = { +static const struct pinctrl_ops amdisp_pinctrl_ops = { .get_groups_count = amdisp_get_groups_count, .get_group_name = amdisp_get_group_name, .get_group_pins = amdisp_get_group_pins, From c2e174994c9e98956a85dee9af6fbb293f5ad673 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 16 Feb 2026 09:58:40 +0100 Subject: [PATCH 245/828] pinctrl: cix: sky1: Unexport sky1_pinctrl_pm_ops File-scope 'sky1_pinctrl_pm_ops' is not used outside of this unit (and it should not be!), so unexport it and make it static to silence sparse warning: pinctrl-sky1.c:525:25: warning: symbol 'sky1_pinctrl_pm_ops' was not declared. Should it be static? Signed-off-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij --- drivers/pinctrl/cix/pinctrl-sky1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/cix/pinctrl-sky1.c b/drivers/pinctrl/cix/pinctrl-sky1.c index 5d0d8be815b2..938894058d86 100644 --- a/drivers/pinctrl/cix/pinctrl-sky1.c +++ b/drivers/pinctrl/cix/pinctrl-sky1.c @@ -522,11 +522,10 @@ static int __maybe_unused sky1_pinctrl_resume(struct device *dev) return pinctrl_force_default(spctl->pctl); } -const struct dev_pm_ops sky1_pinctrl_pm_ops = { +static const struct dev_pm_ops sky1_pinctrl_pm_ops = { SET_LATE_SYSTEM_SLEEP_PM_OPS(sky1_pinctrl_suspend, sky1_pinctrl_resume) }; -EXPORT_SYMBOL_GPL(sky1_pinctrl_pm_ops); static int sky1_pinctrl_probe(struct platform_device *pdev) { From e9e268ea9df102abef34d7afba59ef4d5868d5d7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 16 Feb 2026 09:25:50 +0100 Subject: [PATCH 246/828] pinctrl: qcom: sdm660-lpass-lpi: Make groups and functions variables static File-scope 'sdm660_lpi_pinctrl_groups' and 'sdm660_lpi_pinctrl_functions' are not used outside of this unit, so make them static to silence sparse warnings: pinctrl-sdm660-lpass-lpi.c:79:27: warning: symbol 'sdm660_lpi_pinctrl_groups' was not declared. Should it be static? pinctrl-sdm660-lpass-lpi.c:116:27: warning: symbol 'sdm660_lpi_pinctrl_functions' was not declared. Should it be static? Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c index d93af5f0e8d3..65411abfbfac 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c @@ -76,7 +76,7 @@ static const char * const pdm_clk_groups[] = { "gpio18" }; static const char * const pdm_rx_groups[] = { "gpio21", "gpio23", "gpio25" }; static const char * const pdm_sync_groups[] = { "gpio19" }; -const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = { +static const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = { LPI_PINGROUP_OFFSET(0, LPI_NO_SLEW, _, _, _, _, 0x0000), LPI_PINGROUP_OFFSET(1, LPI_NO_SLEW, _, _, _, _, 0x1000), LPI_PINGROUP_OFFSET(2, LPI_NO_SLEW, _, _, _, _, 0x2000), @@ -113,7 +113,7 @@ const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = { LPI_PINGROUP_OFFSET(31, LPI_NO_SLEW, _, _, _, _, 0xb010), }; -const struct lpi_function sdm660_lpi_pinctrl_functions[] = { +static const struct lpi_function sdm660_lpi_pinctrl_functions[] = { LPI_FUNCTION(comp_rx), LPI_FUNCTION(dmic1_clk), LPI_FUNCTION(dmic1_data), From a2539b92e4b791c1ba482930b5e51b1591975461 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Thu, 19 Feb 2026 00:51:22 +0800 Subject: [PATCH 247/828] pinctrl: meson: amlogic-a4: Fix device node reference leak in aml_dt_node_to_map_pinmux() The of_get_parent() function returns a device_node with an incremented reference count. Use the __free(device_node) cleanup attribute to ensure of_node_put() is automatically called when pnode goes out of scope, fixing a reference leak. Fixes: 6e9be3abb78c ("pinctrl: Add driver support for Amlogic SoCs") Signed-off-by: Felix Gu Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index dfa32b11555c..e2293a872dcb 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -679,7 +679,6 @@ static int aml_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev, unsigned int *num_maps) { struct device *dev = pctldev->dev; - struct device_node *pnode; unsigned long *configs = NULL; unsigned int num_configs = 0; struct property *prop; @@ -693,7 +692,7 @@ static int aml_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev, return -ENOENT; } - pnode = of_get_parent(np); + struct device_node *pnode __free(device_node) = of_get_parent(np); if (!pnode) { dev_info(dev, "Missing function node\n"); return -EINVAL; From fd5bed798f45eb3a178ad527b43ab92705faaf8a Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 23 Feb 2026 17:39:07 +0800 Subject: [PATCH 248/828] pinctrl: cirrus: cs42l43: Fix double-put in cs42l43_pin_probe() devm_add_action_or_reset() already invokes the action on failure, so the explicit put causes a double-put. Fixes: 9b07cdf86a0b ("pinctrl: cirrus: Fix fwnode leak in cs42l43_pin_probe()") Signed-off-by: Felix Gu Reviewed-by: Charles Keepax Signed-off-by: Linus Walleij --- drivers/pinctrl/cirrus/pinctrl-cs42l43.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index a8f82104a384..227c37c360e1 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c @@ -574,10 +574,9 @@ static int cs42l43_pin_probe(struct platform_device *pdev) if (child) { ret = devm_add_action_or_reset(&pdev->dev, cs42l43_fwnode_put, child); - if (ret) { - fwnode_handle_put(child); + if (ret) return ret; - } + if (!child->dev) child->dev = priv->dev; fwnode = child; From 45fe4592454368df24d18352be700ff40e7df0c0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 23 Feb 2026 11:57:18 +0100 Subject: [PATCH 249/828] pinctrl: rockchip: Fix configuring a deferred pin Commit e2c58cbe3aff ("pinctrl: rockchip: Simplify locking with scoped_guard()") added a scoped_guard() over existing code containing a "break" instruction. That "break" was for the outer (existing) for-loop, which now exits inner, scoped_guard() loop. If GPIO driver did not probe, then driver will not bail out, but instead continue to configure the pin. Fix the issue by simplifying the code - the break in original code was leading directly to end of the function returning 0, thus we can simply return here rockchip_pinconf_defer_pin status. Reported-by: David Lechner Closes: https://lore.kernel.org/r/f5b38942-a584-4e78-a893-de4a219070b2@baylibre.com/ Fixes: e2c58cbe3aff ("pinctrl: rockchip: Simplify locking with scoped_guard()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index d87c0b1de616..f15b18f334ee 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -3640,14 +3640,10 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, * or the gpio driver hasn't probed yet. */ scoped_guard(mutex, &bank->deferred_lock) { - if (!gpio || !gpio->direction_output) { - rc = rockchip_pinconf_defer_pin(bank, - pin - bank->pin_base, - param, arg); - if (rc) - return rc; - break; - } + if (!gpio || !gpio->direction_output) + return rockchip_pinconf_defer_pin(bank, + pin - bank->pin_base, + param, arg); } } From 01e10d0272b932f908b4f9b6609a10cb1f35fafe Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 24 Feb 2026 17:24:18 +0800 Subject: [PATCH 250/828] pinctrl: sunxi: Implement gpiochip::get_direction() After commit 471e998c0e31 ("gpiolib: remove redundant callback check"), a warning will be printed if the gpio driver does not implement this callback. The warning was added in commit e623c4303ed1 ("gpiolib: sanitize the return value of gpio_chip::get_direction()"), but was masked by the "redundant" check. The warning can be triggered by any action that calls the callback, such as dumping the GPIO state from /sys/kernel/debug/gpio. Implement it for the sunxi driver. This is simply a matter of reading out the mux value from the registers, then checking if it is one of the GPIO functions and which direction it is. Signed-off-by: Chen-Yu Tsai Reviewed-by: Jernej Skrabec Reviewed-by: Bartosz Golaszewski Reviewed-by: Andre Przywara Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 51 +++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 48434292a39b..c990b6118172 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -204,6 +204,32 @@ sunxi_pinctrl_desc_find_function_by_pin(struct sunxi_pinctrl *pctl, return NULL; } +static struct sunxi_desc_function * +sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl, + const u16 pin_num, + const u8 muxval) +{ + for (unsigned int i = 0; i < pctl->desc->npins; i++) { + const struct sunxi_desc_pin *pin = pctl->desc->pins + i; + struct sunxi_desc_function *func = pin->functions; + + if (pin->pin.number != pin_num) + continue; + + if (pin->variant && !(pctl->variant & pin->variant)) + continue; + + while (func->name) { + if (func->muxval == muxval) + return func; + + func++; + } + } + + return NULL; +} + static int sunxi_pctrl_get_groups_count(struct pinctrl_dev *pctldev) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); @@ -930,6 +956,30 @@ static const struct pinmux_ops sunxi_pmx_ops = { .strict = true, }; +static int sunxi_pinctrl_gpio_get_direction(struct gpio_chip *chip, + unsigned int offset) +{ + struct sunxi_pinctrl *pctl = gpiochip_get_data(chip); + const struct sunxi_desc_function *func; + u32 pin = offset + chip->base; + u32 reg, shift, mask; + u8 muxval; + + sunxi_mux_reg(pctl, offset, ®, &shift, &mask); + + muxval = (readl(pctl->membase + reg) & mask) >> shift; + + func = sunxi_pinctrl_desc_find_function_by_pin_and_mux(pctl, pin, muxval); + if (!func) + return -ENODEV; + + if (!strcmp(func->name, "gpio_out")) + return GPIO_LINE_DIRECTION_OUT; + if (!strcmp(func->name, "gpio_in") || !strcmp(func->name, "irq")) + return GPIO_LINE_DIRECTION_IN; + return -EINVAL; +} + static int sunxi_pinctrl_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { @@ -1599,6 +1649,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->chip->request = gpiochip_generic_request; pctl->chip->free = gpiochip_generic_free; pctl->chip->set_config = gpiochip_generic_config; + pctl->chip->get_direction = sunxi_pinctrl_gpio_get_direction; pctl->chip->direction_input = sunxi_pinctrl_gpio_direction_input; pctl->chip->direction_output = sunxi_pinctrl_gpio_direction_output; pctl->chip->get = sunxi_pinctrl_gpio_get; From fd80bd7105f88189f47d465ca8cb7d115570de30 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Fri, 20 Feb 2026 17:21:26 -0500 Subject: [PATCH 251/828] RDMA/ionic: Fix potential NULL pointer dereference in ionic_query_port The function ionic_query_port() calls ib_device_get_netdev() without checking the return value which could lead to NULL pointer dereference, Fix it by checking the return value and return -ENODEV if the 'ndev' is NULL. Fixes: 2075bbe8ef03 ("RDMA/ionic: Register device ops for miscellaneous functionality") Signed-off-by: Kamal Heib Link: https://patch.msgid.link/20260220222125.16973-2-kheib@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 164046d00e5d..bd4c73e530d0 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -81,6 +81,8 @@ static int ionic_query_port(struct ib_device *ibdev, u32 port, return -EINVAL; ndev = ib_device_get_netdev(ibdev, port); + if (!ndev) + return -ENODEV; if (netif_running(ndev) && netif_carrier_ok(ndev)) { attr->state = IB_PORT_ACTIVE; From f22c77ce49db0589103d96487dca56f5b2136362 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:47 -0400 Subject: [PATCH 252/828] RDMA/efa: Fix typo in efa_alloc_mr() The pattern is to check the entire driver request space, not just sizeof something unrelated. Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/1-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Acked-by: Michael Margolin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index b5b93b42e6c4..b0aa7c0238ed 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1661,7 +1661,7 @@ static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, struct efa_mr *mr; if (udata && udata->inlen && - !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { + !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, udata not cleared\n"); return ERR_PTR(-EINVAL); From 117942ca43e2e3c3d121faae530989931b7f67e1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:48 -0400 Subject: [PATCH 253/828] IB/mthca: Add missed mthca_unmap_user_db() for mthca_create_srq() Fix a user triggerable leak on the system call failure path. Cc: stable@vger.kernel.org Fixes: ec34a922d243 ("[PATCH] IB/mthca: Add SRQ implementation") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/2-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mthca/mthca_provider.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index ef0635064fba..5c182ae4fc2f 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -428,6 +428,8 @@ static int mthca_create_srq(struct ib_srq *ibsrq, if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { mthca_free_srq(to_mdev(ibsrq->device), srq); + mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar, + context->db_tab, ucmd.db_index); return -EFAULT; } @@ -436,6 +438,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq, static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); if (udata) { struct mthca_ucontext *context = rdma_udata_to_drv_context( @@ -446,8 +449,6 @@ static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mthca_unmap_user_db(to_mdev(srq->device), &context->uar, context->db_tab, to_msrq(srq)->db_index); } - - mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); return 0; } From 74586c6da9ea222a61c98394f2fc0a604748438c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:49 -0400 Subject: [PATCH 254/828] RDMA/irdma: Fix kernel stack leak in irdma_create_user_ah() struct irdma_create_ah_resp { // 8 bytes, no padding __u32 ah_id; // offset 0 - SET (uresp.ah_id = ah->sc_ah.ah_info.ah_idx) __u8 rsvd[4]; // offset 4 - NEVER SET <- LEAK }; rsvd[4]: 4 bytes of stack memory leaked unconditionally. Only ah_id is assigned before ib_respond_udata(). The reserved members of the structure were not zeroed. Cc: stable@vger.kernel.org Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/3-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 15af53237217..7251cd7a2147 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -5212,7 +5212,7 @@ static int irdma_create_user_ah(struct ib_ah *ibah, #define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); struct irdma_device *iwdev = to_iwdev(ibah->pd->device); - struct irdma_create_ah_resp uresp; + struct irdma_create_ah_resp uresp = {}; struct irdma_ah *parent_ah; int err; From faa72102b178c7ae6c6afea23879e7c84fc59b4e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:50 -0400 Subject: [PATCH 255/828] RDMA/ionic: Fix kernel stack leak in ionic_create_cq() struct ionic_cq_resp resp { __u32 cqid[2]; // offset 0 - PARTIALLY SET (see below) __u8 udma_mask; // offset 8 - SET (resp.udma_mask = vcq->udma_mask) __u8 rsvd[7]; // offset 9 - NEVER SET <- LEAK }; rsvd[7]: 7 bytes of stack memory leaked unconditionally. cqid[2]: The loop at line 1256 iterates over udma_idx but skips indices where !(vcq->udma_mask & BIT(udma_idx)). The array has 2 entries but udma_count could be 1, meaning cqid[1] might never be written via ionic_create_cq_common(). If udma_mask only has bit 0 set, cqid[1] (4 bytes) is also leaked. So potentially 11 bytes leaked. Cc: stable@vger.kernel.org Fixes: e8521822c733 ("RDMA/ionic: Register device ops for control path") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/4-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Acked-by: Abhijit Gangurde Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_controlpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c index 5b3f40bd98d8..4842931f5316 100644 --- a/drivers/infiniband/hw/ionic/ionic_controlpath.c +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c @@ -1218,7 +1218,7 @@ int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx); struct ionic_vcq *vcq = to_ionic_vcq(ibcq); struct ionic_tbl_buf buf = {}; - struct ionic_cq_resp resp; + struct ionic_cq_resp resp = {}; struct ionic_cq_req req; int udma_idx = 0, rc; From 983512f3a87fd8dc4c94dfa6b596b6e57df5aad7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 20 Feb 2026 19:38:58 +0100 Subject: [PATCH 256/828] net: Drop the lock in skb_may_tx_timestamp() skb_may_tx_timestamp() may acquire sock::sk_callback_lock. The lock must not be taken in IRQ context, only softirq is okay. A few drivers receive the timestamp via a dedicated interrupt and complete the TX timestamp from that handler. This will lead to a deadlock if the lock is already write-locked on the same CPU. Taking the lock can be avoided. The socket (pointed by the skb) will remain valid until the skb is released. The ->sk_socket and ->file member will be set to NULL once the user closes the socket which may happen before the timestamp arrives. If we happen to observe the pointer while the socket is closing but before the pointer is set to NULL then we may use it because both pointer (and the file's cred member) are RCU freed. Drop the lock. Use READ_ONCE() to obtain the individual pointer. Add a matching WRITE_ONCE() where the pointer are cleared. Link: https://lore.kernel.org/all/20260205145104.iWinkXHv@linutronix.de Fixes: b245be1f4db1a ("net-timestamp: no-payload only sysctl") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260220183858.N4ERjFW6@linutronix.de Signed-off-by: Paolo Abeni --- include/net/sock.h | 2 +- net/core/skbuff.c | 23 ++++++++++++++++++----- net/socket.c | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 66b56288c1d3..6c9a83016e95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2098,7 +2098,7 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk->sk_socket = sock; + WRITE_ONCE(sk->sk_socket, sock); if (sock) { WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dc47d3efc72e..0e217041958a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5590,15 +5590,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { - bool ret; + struct socket *sock; + struct file *file; + bool ret = false; if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data))) return true; - read_lock_bh(&sk->sk_callback_lock); - ret = sk->sk_socket && sk->sk_socket->file && - file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); - read_unlock_bh(&sk->sk_callback_lock); + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + rcu_read_lock(); + sock = READ_ONCE(sk->sk_socket); + if (!sock) + goto out; + file = READ_ONCE(sock->file); + if (!file) + goto out; + ret = file_ns_capable(file, &init_user_ns, CAP_NET_RAW); +out: + rcu_read_unlock(); return ret; } diff --git a/net/socket.c b/net/socket.c index 136b98c54fb3..05952188127f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -674,7 +674,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) iput(SOCK_INODE(sock)); return; } - sock->file = NULL; + WRITE_ONCE(sock->file, NULL); } /** From bf4fde7db4a8e2613cba36d81ac271f3d66c28f7 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Tue, 24 Feb 2026 14:02:07 +0800 Subject: [PATCH 257/828] erofs: remove more unnecessary #ifdefs Many #ifdefs can be replaced with IS_ENABLED() to improve code readability. No functional changes. Signed-off-by: Ferry Meng Reviewed-by: Gao Xiang Signed-off-by: Gao Xiang --- fs/erofs/super.c | 83 ++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index d4995686ac6c..972a0c82198d 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -424,26 +424,23 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) { -#ifdef CONFIG_FS_DAX - struct erofs_sb_info *sbi = fc->s_fs_info; + if (IS_ENABLED(CONFIG_FS_DAX)) { + struct erofs_sb_info *sbi = fc->s_fs_info; - switch (mode) { - case EROFS_MOUNT_DAX_ALWAYS: - set_opt(&sbi->opt, DAX_ALWAYS); - clear_opt(&sbi->opt, DAX_NEVER); - return true; - case EROFS_MOUNT_DAX_NEVER: - set_opt(&sbi->opt, DAX_NEVER); - clear_opt(&sbi->opt, DAX_ALWAYS); - return true; - default: + if (mode == EROFS_MOUNT_DAX_ALWAYS) { + set_opt(&sbi->opt, DAX_ALWAYS); + clear_opt(&sbi->opt, DAX_NEVER); + return true; + } else if (mode == EROFS_MOUNT_DAX_NEVER) { + set_opt(&sbi->opt, DAX_NEVER); + clear_opt(&sbi->opt, DAX_ALWAYS); + return true; + } DBG_BUGON(1); return false; } -#else errorfc(fc, "dax options not supported"); return false; -#endif } static int erofs_fc_parse_param(struct fs_context *fc, @@ -460,31 +457,26 @@ static int erofs_fc_parse_param(struct fs_context *fc, switch (opt) { case Opt_user_xattr: -#ifdef CONFIG_EROFS_FS_XATTR - if (result.boolean) + if (!IS_ENABLED(CONFIG_EROFS_FS_XATTR)) + errorfc(fc, "{,no}user_xattr options not supported"); + else if (result.boolean) set_opt(&sbi->opt, XATTR_USER); else clear_opt(&sbi->opt, XATTR_USER); -#else - errorfc(fc, "{,no}user_xattr options not supported"); -#endif break; case Opt_acl: -#ifdef CONFIG_EROFS_FS_POSIX_ACL - if (result.boolean) + if (!IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL)) + errorfc(fc, "{,no}acl options not supported"); + else if (result.boolean) set_opt(&sbi->opt, POSIX_ACL); else clear_opt(&sbi->opt, POSIX_ACL); -#else - errorfc(fc, "{,no}acl options not supported"); -#endif break; case Opt_cache_strategy: -#ifdef CONFIG_EROFS_FS_ZIP - sbi->opt.cache_strategy = result.uint_32; -#else - errorfc(fc, "compression not supported, cache_strategy ignored"); -#endif + if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP)) + errorfc(fc, "compression not supported, cache_strategy ignored"); + else + sbi->opt.cache_strategy = result.uint_32; break; case Opt_dax: if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS)) @@ -533,24 +525,21 @@ static int erofs_fc_parse_param(struct fs_context *fc, break; #endif case Opt_directio: -#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE - if (result.boolean) + if (!IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE)) + errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); + else if (result.boolean) set_opt(&sbi->opt, DIRECT_IO); else clear_opt(&sbi->opt, DIRECT_IO); -#else - errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); -#endif break; case Opt_fsoffset: sbi->dif0.fsoff = result.uint_64; break; case Opt_inode_share: -#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE - set_opt(&sbi->opt, INODE_SHARE); -#else - errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); -#endif + if (!IS_ENABLED(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)) + errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); + else + set_opt(&sbi->opt, INODE_SHARE); break; } return 0; @@ -809,8 +798,7 @@ static int erofs_fc_get_tree(struct fs_context *fc) ret = get_tree_bdev_flags(fc, erofs_fc_fill_super, IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) ? GET_TREE_BDEV_QUIET_LOOKUP : 0); -#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE - if (ret == -ENOTBLK) { + if (IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && ret == -ENOTBLK) { struct file *file; if (!fc->source) @@ -824,7 +812,6 @@ static int erofs_fc_get_tree(struct fs_context *fc) sbi->dif0.file->f_mapping->a_ops->read_folio) return get_tree_nodev(fc, erofs_fc_fill_super); } -#endif return ret; } @@ -1108,12 +1095,12 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",dax=never"); if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO)) seq_puts(seq, ",directio"); -#ifdef CONFIG_EROFS_FS_ONDEMAND - if (sbi->fsid) - seq_printf(seq, ",fsid=%s", sbi->fsid); - if (sbi->domain_id) - seq_printf(seq, ",domain_id=%s", sbi->domain_id); -#endif + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND)) { + if (sbi->fsid) + seq_printf(seq, ",fsid=%s", sbi->fsid); + if (sbi->domain_id) + seq_printf(seq, ",domain_id=%s", sbi->domain_id); + } if (sbi->dif0.fsoff) seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff); if (test_opt(opt, INODE_SHARE)) From 3d7e6ce34f4fcc7083510c28b17a7c36462a25d4 Mon Sep 17 00:00:00 2001 From: Ziyi Guo Date: Sun, 22 Feb 2026 05:06:33 +0000 Subject: [PATCH 258/828] net: usb: pegasus: enable basic endpoint checking pegasus_probe() fills URBs with hardcoded endpoint pipes without verifying the endpoint descriptors: - usb_rcvbulkpipe(dev, 1) for RX data - usb_sndbulkpipe(dev, 2) for TX data - usb_rcvintpipe(dev, 3) for status interrupts A malformed USB device can present these endpoints with transfer types that differ from what the driver assumes. Add a pegasus_usb_ep enum for endpoint numbers, replacing magic constants throughout. Add usb_check_bulk_endpoints() and usb_check_int_endpoints() calls before any resource allocation to verify endpoint types before use, rejecting devices with mismatched descriptors at probe time, and avoid triggering assertion. Similar fix to - commit 90b7f2961798 ("net: usb: rtl8150: enable basic endpoint checking") - commit 9e7021d2aeae ("net: usb: catc: enable basic endpoint checking") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ziyi Guo Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260222050633.410165-1-n7l8m4@u.northwestern.edu Signed-off-by: Paolo Abeni --- drivers/net/usb/pegasus.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 7b6d6eb60709..b84968c5f074 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -28,6 +28,17 @@ static const char driver_name[] = "pegasus"; BMSR_100FULL | BMSR_ANEGCAPABLE) #define CARRIER_CHECK_DELAY (2 * HZ) +/* + * USB endpoints. + */ + +enum pegasus_usb_ep { + PEGASUS_USB_EP_CONTROL = 0, + PEGASUS_USB_EP_BULK_IN = 1, + PEGASUS_USB_EP_BULK_OUT = 2, + PEGASUS_USB_EP_INT_IN = 3, +}; + static bool loopback; static bool mii_mode; static char *devid; @@ -542,7 +553,7 @@ static void read_bulk_callback(struct urb *urb) goto tl_sched; goon: usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); rx_status = usb_submit_urb(pegasus->rx_urb, GFP_ATOMIC); @@ -582,7 +593,7 @@ static void rx_fixup(struct tasklet_struct *t) return; } usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); try_again: @@ -710,7 +721,7 @@ static netdev_tx_t pegasus_start_xmit(struct sk_buff *skb, ((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16); skb_copy_from_linear_data(skb, pegasus->tx_buff + 2, skb->len); usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb, - usb_sndbulkpipe(pegasus->usb, 2), + usb_sndbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_OUT), pegasus->tx_buff, count, write_bulk_callback, pegasus); if ((res = usb_submit_urb(pegasus->tx_urb, GFP_ATOMIC))) { @@ -837,7 +848,7 @@ static int pegasus_open(struct net_device *net) set_registers(pegasus, EthID, 6, net->dev_addr); usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); if ((res = usb_submit_urb(pegasus->rx_urb, GFP_KERNEL))) { @@ -848,7 +859,7 @@ static int pegasus_open(struct net_device *net) } usb_fill_int_urb(pegasus->intr_urb, pegasus->usb, - usb_rcvintpipe(pegasus->usb, 3), + usb_rcvintpipe(pegasus->usb, PEGASUS_USB_EP_INT_IN), pegasus->intr_buff, sizeof(pegasus->intr_buff), intr_callback, pegasus, pegasus->intr_interval); if ((res = usb_submit_urb(pegasus->intr_urb, GFP_KERNEL))) { @@ -1133,10 +1144,24 @@ static int pegasus_probe(struct usb_interface *intf, pegasus_t *pegasus; int dev_index = id - pegasus_ids; int res = -ENOMEM; + static const u8 bulk_ep_addr[] = { + PEGASUS_USB_EP_BULK_IN | USB_DIR_IN, + PEGASUS_USB_EP_BULK_OUT | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + PEGASUS_USB_EP_INT_IN | USB_DIR_IN, + 0}; if (pegasus_blacklisted(dev)) return -ENODEV; + /* Verify that all required endpoints are present */ + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(&intf->dev, "Missing or invalid endpoints\n"); + return -ENODEV; + } + net = alloc_etherdev(sizeof(struct pegasus)); if (!net) goto out; From 4a1ddb0f1c48c2b56f21d8b5200e2e29adf4c1df Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Feb 2026 12:09:00 +0100 Subject: [PATCH 259/828] pidfs: avoid misleading break The break would only break out of the scoped_guard() loop, not the switch statement. It still works correct as is ofc but let's avoid the confusion. Reported-by: David Lechner Link:: https://lore.kernel.org/cd2153f1-098b-463c-bbc1-5c6ca9ef1f12@baylibre.com Signed-off-by: Christian Brauner --- fs/pidfs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 1e20e36e0ed5..21f9f011a957 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -577,9 +577,8 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct user_namespace *user_ns; user_ns = task_cred_xxx(task, user_ns); - if (!ns_ref_get(user_ns)) - break; - ns_common = to_ns_common(user_ns); + if (ns_ref_get(user_ns)) + ns_common = to_ns_common(user_ns); } #endif break; @@ -589,9 +588,8 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct pid_namespace *pid_ns; pid_ns = task_active_pid_ns(task); - if (!ns_ref_get(pid_ns)) - break; - ns_common = to_ns_common(pid_ns); + if (ns_ref_get(pid_ns)) + ns_common = to_ns_common(pid_ns); } #endif break; From c8dbdc6e380e7e96a51706db3e4b7870d8a9402d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 22 Feb 2026 16:26:01 +0100 Subject: [PATCH 260/828] net: phy: register phy led_triggers during probe to avoid AB-BA deadlock There is an AB-BA deadlock when both LEDS_TRIGGER_NETDEV and LED_TRIGGER_PHY are enabled: [ 1362.049207] [<8054e4b8>] led_trigger_register+0x5c/0x1fc <-- Trying to get lock "triggers_list_lock" via down_write(&triggers_list_lock); [ 1362.054536] [<80662830>] phy_led_triggers_register+0xd0/0x234 [ 1362.060329] [<8065e200>] phy_attach_direct+0x33c/0x40c [ 1362.065489] [<80651fc4>] phylink_fwnode_phy_connect+0x15c/0x23c [ 1362.071480] [<8066ee18>] mtk_open+0x7c/0xba0 [ 1362.075849] [<806d714c>] __dev_open+0x280/0x2b0 [ 1362.080384] [<806d7668>] __dev_change_flags+0x244/0x24c [ 1362.085598] [<806d7698>] dev_change_flags+0x28/0x78 [ 1362.090528] [<807150e4>] dev_ioctl+0x4c0/0x654 <-- Hold lock "rtnl_mutex" by calling rtnl_lock(); [ 1362.094985] [<80694360>] sock_ioctl+0x2f4/0x4e0 [ 1362.099567] [<802e9c4c>] sys_ioctl+0x32c/0xd8c [ 1362.104022] [<80014504>] syscall_common+0x34/0x58 Here LED_TRIGGER_PHY is registering LED triggers during phy_attach while holding RTNL and then taking triggers_list_lock. [ 1362.191101] [<806c2640>] register_netdevice_notifier+0x60/0x168 <-- Trying to get lock "rtnl_mutex" via rtnl_lock(); [ 1362.197073] [<805504ac>] netdev_trig_activate+0x194/0x1e4 [ 1362.202490] [<8054e28c>] led_trigger_set+0x1d4/0x360 <-- Hold lock "triggers_list_lock" by down_read(&triggers_list_lock); [ 1362.207511] [<8054eb38>] led_trigger_write+0xd8/0x14c [ 1362.212566] [<80381d98>] sysfs_kf_bin_write+0x80/0xbc [ 1362.217688] [<8037fcd8>] kernfs_fop_write_iter+0x17c/0x28c [ 1362.223174] [<802cbd70>] vfs_write+0x21c/0x3c4 [ 1362.227712] [<802cc0c4>] ksys_write+0x78/0x12c [ 1362.232164] [<80014504>] syscall_common+0x34/0x58 Here LEDS_TRIGGER_NETDEV is being enabled on an LED. It first takes triggers_list_lock and then RTNL. A classical AB-BA deadlock. phy_led_triggers_registers() does not require the RTNL, it does not make any calls into the network stack which require protection. There is also no requirement the PHY has been attached to a MAC, the triggers only make use of phydev state. This allows the call to phy_led_triggers_registers() to be placed elsewhere. PHY probe() and release() don't hold RTNL, so solving the AB-BA deadlock. Reported-by: Shiji Yang Closes: https://lore.kernel.org/all/OS7PR01MB13602B128BA1AD3FA38B6D1FFBC69A@OS7PR01MB13602.jpnprd01.prod.outlook.com/ Fixes: 06f502f57d0d ("leds: trigger: Introduce a NETDEV trigger") Cc: stable@vger.kernel.org Signed-off-by: Andrew Lunn Tested-by: Shiji Yang Link: https://patch.msgid.link/20260222152601.1978655-1-andrew@lunn.ch Signed-off-by: Paolo Abeni --- drivers/net/phy/phy_device.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9b8eaac63b90..cbb4af604aa5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1866,8 +1866,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, goto error; phy_resume(phydev); - if (!phydev->is_on_sfp_module) - phy_led_triggers_register(phydev); /** * If the external phy used by current mac interface is managed by @@ -1982,9 +1980,6 @@ void phy_detach(struct phy_device *phydev) phydev->phy_link_change = NULL; phydev->phylink = NULL; - if (!phydev->is_on_sfp_module) - phy_led_triggers_unregister(phydev); - if (phydev->mdio.dev.driver) module_put(phydev->mdio.dev.driver->owner); @@ -3778,16 +3773,27 @@ static int phy_probe(struct device *dev) /* Set the state to READY by default */ phydev->state = PHY_READY; + /* Register the PHY LED triggers */ + if (!phydev->is_on_sfp_module) + phy_led_triggers_register(phydev); + /* Get the LEDs from the device tree, and instantiate standard * LEDs for them. */ - if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) + if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) { err = of_phy_leds(phydev); + if (err) + goto out; + } + + return 0; out: + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); + /* Re-assert the reset signal on error */ - if (err) - phy_device_reset(phydev, 1); + phy_device_reset(phydev, 1); return err; } @@ -3801,6 +3807,9 @@ static int phy_remove(struct device *dev) if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) phy_leds_unregister(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); + phydev->state = PHY_DOWN; phy_cleanup_ports(phydev); From 78437ab3b769f80526416570f60173c89858dd84 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 13 Feb 2026 00:58:11 +0100 Subject: [PATCH 261/828] clk: scu/imx8qxp: do not register driver in probe() imx_clk_scu_init() registers the imx_clk_scu_driver while commonly being called from IMX driver's probe() callbacks. However, it neither makes sense to register drivers from probe() callbacks of other drivers, nor does the driver core allow registering drivers with a device lock already being held. The latter was revealed by commit dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") leading to a deadlock condition described in [1]. Besides that, nothing seems to unregister the imx_clk_scu_driver once the corresponding driver module is unloaded, which leaves the driver-core with a dangling pointer. Also, if there are multiple matching devices for the imx8qxp_clk_driver, imx8qxp_clk_probe() calls imx_clk_scu_init() multiple times. However, any subsequent call after the first one will fail, since the driver-core does not allow to register the same struct platform_driver multiple times. Hence, register the imx_clk_scu_driver from module_init() and unregister it in module_exit(). Note that we first register the imx8qxp_clk_driver and then call imx_clk_scu_module_init() to avoid having to call imx_clk_scu_module_exit() in the unwind path of imx8qxp_clk_init(). Fixes: dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") Fixes: 220175cd3979 ("clk: imx: scu: fix build break when compiled as modules") Reported-by: Alexander Stein Closes: https://lore.kernel.org/lkml/13955113.uLZWGnKmhe@steina-w/ Tested-by: Alexander Stein # TQMa8x/MBa8x Link: https://lore.kernel.org/lkml/DFU7CEPUSG9A.1KKGVW4HIPMSH@kernel.org/ [1] Acked-by: Abel Vesa Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/20260212235842.85934-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/clk/imx/clk-imx8qxp.c | 24 +++++++++++++++++++++++- drivers/clk/imx/clk-scu.c | 12 +++++++++++- drivers/clk/imx/clk-scu.h | 2 ++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index 3ae162625bb1..c781425a005e 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -346,7 +346,29 @@ static struct platform_driver imx8qxp_clk_driver = { }, .probe = imx8qxp_clk_probe, }; -module_platform_driver(imx8qxp_clk_driver); + +static int __init imx8qxp_clk_init(void) +{ + int ret; + + ret = platform_driver_register(&imx8qxp_clk_driver); + if (ret) + return ret; + + ret = imx_clk_scu_module_init(); + if (ret) + platform_driver_unregister(&imx8qxp_clk_driver); + + return ret; +} +module_init(imx8qxp_clk_init); + +static void __exit imx8qxp_clk_exit(void) +{ + imx_clk_scu_module_exit(); + platform_driver_unregister(&imx8qxp_clk_driver); +} +module_exit(imx8qxp_clk_exit); MODULE_AUTHOR("Aisheng Dong "); MODULE_DESCRIPTION("NXP i.MX8QXP clock driver"); diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index 33e637ad3579..a85ec48a798b 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -191,6 +191,16 @@ static bool imx_scu_clk_is_valid(u32 rsrc_id) return p != NULL; } +int __init imx_clk_scu_module_init(void) +{ + return platform_driver_register(&imx_clk_scu_driver); +} + +void __exit imx_clk_scu_module_exit(void) +{ + return platform_driver_unregister(&imx_clk_scu_driver); +} + int imx_clk_scu_init(struct device_node *np, const struct imx_clk_scu_rsrc_table *data) { @@ -215,7 +225,7 @@ int imx_clk_scu_init(struct device_node *np, rsrc_table = data; } - return platform_driver_register(&imx_clk_scu_driver); + return 0; } /* diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index af7b697f51ca..ca82f2cce897 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h @@ -25,6 +25,8 @@ extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8dxl; extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qxp; extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qm; +int __init imx_clk_scu_module_init(void); +void __exit imx_clk_scu_module_exit(void); int imx_clk_scu_init(struct device_node *np, const struct imx_clk_scu_rsrc_table *data); struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec, From fb73d0e19f12b793bfe013171d931587f37e3552 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 23 Feb 2026 13:10:20 +0530 Subject: [PATCH 262/828] MAINTAINERS: Update AMD XGBE driver maintainers Due to additional responsibilities, Shyam Sundar S K will no longer be supporting the AMD XGBE driver. Maintenance will be handled by Raju Rangoju going forward. Cc: Raju Rangoju Signed-off-by: Shyam Sundar S K Link: https://patch.msgid.link/20260223074020.1987884-1-Shyam-sundar.S-k@amd.com Signed-off-by: Paolo Abeni --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e1a6c6cc2b7c..cf0313ca6039 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1292,7 +1292,6 @@ F: include/trace/events/amdxdna.h F: include/uapi/drm/amdxdna_accel.h AMD XGBE DRIVER -M: "Shyam Sundar S K" M: Raju Rangoju L: netdev@vger.kernel.org S: Maintained From ab39cc4cb8ceecdc2b61747433e7237f1ac2b789 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Tue, 24 Feb 2026 07:21:06 -0500 Subject: [PATCH 263/828] cpufreq: intel_pstate: Fix NULL pointer dereference in update_cpu_qos_request() The update_cpu_qos_request() function attempts to initialize the 'freq' variable by dereferencing 'cpudata' before verifying if the 'policy' is valid. This issue occurs on systems booted with the "nosmt" parameter, where all_cpu_data[cpu] is NULL for the SMT sibling threads. As a result, any call to update_qos_requests() will result in a NULL pointer dereference as the code will attempt to access pstate.turbo_freq using the NULL cpudata pointer. Also, pstate.turbo_freq may be updated by intel_pstate_get_hwp_cap() after initializing the 'freq' variable, so it is better to defer the 'freq' until intel_pstate_get_hwp_cap() has been called. Fix this by deferring the 'freq' assignment until after the policy and driver_data have been validated. Fixes: ae1bdd23b99f ("cpufreq: intel_pstate: Adjust frequency percentage computations") Reported-by: Jirka Hladky Closes: https://lore.kernel.org/all/CAE4VaGDfiPvz3AzrwrwM4kWB3SCkMci25nPO8W1JmTBd=xHzZg@mail.gmail.com/ Signed-off-by: David Arcari Cc: 6.18+ # 6.18+ [ rjw: Added one paragraph to the changelog ] Link: https://patch.msgid.link/20260224122106.228116-1-darcari@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a48af3540c74..bdc37080d319 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1647,8 +1647,8 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) { struct cpudata *cpudata = all_cpu_data[cpu]; - unsigned int freq = cpudata->pstate.turbo_freq; struct freq_qos_request *req; + unsigned int freq; struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) @@ -1661,6 +1661,8 @@ static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) if (hwp_active) intel_pstate_get_hwp_cap(cpudata); + freq = cpudata->pstate.turbo_freq; + if (type == FREQ_QOS_MIN) { freq = DIV_ROUND_UP(freq * global.min_perf_pct, 100); } else { From 5ede90206273ff156a778254f0f972a55e973c89 Mon Sep 17 00:00:00 2001 From: Sofia Schneider Date: Sun, 22 Feb 2026 23:52:40 -0300 Subject: [PATCH 264/828] ACPI: OSI: Add DMI quirk for Acer Aspire One D255 The screen backlight turns off during boot (specifically during udev device initialization) when returning true for _OSI("Windows 2009"). Analyzing the device's DSDT reveals that the firmware takes a different code path when Windows 7 is reported, which leads to the backlight shutoff. Add a DMI quirk to invoke dmi_disable_osi_win7 for this model. Signed-off-by: Sofia Schneider Link: https://patch.msgid.link/20260223025240.518509-1-sofia@schn.dev Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c index f2c943b934be..9470f1830ff5 100644 --- a/drivers/acpi/osi.c +++ b/drivers/acpi/osi.c @@ -389,6 +389,19 @@ static const struct dmi_system_id acpi_osi_dmi_table[] __initconst = { }, }, + /* + * The screen backlight turns off during udev device creation + * when returning true for _OSI("Windows 2009") + */ + { + .callback = dmi_disable_osi_win7, + .ident = "Acer Aspire One D255", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "AOD255"), + }, + }, + /* * The wireless hotkey does not work on those machines when * returning true for _OSI("Windows 2012") From e710b2283725f0db9e5b99c2483df3f4a7feadbd Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 23 Feb 2026 11:04:48 +0100 Subject: [PATCH 265/828] Revert "hwmon: add SMARC-sAM67 support" This reverts commit 443b39c82c322c9f3c38bea0389fe927ba00b3b4. I was just informed that this product is discontinued (without being ever released to the market). Pull the plug and let's not waste any more maintainers time. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20260223100459.844967-4-mwalle@kernel.org Signed-off-by: Guenter Roeck --- Documentation/hwmon/index.rst | 1 - Documentation/hwmon/sa67.rst | 41 --------- MAINTAINERS | 1 - drivers/hwmon/Kconfig | 10 --- drivers/hwmon/Makefile | 1 - drivers/hwmon/sa67mcu-hwmon.c | 161 ---------------------------------- 6 files changed, 215 deletions(-) delete mode 100644 Documentation/hwmon/sa67.rst delete mode 100644 drivers/hwmon/sa67mcu-hwmon.c diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst index d91dbb20c7dc..b2ca8513cfcd 100644 --- a/Documentation/hwmon/index.rst +++ b/Documentation/hwmon/index.rst @@ -220,7 +220,6 @@ Hardware Monitoring Kernel Drivers q54sj108a2 qnap-mcu-hwmon raspberrypi-hwmon - sa67 sbrmi sbtsi_temp sch5627 diff --git a/Documentation/hwmon/sa67.rst b/Documentation/hwmon/sa67.rst deleted file mode 100644 index 029c7c169b7f..000000000000 --- a/Documentation/hwmon/sa67.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-only - -Kernel driver sa67mcu -===================== - -Supported chips: - - * Kontron sa67mcu - - Prefix: 'sa67mcu' - - Datasheet: not available - -Authors: Michael Walle - -Description ------------ - -The sa67mcu is a board management controller which also exposes a hardware -monitoring controller. - -The controller has two voltage and one temperature sensor. The values are -hold in two 8 bit registers to form one 16 bit value. Reading the lower byte -will also capture the high byte to make the access atomic. The unit of the -volatge sensors are 1mV and the unit of the temperature sensor is 0.1degC. - -Sysfs entries -------------- - -The following attributes are supported. - -======================= ======================================================== -in0_label "VDDIN" -in0_input Measured VDDIN voltage. - -in1_label "VDD_RTC" -in1_input Measured VDD_RTC voltage. - -temp1_input MCU temperature. Roughly the board temperature. -======================= ======================================================== - diff --git a/MAINTAINERS b/MAINTAINERS index 55af015174a5..d5a2078a6717 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24337,7 +24337,6 @@ F: Documentation/devicetree/bindings/interrupt-controller/kontron,sl28cpld-intc. F: Documentation/devicetree/bindings/pwm/kontron,sl28cpld-pwm.yaml F: Documentation/devicetree/bindings/watchdog/kontron,sl28cpld-wdt.yaml F: drivers/gpio/gpio-sl28cpld.c -F: drivers/hwmon/sa67mcu-hwmon.c F: drivers/hwmon/sl28cpld-hwmon.c F: drivers/irqchip/irq-sl28cpld.c F: drivers/pwm/pwm-sl28cpld.c diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 41c381764c2b..486152a8ea77 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1927,16 +1927,6 @@ config SENSORS_RASPBERRYPI_HWMON This driver can also be built as a module. If so, the module will be called raspberrypi-hwmon. -config SENSORS_SA67MCU - tristate "Kontron sa67mcu hardware monitoring driver" - depends on MFD_SL28CPLD || COMPILE_TEST - help - If you say yes here you get support for the voltage and temperature - monitor of the sa67 board management controller. - - This driver can also be built as a module. If so, the module - will be called sa67mcu-hwmon. - config SENSORS_SL28CPLD tristate "Kontron sl28cpld hardware monitoring driver" depends on MFD_SL28CPLD || COMPILE_TEST diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index eade8e3b1bde..5833c807c688 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -199,7 +199,6 @@ obj-$(CONFIG_SENSORS_PT5161L) += pt5161l.o obj-$(CONFIG_SENSORS_PWM_FAN) += pwm-fan.o obj-$(CONFIG_SENSORS_QNAP_MCU_HWMON) += qnap-mcu-hwmon.o obj-$(CONFIG_SENSORS_RASPBERRYPI_HWMON) += raspberrypi-hwmon.o -obj-$(CONFIG_SENSORS_SA67MCU) += sa67mcu-hwmon.o obj-$(CONFIG_SENSORS_SBTSI) += sbtsi_temp.o obj-$(CONFIG_SENSORS_SBRMI) += sbrmi.o obj-$(CONFIG_SENSORS_SCH56XX_COMMON)+= sch56xx-common.o diff --git a/drivers/hwmon/sa67mcu-hwmon.c b/drivers/hwmon/sa67mcu-hwmon.c deleted file mode 100644 index 22f703b7b256..000000000000 --- a/drivers/hwmon/sa67mcu-hwmon.c +++ /dev/null @@ -1,161 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sl67mcu hardware monitoring driver - * - * Copyright 2025 Kontron Europe GmbH - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define SA67MCU_VOLTAGE(n) (0x00 + ((n) * 2)) -#define SA67MCU_TEMP(n) (0x04 + ((n) * 2)) - -struct sa67mcu_hwmon { - struct regmap *regmap; - u32 offset; -}; - -static int sa67mcu_hwmon_read(struct device *dev, - enum hwmon_sensor_types type, u32 attr, - int channel, long *input) -{ - struct sa67mcu_hwmon *hwmon = dev_get_drvdata(dev); - unsigned int offset; - u8 reg[2]; - int ret; - - switch (type) { - case hwmon_in: - switch (attr) { - case hwmon_in_input: - offset = hwmon->offset + SA67MCU_VOLTAGE(channel); - break; - default: - return -EOPNOTSUPP; - } - break; - case hwmon_temp: - switch (attr) { - case hwmon_temp_input: - offset = hwmon->offset + SA67MCU_TEMP(channel); - break; - default: - return -EOPNOTSUPP; - } - break; - default: - return -EOPNOTSUPP; - } - - /* Reading the low byte will capture the value */ - ret = regmap_bulk_read(hwmon->regmap, offset, reg, ARRAY_SIZE(reg)); - if (ret) - return ret; - - *input = reg[1] << 8 | reg[0]; - - /* Temperatures are s16 and in 0.1degC steps. */ - if (type == hwmon_temp) - *input = sign_extend32(*input, 15) * 100; - - return 0; -} - -static const struct hwmon_channel_info * const sa67mcu_hwmon_info[] = { - HWMON_CHANNEL_INFO(in, - HWMON_I_INPUT | HWMON_I_LABEL, - HWMON_I_INPUT | HWMON_I_LABEL), - HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), - NULL -}; - -static const char *const sa67mcu_hwmon_in_labels[] = { - "VDDIN", - "VDD_RTC", -}; - -static int sa67mcu_hwmon_read_string(struct device *dev, - enum hwmon_sensor_types type, u32 attr, - int channel, const char **str) -{ - switch (type) { - case hwmon_in: - switch (attr) { - case hwmon_in_label: - *str = sa67mcu_hwmon_in_labels[channel]; - return 0; - default: - return -EOPNOTSUPP; - } - default: - return -EOPNOTSUPP; - } -} - -static const struct hwmon_ops sa67mcu_hwmon_ops = { - .visible = 0444, - .read = sa67mcu_hwmon_read, - .read_string = sa67mcu_hwmon_read_string, -}; - -static const struct hwmon_chip_info sa67mcu_hwmon_chip_info = { - .ops = &sa67mcu_hwmon_ops, - .info = sa67mcu_hwmon_info, -}; - -static int sa67mcu_hwmon_probe(struct platform_device *pdev) -{ - struct sa67mcu_hwmon *hwmon; - struct device *hwmon_dev; - int ret; - - if (!pdev->dev.parent) - return -ENODEV; - - hwmon = devm_kzalloc(&pdev->dev, sizeof(*hwmon), GFP_KERNEL); - if (!hwmon) - return -ENOMEM; - - hwmon->regmap = dev_get_regmap(pdev->dev.parent, NULL); - if (!hwmon->regmap) - return -ENODEV; - - ret = device_property_read_u32(&pdev->dev, "reg", &hwmon->offset); - if (ret) - return -EINVAL; - - hwmon_dev = devm_hwmon_device_register_with_info(&pdev->dev, - "sa67mcu_hwmon", hwmon, - &sa67mcu_hwmon_chip_info, - NULL); - if (IS_ERR(hwmon_dev)) - dev_err(&pdev->dev, "failed to register as hwmon device"); - - return PTR_ERR_OR_ZERO(hwmon_dev); -} - -static const struct of_device_id sa67mcu_hwmon_of_match[] = { - { .compatible = "kontron,sa67mcu-hwmon", }, - {} -}; -MODULE_DEVICE_TABLE(of, sa67mcu_hwmon_of_match); - -static struct platform_driver sa67mcu_hwmon_driver = { - .probe = sa67mcu_hwmon_probe, - .driver = { - .name = "sa67mcu-hwmon", - .of_match_table = sa67mcu_hwmon_of_match, - }, -}; -module_platform_driver(sa67mcu_hwmon_driver); - -MODULE_DESCRIPTION("sa67mcu Hardware Monitoring Driver"); -MODULE_AUTHOR("Michael Walle "); -MODULE_LICENSE("GPL"); From 364410170ab33f6e7ef0eb2afb12bf89b0feb3a6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Feb 2026 07:59:20 -0500 Subject: [PATCH 266/828] nfsd: report the requested maximum number of threads instead of number running The current netlink and /proc interfaces deviate from their traditional values when dynamic threading is enabled, and there is currently no way to know what the current setting is. This patch brings the reporting back in line with traditional behavior. Make these interfaces report the requested maximum number of threads instead of the number currently running. Also, update documentation and comments to reflect that this value represents a maximum and not the number currently running. Fixes: d8316b837c2c ("nfsd: add controls to set the minimum number of threads per pool") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- Documentation/netlink/specs/nfsd.yaml | 4 ++-- fs/nfsd/nfsctl.c | 18 +++++++++--------- fs/nfsd/nfssvc.c | 7 ++++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index badb2fe57c98..f87b5a05e5e9 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml @@ -152,7 +152,7 @@ operations: - compound-ops - name: threads-set - doc: set the number of running threads + doc: set the maximum number of running threads attribute-set: server flags: [admin-perm] do: @@ -165,7 +165,7 @@ operations: - min-threads - name: threads-get - doc: get the number of running threads + doc: get the maximum number of running threads attribute-set: server do: reply: diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b06adb5d5a2e..369da69d5efe 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -377,15 +377,15 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) } /* - * write_threads - Start NFSD, or report the current number of running threads + * write_threads - Start NFSD, or report the configured number of threads * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with '\n'-terminated C - * string numeric value representing the number of - * running NFSD threads; + * string numeric value representing the configured + * number of NFSD threads; * return code is the size in bytes of the string * On error: return code is zero * @@ -399,8 +399,8 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) * Output: * On success: NFS service is started; * passed-in buffer filled with '\n'-terminated C - * string numeric value representing the number of - * running NFSD threads; + * string numeric value representing the configured + * number of NFSD threads; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ @@ -430,7 +430,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) } /* - * write_pool_threads - Set or report the current number of threads per pool + * write_pool_threads - Set or report the configured number of threads per pool * * Input: * buf: ignored @@ -447,7 +447,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing integer values representing the - * number of NFSD threads in each pool; + * configured number of NFSD threads in each pool; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ @@ -1657,7 +1657,7 @@ out_unlock: } /** - * nfsd_nl_threads_get_doit - get the number of running threads + * nfsd_nl_threads_get_doit - get the maximum number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * @@ -1700,7 +1700,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, - sp->sp_nrthreads); + sp->sp_nrthrmax); if (err) goto err_unlock; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 0887ee601d3c..4a04208393b8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -239,12 +239,13 @@ static void nfsd_net_free(struct percpu_ref *ref) int nfsd_nrthreads(struct net *net) { - int rv = 0; + int i, rv = 0; struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) - rv = nn->nfsd_serv->sv_nrthreads; + for (i = 0; i < nn->nfsd_serv->sv_nrpools; ++i) + rv += nn->nfsd_serv->sv_pools[i].sp_nrthrmax; mutex_unlock(&nfsd_mutex); return rv; } @@ -659,7 +660,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) if (serv) for (i = 0; i < serv->sv_nrpools && i < n; i++) - nthreads[i] = serv->sv_pools[i].sp_nrthreads; + nthreads[i] = serv->sv_pools[i].sp_nrthrmax; return 0; } From 297318a1c26dabb5a2d8540fdf436c22094eb2d7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 24 Feb 2026 12:52:18 +0100 Subject: [PATCH 267/828] spi: dt-bindings: snps,dw-abp-ssi: Remove unused bindings As stated in the da0a672268b3 ("spi: dw: Remove not-going-to-be-supported code for Baikal SoC") the Baikal platforms are not supported and the respective driver code was removed. Remove the currently unused bindings. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20260224115218.3499222-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- .../bindings/spi/snps,dw-apb-ssi.yaml | 31 +------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index 81838577cf9c..8ebebcebca16 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -22,21 +22,6 @@ allOf: properties: reg: minItems: 2 - - if: - properties: - compatible: - contains: - enum: - - baikal,bt1-sys-ssi - then: - properties: - mux-controls: - maxItems: 1 - required: - - mux-controls - else: - required: - - interrupts - if: properties: compatible: @@ -75,10 +60,6 @@ properties: const: intel,mountevans-imc-ssi - description: AMD Pensando Elba SoC SPI Controller const: amd,pensando-elba-spi - - description: Baikal-T1 SPI Controller - const: baikal,bt1-ssi - - description: Baikal-T1 System Boot SPI Controller - const: baikal,bt1-sys-ssi - description: Canaan Kendryte K210 SoS SPI Controller const: canaan,k210-spi - description: Renesas RZ/N1 SPI Controller @@ -170,6 +151,7 @@ required: - "#address-cells" - "#size-cells" - clocks + - interrupts examples: - | @@ -190,15 +172,4 @@ examples: rx-sample-delay-ns = <7>; }; }; - - | - spi@1f040100 { - compatible = "baikal,bt1-sys-ssi"; - reg = <0x1f040100 0x900>, - <0x1c000000 0x1000000>; - #address-cells = <1>; - #size-cells = <0>; - mux-controls = <&boot_mux>; - clocks = <&ccu_sys>; - clock-names = "ssi_clk"; - }; ... From 07ed4f05bbfd2bc014974dcc4297fd3aa1cb88c0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Feb 2026 14:00:14 -0800 Subject: [PATCH 268/828] hwmon: (it87) Check the it87_lock() return value Return early in it87_resume() if it87_lock() fails instead of ignoring the return value of that function. This patch suppresses a Clang thread-safety warning. Cc: Frank Crawford Cc: Guenter Roeck Cc: Jean Delvare Cc: linux-hwmon@vger.kernel.org Fixes: 376e1a937b30 ("hwmon: (it87) Add calls to smbus_enable/smbus_disable as required") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20260223220102.2158611-15-bart.vanassche@linux.dev [groeck: Declare 'ret' at the beginning of it87_resume()] Signed-off-by: Guenter Roeck --- drivers/hwmon/it87.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index e233aafa8856..5cfb98a0512f 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -3590,10 +3590,13 @@ static int it87_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct it87_data *data = dev_get_drvdata(dev); + int err; it87_resume_sio(pdev); - it87_lock(data); + err = it87_lock(data); + if (err) + return err; it87_check_pwm(dev); it87_check_limit_regs(data); From 96a1fd0d84b17360840f344826897fa71049870e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 12 Feb 2026 14:50:38 -0700 Subject: [PATCH 269/828] cxl: Fix race of nvdimm_bus object when creating nvdimm objects Found issue during running of cxl-translate.sh unit test. Adding a 3s sleep right before the test seems to make the issue reproduce fairly consistently. The cxl_translate module has dependency on cxl_acpi and causes orphaned nvdimm objects to reprobe after cxl_acpi is removed. The nvdimm_bus object is registered by the cxl_nvb object when cxl_acpi_probe() is called. With the nvdimm_bus object missing, __nd_device_register() will trigger NULL pointer dereference when accessing the dev->parent that points to &nvdimm_bus->dev. [ 192.884510] BUG: kernel NULL pointer dereference, address: 000000000000006c [ 192.895383] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20250812-19.fc42 08/12/2025 [ 192.897721] Workqueue: cxl_port cxl_bus_rescan_queue [cxl_core] [ 192.899459] RIP: 0010:kobject_get+0xc/0x90 [ 192.924871] Call Trace: [ 192.925959] [ 192.926976] ? pm_runtime_init+0xb9/0xe0 [ 192.929712] __nd_device_register.part.0+0x4d/0xc0 [libnvdimm] [ 192.933314] __nvdimm_create+0x206/0x290 [libnvdimm] [ 192.936662] cxl_nvdimm_probe+0x119/0x1d0 [cxl_pmem] [ 192.940245] cxl_bus_probe+0x1a/0x60 [cxl_core] [ 192.943349] really_probe+0xde/0x380 This patch also relies on the previous change where devm_cxl_add_nvdimm_bridge() is called from drivers/cxl/pmem.c instead of drivers/cxl/core.c to ensure the dependency of cxl_acpi on cxl_pmem. 1. Set probe_type of cxl_nvb to PROBE_FORCE_SYNCHRONOUS to ensure the driver is probed synchronously when add_device() is called. 2. Add a check in __devm_cxl_add_nvdimm_bridge() to ensure that the cxl_nvb driver is attached during cxl_acpi_probe(). 3. Take the cxl_root uport_dev lock and the cxl_nvb->dev lock in devm_cxl_add_nvdimm() before checking nvdimm_bus is valid. 4. Set cxl_nvdimm flag to CXL_NVD_F_INVALIDATED so cxl_nvdimm_probe() will exit with -EBUSY. The removal of cxl_nvdimm devices should prevent any orphaned devices from probing once the nvdimm_bus is gone. [ dj: Fixed 0-day reported kdoc issue. ] [ dj: Fix cxl_nvb reference leak on error. Gregory (kreview-0811365) ] Suggested-by: Dan Williams Fixes: 8fdcb1704f61 ("cxl/pmem: Add initial infrastructure for pmem support") Tested-by: Alison Schofield Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20260205001633.1813643-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pmem.c | 29 +++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 5 +++++ drivers/cxl/pmem.c | 10 ++++++++-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index b652e3486038..68462e38a977 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -115,6 +115,15 @@ static void unregister_nvb(void *_cxl_nvb) device_unregister(&cxl_nvb->dev); } +static bool cxl_nvdimm_bridge_failed_attach(struct cxl_nvdimm_bridge *cxl_nvb) +{ + struct device *dev = &cxl_nvb->dev; + + guard(device)(dev); + /* If the device has no driver, then it failed to attach. */ + return dev->driver == NULL; +} + struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port) { @@ -138,6 +147,11 @@ struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, if (rc) goto err; + if (cxl_nvdimm_bridge_failed_attach(cxl_nvb)) { + unregister_nvb(cxl_nvb); + return ERR_PTR(-ENODEV); + } + rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb); if (rc) return ERR_PTR(rc); @@ -248,6 +262,21 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, if (!cxl_nvb) return -ENODEV; + /* + * Take the uport_dev lock to guard against race of nvdimm_bus object. + * cxl_acpi_probe() registers the nvdimm_bus and is done under the + * root port uport_dev lock. + * + * Take the cxl_nvb device lock to ensure that cxl_nvb driver is in a + * consistent state. And the driver registers nvdimm_bus. + */ + guard(device)(cxl_nvb->port->uport_dev); + guard(device)(&cxl_nvb->dev); + if (!cxl_nvb->nvdimm_bus) { + rc = -ENODEV; + goto err_alloc; + } + cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd); if (IS_ERR(cxl_nvd)) { rc = PTR_ERR(cxl_nvd); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f5850800f400..9b947286eb9b 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -574,11 +574,16 @@ struct cxl_nvdimm_bridge { #define CXL_DEV_ID_LEN 19 +enum { + CXL_NVD_F_INVALIDATED = 0, +}; + struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ u64 dirty_shutdowns; + unsigned long flags; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index c67b30b516bf..082ec0f1c3a0 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -14,7 +14,7 @@ static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); /** - * __devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology + * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology * @host: platform firmware root device * @port: CXL port at the root of a CXL topology * @@ -143,6 +143,9 @@ static int cxl_nvdimm_probe(struct device *dev) struct nvdimm *nvdimm; int rc; + if (test_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags)) + return -EBUSY; + set_exclusive_cxl_commands(mds, exclusive_cmds); rc = devm_add_action_or_reset(dev, clear_exclusive, mds); if (rc) @@ -323,8 +326,10 @@ static int detach_nvdimm(struct device *dev, void *data) scoped_guard(device, dev) { if (dev->driver) { cxl_nvd = to_cxl_nvdimm(dev); - if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) + if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) { release = true; + set_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags); + } } } if (release) @@ -367,6 +372,7 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = { .probe = cxl_nvdimm_bridge_probe, .id = CXL_DEVICE_NVDIMM_BRIDGE, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .suppress_bind_attrs = true, }, }; From 60b5d1f68338aff2c5af0113f04aefa7169c50c2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 19 Feb 2026 16:16:17 -0800 Subject: [PATCH 270/828] cxl/mbox: validate payload size before accessing contents in cxl_payload_from_user_allowed() cxl_payload_from_user_allowed() casts and dereferences the input payload without first verifying its size. When a raw mailbox command is sent with an undersized payload (ie: 1 byte for CXL_MBOX_OP_CLEAR_LOG, which expects a 16-byte UUID), uuid_equal() reads past the allocated buffer, triggering a KASAN splat: BUG: KASAN: slab-out-of-bounds in memcmp+0x176/0x1d0 lib/string.c:683 Read of size 8 at addr ffff88810130f5c0 by task syz.1.62/2258 CPU: 2 UID: 0 PID: 2258 Comm: syz.1.62 Not tainted 6.19.0-dirty #3 PREEMPT(voluntary) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0xab/0xe0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xce/0x650 mm/kasan/report.c:482 kasan_report+0xce/0x100 mm/kasan/report.c:595 memcmp+0x176/0x1d0 lib/string.c:683 uuid_equal include/linux/uuid.h:73 [inline] cxl_payload_from_user_allowed drivers/cxl/core/mbox.c:345 [inline] cxl_mbox_cmd_ctor drivers/cxl/core/mbox.c:368 [inline] cxl_validate_cmd_from_user drivers/cxl/core/mbox.c:522 [inline] cxl_send_cmd+0x9c0/0xb50 drivers/cxl/core/mbox.c:643 __cxl_memdev_ioctl drivers/cxl/core/memdev.c:698 [inline] cxl_memdev_ioctl+0x14f/0x190 drivers/cxl/core/memdev.c:713 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl fs/ioctl.c:583 [inline] __x64_sys_ioctl+0x18e/0x210 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xa8/0x330 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fdaf331ba79 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fdaf1d77038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fdaf3585fa0 RCX: 00007fdaf331ba79 RDX: 00002000000001c0 RSI: 00000000c030ce02 RDI: 0000000000000003 RBP: 00007fdaf33749df R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fdaf3586038 R14: 00007fdaf3585fa0 R15: 00007ffced2af768 Add 'in_size' parameter to cxl_payload_from_user_allowed() and validate the payload is large enough. Fixes: 6179045ccc0c ("cxl/mbox: Block immediate mode in SET_PARTITION_INFO command") Fixes: 206f9fa9d555 ("cxl/mbox: Add Clear Log mailbox command") Signed-off-by: Davidlohr Bueso Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260220001618.963490-2-dave@stgolabs.net Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c94656..e7a6452bf544 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -311,6 +311,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * cxl_payload_from_user_allowed() - Check contents of in_payload. * @opcode: The mailbox command opcode. * @payload_in: Pointer to the input payload passed in from user space. + * @in_size: Size of @payload_in in bytes. * * Return: * * true - payload_in passes check for @opcode. @@ -325,12 +326,15 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * * The specific checks are determined by the opcode. */ -static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) +static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in, + size_t in_size) { switch (opcode) { case CXL_MBOX_OP_SET_PARTITION_INFO: { struct cxl_mbox_set_partition_info *pi = payload_in; + if (in_size < sizeof(*pi)) + return false; if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) return false; break; @@ -338,6 +342,8 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) case CXL_MBOX_OP_CLEAR_LOG: { const uuid_t *uuid = (uuid_t *)payload_in; + if (in_size < sizeof(uuid_t)) + return false; /* * Restrict the ‘Clear log’ action to only apply to * Vendor debug logs. @@ -365,7 +371,8 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd, if (IS_ERR(mbox_cmd->payload_in)) return PTR_ERR(mbox_cmd->payload_in); - if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) { + if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in, + in_size)) { dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n", cxl_mem_opcode_to_name(opcode)); kvfree(mbox_cmd->payload_in); From 0a70b7cd397e545e926c93715ff6366b67c716f6 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 23 Feb 2026 11:13:39 -0800 Subject: [PATCH 271/828] cxl: Test CXL_DECODER_F_LOCK as a bitmask The CXL decoder flags are defined as bitmasks, not bit indices. Using test_bit() to check them interprets the mask value as a bit index, which is the wrong test. For CXL_DECODER_F_LOCK the test reads beyond the defined bits, causing the test to always return false and allowing resets that should have been blocked. Replace test_bit() with a bitmask check. Fixes: 2230c4bdc412 ("cxl: Add handling of locked CXL decoder") Signed-off-by: Alison Schofield Reviewed-by: Gregory Price Tested-by: Gregory Price Link: https://patch.msgid.link/98851c4770e4631753cf9f75b58a3a6daeca2ea2.1771873256.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 2 +- drivers/cxl/core/region.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e3f0c39e6812..c222e98ae736 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -904,7 +904,7 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) return; - if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) + if (cxld->flags & CXL_DECODER_F_LOCK) return; if (port->commit_end == id) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index fec37af1dfbf..780ec947ecf2 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1100,7 +1100,7 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr, static void cxl_region_setup_flags(struct cxl_region *cxlr, struct cxl_decoder *cxld) { - if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) { + if (cxld->flags & CXL_DECODER_F_LOCK) { set_bit(CXL_REGION_F_LOCK, &cxlr->flags); clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } From e46f25f5a81f6f1a9ab93bcda80d5dfaea9f4897 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 23 Feb 2026 11:13:40 -0800 Subject: [PATCH 272/828] cxl/region: Test CXL_DECODER_F_NORMALIZED_ADDRESSING as a bitmask The CXL decoder flags are defined as bitmasks, not bit indices. Using test_bit() to check them interprets the mask value as a bit index, which is the wrong test. For CXL_DECODER_F_NORMALIZED_ADDRESSING the test reads beyond the flags word, making the flag sometimes appear set and blocking creation of CXL region debugfs attributes that support poison operations. Replace test_bit() with a bitmask check. Found with cxl-test. Fixes: 208f432406b7 ("cxl: Disable HPA/SPA translation handlers for Normalized Addressing") Signed-off-by: Alison Schofield Reviewed-by: Gregory Price Tested-by: Gregory Price Link: https://patch.msgid.link/63fe4a6203e40e404347f1cdc7a1c55cb4959b86.1771873256.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 780ec947ecf2..42874948b589 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1105,7 +1105,7 @@ static void cxl_region_setup_flags(struct cxl_region *cxlr, clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } - if (test_bit(CXL_DECODER_F_NORMALIZED_ADDRESSING, &cxld->flags)) + if (cxld->flags & CXL_DECODER_F_NORMALIZED_ADDRESSING) set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags); } From 43277d8f57b490550d33f6f4cb73c28ec6024696 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:19 -0800 Subject: [PATCH 273/828] selftests/bpf: Replace strncpy() with strscpy() strncpy() does not guarantee NULL-termination and is considered deprecated [1]. Replace strncpy() calls with strscpy(). [1] https://docs.kernel.org/process/deprecated.html#strncpy-on-nul-terminated-strings Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-4-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.c | 3 +-- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 3 +-- tools/testing/selftests/bpf/prog_tests/flow_dissector.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/queue_stack_map.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c | 2 +- tools/testing/selftests/bpf/prog_tests/task_local_data.h | 2 +- tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 2 +- tools/testing/selftests/bpf/test_progs.c | 2 +- tools/testing/selftests/bpf/xdp_hw_metadata.c | 4 ++-- 9 files changed, 12 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 5374b7e16d53..b82f572641b7 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -581,8 +581,7 @@ int open_tuntap(const char *dev_name, bool need_mac) return -1; ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + strscpy(ifr.ifr_name, dev_name); err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 5225d69bf79b..c69080ca14f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -346,8 +346,7 @@ static void test_task_sleepable(void) close(finish_pipe[1]); test_data = malloc(sizeof(char) * 10); - strncpy(test_data, "test_data", 10); - test_data[9] = '\0'; + strscpy(test_data, "test_data", 10); test_data_long = malloc(sizeof(char) * 5000); for (int i = 0; i < 5000; ++i) { diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 08bae13248c4..fb4892681464 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -570,7 +570,7 @@ static int create_tap(const char *ifname) }; int fd, ret; - strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, ifname); fd = open("/dev/net/tun", O_RDWR); if (fd < 0) @@ -599,7 +599,7 @@ static int ifup(const char *ifname) struct ifreq ifr = {}; int sk, ret; - strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, ifname); sk = socket(PF_INET, SOCK_DGRAM, 0); if (sk < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index a043af9cd6d9..41441325e179 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -28,9 +28,9 @@ static void test_queue_stack_map_by_type(int type) vals[i] = rand(); if (type == QUEUE) - strncpy(file, "./test_queue_map.bpf.o", sizeof(file)); + strscpy(file, "./test_queue_map.bpf.o"); else if (type == STACK) - strncpy(file, "./test_stack_map.bpf.o", sizeof(file)); + strscpy(file, "./test_stack_map.bpf.o"); else return; diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c index 3eefdfed1db9..657d897958b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c +++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c @@ -34,7 +34,7 @@ void test_skc_to_unix_sock(void) memset(&sockaddr, 0, sizeof(sockaddr)); sockaddr.sun_family = AF_UNIX; - strncpy(sockaddr.sun_path, sock_path, strlen(sock_path)); + strscpy(sockaddr.sun_path, sock_path); sockaddr.sun_path[0] = '\0'; err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)); diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h index 0f86b9275cf9..8342e2fe5260 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_data.h +++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h @@ -262,7 +262,7 @@ retry: if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) goto retry; - strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN); + strscpy(tld_meta_p->metadata[i].name, name); atomic_store(&tld_meta_p->metadata[i].size, size); return (tld_key_t){(__s16)off}; } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 76d72a59365e..64fbda082309 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -1095,7 +1095,7 @@ static int tun_open(char *name) ifr.ifr_flags = IFF_TUN | IFF_NO_PI; if (*name) - strncpy(ifr.ifr_name, name, IFNAMSIZ); + strscpy(ifr.ifr_name, name); err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl TUNSETIFF")) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 02a85dda30e6..d1418ec1f351 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1799,7 +1799,7 @@ static int worker_main_send_subtests(int sock, struct test_state *state) msg.subtest_done.num = i; - strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME); + strscpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME); msg.subtest_done.error_cnt = subtest_state->error_cnt; msg.subtest_done.skipped = subtest_state->skipped; diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 3d8de0d4c96a..6db3b5555a22 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -550,7 +550,7 @@ static int rxq_num(const char *ifname) struct ifreq ifr = { .ifr_data = (void *)&ch, }; - strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); + strscpy(ifr.ifr_name, ifname); int fd, ret; fd = socket(AF_UNIX, SOCK_DGRAM, 0); @@ -571,7 +571,7 @@ static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *c struct ifreq ifr = { .ifr_data = (void *)cfg, }; - strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); + strscpy(ifr.ifr_name, ifname); int fd, ret; fd = socket(AF_UNIX, SOCK_DGRAM, 0); From 3ed0bc2d4994c718fb5476b45d4aafec42a1d040 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:20 -0800 Subject: [PATCH 274/828] selftests/bpf: Use strscpy in bpftool_helpers.c Replace strncpy() calls in bpftool_helpers.c with strscpy(). Pass the destination buffer size to detect_bpftool_path() instead of hardcoding BPFTOOL_PATH_MAX_LEN. Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-5-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpftool_helpers.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c index a5824945a4a5..595a636fa13b 100644 --- a/tools/testing/selftests/bpf/bpftool_helpers.c +++ b/tools/testing/selftests/bpf/bpftool_helpers.c @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "bpftool_helpers.h" #include #include #include +#include "bpf_util.h" +#include "bpftool_helpers.h" + #define BPFTOOL_PATH_MAX_LEN 64 #define BPFTOOL_FULL_CMD_MAX_LEN 512 #define BPFTOOL_DEFAULT_PATH "tools/sbin/bpftool" -static int detect_bpftool_path(char *buffer) +static int detect_bpftool_path(char *buffer, size_t size) { char tmp[BPFTOOL_PATH_MAX_LEN]; @@ -18,7 +20,7 @@ static int detect_bpftool_path(char *buffer) */ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "./%s", BPFTOOL_DEFAULT_PATH); if (access(tmp, X_OK) == 0) { - strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN); + strscpy(buffer, tmp, size); return 0; } @@ -27,7 +29,7 @@ static int detect_bpftool_path(char *buffer) */ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "../%s", BPFTOOL_DEFAULT_PATH); if (access(tmp, X_OK) == 0) { - strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN); + strscpy(buffer, tmp, size); return 0; } @@ -44,7 +46,7 @@ static int run_command(char *args, char *output_buf, size_t output_max_len) int ret; /* Detect and cache bpftool binary location */ - if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path)) + if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path, sizeof(bpftool_path))) return 1; ret = snprintf(command, BPFTOOL_FULL_CMD_MAX_LEN, "%s %s%s", From 9d8685239e85ba5a4899b5b3534c732e3ae5f2aa Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:21 -0800 Subject: [PATCH 275/828] selftests/bpf: Use memcpy() for bounded non-NULL-terminated copies Replace strncpy() with memcpy() in cases where the source is non-NULL-terminated and the copy length is known. Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-6-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c | 6 ++++-- tools/testing/selftests/bpf/test_verifier.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index dd75ccb03770..469e92869523 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -308,8 +308,10 @@ static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches return -1; } - strncpy(type_str, type, type_sz); - strncpy(field_str, field, field_sz); + memcpy(type_str, type, type_sz); + type_str[type_sz] = '\0'; + memcpy(field_str, field, field_sz); + field_str[field_sz] = '\0'; btf_id = btf__find_by_name(btf, type_str); if (btf_id < 0) { PRINT_FAIL("No BTF info for type %s\n", type_str); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 27db34ecf3f5..a8ae03c57bba 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1320,7 +1320,7 @@ static bool cmp_str_seq(const char *log, const char *exp) printf("FAIL\nTestcase bug\n"); return false; } - strncpy(needle, exp, len); + memcpy(needle, exp, len); needle[len] = 0; q = strstr(log, needle); if (!q) { From 4021848a903e65f74cf88997c12b96d6bc2453e6 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:22 -0800 Subject: [PATCH 276/828] selftests/bpf: Pass through build flags to bpftool and resolve_btfids EXTRA_* and SAN_* build flags were not correctly propagated to bpftool and resolve_btids when building selftests/bpf. This led to various build errors on attempt to build with SAN_CFLAGS="-fsanitize=address", for example. Fix the makefiles to address this: - Pass SAN_CFLAGS/SAN_LDFLAGS to bpftool and resolve_btfids build - Propagate EXTRA_LDFLAGS to resolve_btfids link command - Use pkg-config to detect zlib and zstd for resolve_btfids, similar libelf handling Also check for ASAN flag in selftests/bpf/Makefile for convenience. Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-7-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/bpf/resolve_btfids/Makefile | 7 +++++-- tools/testing/selftests/bpf/Makefile | 13 +++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 1733a6e93a07..ef083602b73a 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -65,6 +65,9 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) +ZLIB_LIBS := $(shell $(HOSTPKG_CONFIG) zlib --libs 2>/dev/null || echo -lz) +ZSTD_LIBS := $(shell $(HOSTPKG_CONFIG) libzstd --libs 2>/dev/null || echo -lzstd) + HOSTCFLAGS_resolve_btfids += -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ @@ -73,7 +76,7 @@ HOSTCFLAGS_resolve_btfids += -g \ $(LIBELF_FLAGS) \ -Wall -Werror -LIBS = $(LIBELF_LIBS) -lz +LIBS = $(LIBELF_LIBS) $(ZLIB_LIBS) $(ZSTD_LIBS) export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR include $(srctree)/tools/build/Makefile.include @@ -83,7 +86,7 @@ $(BINARY_IN): fixdep FORCE prepare | $(OUTPUT) $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) $(call msg,LINK,$@) - $(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) + $(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) $(EXTRA_LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) clean_objects := $(wildcard $(OUTPUT)/*.o \ $(OUTPUT)/.*.o.cmd \ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6776158f1f3e..72a9ba41f95e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -27,7 +27,11 @@ ifneq ($(wildcard $(GENHDR)),) endif BPF_GCC ?= $(shell command -v bpf-gcc;) +ifdef ASAN +SAN_CFLAGS ?= -fsanitize=address -fno-omit-frame-pointer +else SAN_CFLAGS ?= +endif SAN_LDFLAGS ?= $(SAN_CFLAGS) RELEASE ?= OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) @@ -326,8 +330,8 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ - EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ @@ -338,8 +342,8 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ - EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ OUTPUT=$(BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ @@ -404,6 +408,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by From c5c1e313493cd836863bb673bb2b8beaa915cad9 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:23 -0800 Subject: [PATCH 277/828] resolve_btfids: Fix memory leaks reported by ASAN Running resolve_btfids with ASAN reveals memory leaks in btf_id handling. - Change get_id() to use a local buffer - Make btf_id__add() strdup the name internally - Add btf_id__free_all() that frees all nodese of a tree - Call the cleanup function on exit for every tree Acked-by: Jiri Olsa Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-8-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/bpf/resolve_btfids/main.c | 81 ++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index ca7fcd03efb6..5208f650080f 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -226,7 +226,7 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name) } static struct btf_id *__btf_id__add(struct rb_root *root, - char *name, + const char *name, enum btf_id_kind kind, bool unique) { @@ -250,7 +250,11 @@ static struct btf_id *__btf_id__add(struct rb_root *root, id = zalloc(sizeof(*id)); if (id) { pr_debug("adding symbol %s\n", name); - id->name = name; + id->name = strdup(name); + if (!id->name) { + free(id); + return NULL; + } id->kind = kind; rb_link_node(&id->rb_node, parent, p); rb_insert_color(&id->rb_node, root); @@ -258,17 +262,21 @@ static struct btf_id *__btf_id__add(struct rb_root *root, return id; } -static inline struct btf_id *btf_id__add(struct rb_root *root, char *name, enum btf_id_kind kind) +static inline struct btf_id *btf_id__add(struct rb_root *root, + const char *name, + enum btf_id_kind kind) { return __btf_id__add(root, name, kind, false); } -static inline struct btf_id *btf_id__add_unique(struct rb_root *root, char *name, enum btf_id_kind kind) +static inline struct btf_id *btf_id__add_unique(struct rb_root *root, + const char *name, + enum btf_id_kind kind) { return __btf_id__add(root, name, kind, true); } -static char *get_id(const char *prefix_end) +static int get_id(const char *prefix_end, char *buf, size_t buf_sz) { /* * __BTF_ID__func__vfs_truncate__0 @@ -277,28 +285,28 @@ static char *get_id(const char *prefix_end) */ int len = strlen(prefix_end); int pos = sizeof("__") - 1; - char *p, *id; + char *p; if (pos >= len) - return NULL; + return -1; - id = strdup(prefix_end + pos); - if (id) { - /* - * __BTF_ID__func__vfs_truncate__0 - * id = ^ - * - * cut the unique id part - */ - p = strrchr(id, '_'); - p--; - if (*p != '_') { - free(id); - return NULL; - } - *p = '\0'; - } - return id; + if (len - pos >= buf_sz) + return -1; + + strcpy(buf, prefix_end + pos); + /* + * __BTF_ID__func__vfs_truncate__0 + * buf = ^ + * + * cut the unique id part + */ + p = strrchr(buf, '_'); + p--; + if (*p != '_') + return -1; + *p = '\0'; + + return 0; } static struct btf_id *add_set(struct object *obj, char *name, enum btf_id_kind kind) @@ -335,10 +343,9 @@ static struct btf_id *add_set(struct object *obj, char *name, enum btf_id_kind k static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) { - char *id; + char id[KSYM_NAME_LEN]; - id = get_id(name + size); - if (!id) { + if (get_id(name + size, id, sizeof(id))) { pr_err("FAILED to parse symbol name: %s\n", name); return NULL; } @@ -346,6 +353,21 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, BTF_ID_KIND_SYM); } +static void btf_id__free_all(struct rb_root *root) +{ + struct rb_node *next; + struct btf_id *id; + + next = rb_first(root); + while (next) { + id = rb_entry(next, struct btf_id, rb_node); + next = rb_next(&id->rb_node); + rb_erase(&id->rb_node, root); + free(id->name); + free(id); + } +} + static void bswap_32_data(void *data, u32 nr_bytes) { u32 cnt, i; @@ -1547,6 +1569,11 @@ dump_btf: out: btf__free(obj.base_btf); btf__free(obj.btf); + btf_id__free_all(&obj.structs); + btf_id__free_all(&obj.unions); + btf_id__free_all(&obj.typedefs); + btf_id__free_all(&obj.funcs); + btf_id__free_all(&obj.sets); if (obj.efile.elf) { elf_end(obj.efile.elf); close(obj.efile.fd); From 45897ced3c1d3940fb5b68fe48c9e144143ae0ae Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:24 -0800 Subject: [PATCH 278/828] selftests/bpf: Add DENYLIST.asan Add a denylist file for tests that should be skipped when built with userspace ASAN: $ make ... SAN_CFLAGS="-fsanitize=address -fno-omit-frame-pointer" Skip the following tests: - *arena*: userspace ASAN does not understand BPF arena maps and gets confused particularly when map_extra is non-zero - non-zero map_extra leads to mmap with MAP_FIXED, and ASAN treats this as an unknown memory region - task_local_data: ASAN complains about "incorrect" aligned_alloc() usage, but it's intentional in the test - uprobe_multi_test: very slow with ASAN enabled Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-9-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.asan | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/testing/selftests/bpf/DENYLIST.asan diff --git a/tools/testing/selftests/bpf/DENYLIST.asan b/tools/testing/selftests/bpf/DENYLIST.asan new file mode 100644 index 000000000000..d7fe372a2293 --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.asan @@ -0,0 +1,3 @@ +*arena* +task_local_data +uprobe_multi_test From a1a771bd649212ef32cf9b0bcc63213a762d354a Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:25 -0800 Subject: [PATCH 279/828] selftests/bpf: Refactor bpf_get_ksyms() trace helper ASAN reported a memory leak in bpf_get_ksyms(): it allocates a struct ksyms internally and never frees it. Move struct ksyms to trace_helpers.h and return it from the bpf_get_ksyms(), giving ownership to the caller. Add filtered_syms and filtered_cnt fields to the ksyms to hold the filtered array of symbols, previously returned by bpf_get_ksyms(). Fixup the call sites: kprobe_multi_test and bench_trigger. Signed-off-by: Ihor Solodrai Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260223190736.649171-10-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/benchs/bench_trigger.c | 14 ++++++----- .../bpf/prog_tests/kprobe_multi_test.c | 12 ++++------ tools/testing/selftests/bpf/trace_helpers.c | 23 ++++++++++--------- tools/testing/selftests/bpf/trace_helpers.h | 11 +++++++-- 4 files changed, 34 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index aeec9edd3851..f74b313d6ae4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -230,8 +230,8 @@ static void trigger_fentry_setup(void) static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); - char **syms = NULL; - size_t cnt = 0; + struct bpf_link *link = NULL; + struct ksyms *ksyms = NULL; /* Some recursive functions will be skipped in * bpf_get_ksyms -> skip_entry, as they can introduce sufficient @@ -241,16 +241,18 @@ static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) * So, don't run the kprobe-multi-all and kretprobe-multi-all on * a debug kernel. */ - if (bpf_get_ksyms(&syms, &cnt, true)) { + if (bpf_get_ksyms(&ksyms, true)) { fprintf(stderr, "failed to get ksyms\n"); exit(1); } - opts.syms = (const char **) syms; - opts.cnt = cnt; + opts.syms = (const char **)ksyms->filtered_syms; + opts.cnt = ksyms->filtered_cnt; opts.retprobe = kretprobe; /* attach empty to all the kernel functions except bpf_get_numa_node_id. */ - if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) { + link = bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts); + free_kallsyms_local(ksyms); + if (!link) { fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 9caef222e528..f81dcd609ee9 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -456,25 +456,23 @@ static void test_kprobe_multi_bench_attach(bool kernel) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; - char **syms = NULL; - size_t cnt = 0; + struct ksyms *ksyms = NULL; - if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms")) + if (!ASSERT_OK(bpf_get_ksyms(&ksyms, kernel), "bpf_get_ksyms")) return; skel = kprobe_multi_empty__open_and_load(); if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) goto cleanup; - opts.syms = (const char **) syms; - opts.cnt = cnt; + opts.syms = (const char **)ksyms->filtered_syms; + opts.cnt = ksyms->filtered_cnt; do_bench_test(skel, &opts); cleanup: kprobe_multi_empty__destroy(skel); - if (syms) - free(syms); + free_kallsyms_local(ksyms); } static void test_kprobe_multi_bench_attach_addr(bool kernel) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index eeaab7013ca2..0e63daf83ed5 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -24,12 +24,6 @@ #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" -struct ksyms { - struct ksym *syms; - size_t sym_cap; - size_t sym_cnt; -}; - static struct ksyms *ksyms; static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -54,6 +48,8 @@ void free_kallsyms_local(struct ksyms *ksyms) if (!ksyms) return; + free(ksyms->filtered_syms); + if (!ksyms->syms) { free(ksyms); return; @@ -610,7 +606,7 @@ static int search_kallsyms_compare(const void *p1, const struct ksym *p2) return compare_name(p1, p2->name); } -int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) +int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel) { size_t cap = 0, cnt = 0; char *name = NULL, *ksym_name, **syms = NULL; @@ -637,8 +633,10 @@ int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) else f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); - if (!f) + if (!f) { + free_kallsyms_local(ksyms); return -EINVAL; + } map = hashmap__new(symbol_hash, symbol_equal, NULL); if (IS_ERR(map)) { @@ -679,15 +677,18 @@ int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) syms[cnt++] = ksym_name; } - *symsp = syms; - *cntp = cnt; + ksyms->filtered_syms = syms; + ksyms->filtered_cnt = cnt; + *ksymsp = ksyms; error: free(name); fclose(f); hashmap__free(map); - if (err) + if (err) { free(syms); + free_kallsyms_local(ksyms); + } return err; } diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index a5576b2dfc26..d5bf1433675d 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -23,7 +23,14 @@ struct ksym { long addr; char *name; }; -struct ksyms; + +struct ksyms { + struct ksym *syms; + size_t sym_cap; + size_t sym_cnt; + char **filtered_syms; + size_t filtered_cnt; +}; typedef int (*ksym_cmp_t)(const void *p1, const void *p2); typedef int (*ksym_search_cmp_t)(const void *p1, const struct ksym *p2); @@ -53,7 +60,7 @@ ssize_t get_rel_offset(uintptr_t addr); int read_build_id(const char *path, char *build_id, size_t size); -int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel); +int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel); int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel); #endif From 9d0272c91fbc3ae86f2c3b6ba0a0b0ee77d862e3 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:26 -0800 Subject: [PATCH 280/828] selftests/bpf: Fix memory leaks in tests Fix trivial memory leaks detected by userspace ASAN: - htab_update: free value buffer in test_reenter_update cleanup - test_xsk: inline pkt_stream_replace() in testapp_stats_rx_full() and testapp_stats_fill_empty() - testing_helpers: free buffer allocated by getline() in parse_test_list_file Acked-by: Eduard Zingerman Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-11-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/htab_update.c | 1 + .../selftests/bpf/prog_tests/test_xsk.c | 24 +++++++++++++++---- tools/testing/selftests/bpf/testing_helpers.c | 1 + 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c index d0b405eb2966..ea1a6766fbe9 100644 --- a/tools/testing/selftests/bpf/prog_tests/htab_update.c +++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c @@ -61,6 +61,7 @@ static void test_reenter_update(void) ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy"); out: + free(value); htab_update__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c index bab4a31621c7..7e38ec6e656b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c @@ -2003,9 +2003,17 @@ int testapp_stats_tx_invalid_descs(struct test_spec *test) int testapp_stats_rx_full(struct test_spec *test) { - if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE)) + struct pkt_stream *tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); + if (!tmp) return TEST_FAILURE; - test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_tx->xsk->pkt_stream = tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = tmp; test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; @@ -2015,9 +2023,17 @@ int testapp_stats_rx_full(struct test_spec *test) int testapp_stats_fill_empty(struct test_spec *test) { - if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE)) + struct pkt_stream *tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); + if (!tmp) return TEST_FAILURE; - test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_tx->xsk->pkt_stream = tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = tmp; test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 16eb37e5bad6..66af0d13751a 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -212,6 +212,7 @@ int parse_test_list_file(const char *path, break; } + free(buf); fclose(f); return err; } From 3eb4a2e399c6766ea0c8291e2adb5e6dc42b6e68 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:27 -0800 Subject: [PATCH 281/828] selftests/bpf: Fix cleanup in check_fd_array_cnt__fd_array_too_big() The Close() macro uses the passed in expression three times, which leads to repeated execution in case it has side effects. That is, Close(i--) would decrement i three times. ASAN caught a stack-buffer-undeflow error at a point where this was overlooked. Fix it. Acked-by: Eduard Zingerman Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-12-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/fd_array.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index c534b4d5f9da..3078d8264deb 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -412,8 +412,8 @@ static void check_fd_array_cnt__fd_array_too_big(void) ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG"); cleanup_fds: - while (i > 0) - Close(extra_fds[--i]); + while (i-- > 0) + Close(extra_fds[i]); } void test_fd_array_cnt(void) From ff9511410d5fbdec21a3bbfaa477aa1f56735b33 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:28 -0800 Subject: [PATCH 282/828] veristat: Fix a memory leak for preset ENUMERATOR ASAN detected a memory leak in veristat. The cleanup code handling ENUMERATOR value missed freeing strdup-ed svalue. Fix it. Acked-by: Mykyta Yatsenko Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-13-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1be1e353d40a..75f85e0362f5 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -3378,6 +3378,8 @@ int main(int argc, char **argv) } } free(env.presets[i].atoms); + if (env.presets[i].value.type == ENUMERATOR) + free(env.presets[i].value.svalue); } free(env.presets); return -err; From f7ac552be7f13e834a942f64b1ac87e7755b32f4 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:29 -0800 Subject: [PATCH 283/828] selftests/bpf: Fix use-after-free in xdp_metadata test ASAN reported a use-after-free in close_xsk(). The xsk->socket internally references xsk->umem via socket->ctx->umem, so the socket must be deleted before the umem. Fix the order of operations in close_xsk(). Acked-by: Mykyta Yatsenko Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-14-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/xdp_metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 19f92affc2da..5c31054ad4a4 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -126,10 +126,10 @@ static int open_xsk(int ifindex, struct xsk *xsk) static void close_xsk(struct xsk *xsk) { - if (xsk->umem) - xsk_umem__delete(xsk->umem); if (xsk->socket) xsk_socket__delete(xsk->socket); + if (xsk->umem) + xsk_umem__delete(xsk->umem); munmap(xsk->umem_area, UMEM_SIZE); } From 68b8bea1eec392aa50736a859b8f1418067a3bc4 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:30 -0800 Subject: [PATCH 284/828] selftests/bpf: Fix double thread join in uprobe_multi_test ASAN reported a "joining already joined thread" error. The release_child() may be called multiple times for the same struct child. Fix by resetting child->thread to 0 after pthread_join. Also memset(0) static child variable in test_attach_api(). Acked-by: Mykyta Yatsenko Acked-by: Jiri Olsa Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-15-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 2ee17ef1dae2..56cbea280fbd 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -62,8 +62,10 @@ static void release_child(struct child *child) return; close(child->go[1]); close(child->go[0]); - if (child->thread) + if (child->thread) { pthread_join(child->thread, NULL); + child->thread = 0; + } close(child->c2p[0]); close(child->c2p[1]); if (child->pid > 0) @@ -331,6 +333,8 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi { static struct child child; + memset(&child, 0, sizeof(child)); + /* no pid filter */ __test_attach_api(binary, pattern, opts, NULL); From 71dca2950fe9abf8e6fd3d9f5e6342da6634b898 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:31 -0800 Subject: [PATCH 285/828] selftests/bpf: Fix resource leaks caused by missing cleanups ASAN reported a number of resource leaks: - Add missing *__destroy(skel) calls - Replace bpf_link__detach() with bpf_link__destroy() where appropriate - cgrp_local_storage: Add bpf_link__destroy() when bpf_iter_create fails - dynptr: Add missing bpf_object__close() Acked-by: Eduard Zingerman Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-16-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/cgrp_local_storage.c | 4 ++- .../testing/selftests/bpf/prog_tests/dynptr.c | 5 +++- .../selftests/bpf/prog_tests/sockmap_basic.c | 28 +++++++++---------- .../selftests/bpf/prog_tests/sockmap_listen.c | 2 +- .../bpf/prog_tests/struct_ops_private_stack.c | 4 +-- .../selftests/bpf/prog_tests/tc_opts.c | 6 ++-- .../selftests/bpf/prog_tests/test_tc_tunnel.c | 5 +++- 7 files changed, 29 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 9015e2c2ab12..478a77cb67e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -202,7 +202,7 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) iter_fd = bpf_iter_create(bpf_link__fd(link)); if (!ASSERT_GE(iter_fd, 0, "iter_create")) - goto out; + goto out_link; /* trigger the program run */ (void)read(iter_fd, buf, sizeof(buf)); @@ -210,6 +210,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); close(iter_fd); +out_link: + bpf_link__destroy(link); out: cgrp_ls_sleepable__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b9f86cb91e81..5fda11590708 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -137,11 +137,14 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ ); link = bpf_program__attach(prog); - if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) { + bpf_object__close(obj); goto cleanup; + } err = bpf_prog_test_run_opts(aux_prog_fd, &topts); bpf_link__destroy(link); + bpf_object__close(obj); if (!ASSERT_OK(err, "test_run")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 256707e7d20d..dd3c757859f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -204,7 +204,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type) /* Fail since bpf_link for the same prog type has been created. */ link2 = bpf_program__attach_sockmap(prog_clone, map); if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) { - bpf_link__detach(link2); + bpf_link__destroy(link2); goto out; } @@ -230,7 +230,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type) if (!ASSERT_OK(err, "bpf_link_update")) goto out; out: - bpf_link__detach(link); + bpf_link__destroy(link); test_skmsg_load_helpers__destroy(skel); } @@ -417,7 +417,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void) if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) goto out; - bpf_link__detach(link); + bpf_link__destroy(link); err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0); if (!ASSERT_OK(err, "bpf_prog_attach")) @@ -426,7 +426,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void) /* Fail since attaching with the same prog/map has been done. */ link = bpf_program__attach_sockmap(prog, map); if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap")) - bpf_link__detach(link); + bpf_link__destroy(link); err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT); if (!ASSERT_OK(err, "bpf_prog_detach2")) @@ -747,13 +747,13 @@ static void test_sockmap_skb_verdict_peek_with_link(void) test_sockmap_skb_verdict_peek_helper(map); ASSERT_EQ(pass->bss->clone_called, 1, "clone_called"); out: - bpf_link__detach(link); + bpf_link__destroy(link); test_sockmap_pass_prog__destroy(pass); } static void test_sockmap_unconnected_unix(void) { - int err, map, stream = 0, dgram = 0, zero = 0; + int err, map, stream = -1, dgram = -1, zero = 0; struct test_sockmap_pass_prog *skel; skel = test_sockmap_pass_prog__open_and_load(); @@ -764,22 +764,22 @@ static void test_sockmap_unconnected_unix(void) stream = xsocket(AF_UNIX, SOCK_STREAM, 0); if (stream < 0) - return; + goto out; dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); - if (dgram < 0) { - close(stream); - return; - } + if (dgram < 0) + goto out; err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY); - ASSERT_ERR(err, "bpf_map_update_elem(stream)"); + if (!ASSERT_ERR(err, "bpf_map_update_elem(stream)")) + goto out; err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); ASSERT_OK(err, "bpf_map_update_elem(dgram)"); - +out: close(stream); close(dgram); + test_sockmap_pass_prog__destroy(skel); } static void test_sockmap_many_socket(void) @@ -1027,7 +1027,7 @@ static void test_sockmap_skb_verdict_vsock_poll(void) if (xrecv_nonblock(conn, &buf, 1, 0) != 1) FAIL("xrecv_nonblock"); detach: - bpf_link__detach(link); + bpf_link__destroy(link); close: xclose(conn); xclose(peer); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index f1bdccc7e4e7..cc0c68bab907 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -899,7 +899,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); - bpf_link__detach(link); + bpf_link__destroy(link); } static void redir_partial(int family, int sotype, int sock_map, int parser_map) diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c index 4006879ca3fe..d42123a0fb16 100644 --- a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c @@ -54,9 +54,7 @@ static void test_private_stack_fail(void) } err = struct_ops_private_stack_fail__load(skel); - if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load")) - goto cleanup; - return; + ASSERT_ERR(err, "struct_ops_private_stack_fail__load"); cleanup: struct_ops_private_stack_fail__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index dd7a138d8c3d..2955750ddada 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -1360,10 +1360,8 @@ static void test_tc_opts_dev_cleanup_target(int target) assert_mprog_count_ifindex(ifindex, target, 4); - ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); - ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); - ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); - return; + goto cleanup; + cleanup3: err = bpf_prog_detach_opts(fd3, loopback, target, &optd); ASSERT_OK(err, "prog_detach"); diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c index 0fe0a8f62486..7fc4d7dd70ef 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c @@ -699,7 +699,7 @@ void test_tc_tunnel(void) return; if (!ASSERT_OK(setup(), "global setup")) - return; + goto out; for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) { cfg = &subtests_cfg[i]; @@ -711,4 +711,7 @@ void test_tc_tunnel(void) subtest_cleanup(cfg); } cleanup(); + +out: + test_tc_tunnel__destroy(skel); } From 2bb270a0ac68990648c5e84abf73bb7493230ea6 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:32 -0800 Subject: [PATCH 286/828] selftests/bpf: Free bpf_object in test_sysctl ASAN reported a resource leak due to the bpf_object not being tracked in test_sysctl. Add obj field to struct sysctl_test to properly clean it up. Acked-by: Eduard Zingerman Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-17-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/test_sysctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c index 273dd41ca09e..2a1ff821bc97 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c @@ -27,6 +27,7 @@ struct sysctl_test { OP_EPERM, SUCCESS, } result; + struct bpf_object *obj; }; static struct sysctl_test tests[] = { @@ -1471,6 +1472,7 @@ static int load_sysctl_prog_file(struct sysctl_test *test) return -1; } + test->obj = obj; return prog_fd; } @@ -1573,6 +1575,7 @@ out: /* Detaching w/o checking return code: best effort attempt. */ if (progfd != -1) bpf_prog_detach(cgfd, atype); + bpf_object__close(test->obj); close(progfd); printf("[%s]\n", err ? "FAIL" : "PASS"); return err; From 3e711c8e4707c46cc45d9775b50204d0f2790d77 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:07:33 -0800 Subject: [PATCH 287/828] selftests/bpf: Fix array bounds warning in jit_disasm_helpers Compiler cannot infer upper bound for labels.cnt and warns about potential buffer overflow in snprintf. Add an explicit bounds check (... && i < MAX_LOCAL_LABELS) in the loop condition to fix the warning. Acked-by: Eduard Zingerman Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223190736.649171-18-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/jit_disasm_helpers.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c index febd6b12e372..364c557c5115 100644 --- a/tools/testing/selftests/bpf/jit_disasm_helpers.c +++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c @@ -122,15 +122,15 @@ static int disasm_one_func(FILE *text_out, uint8_t *image, __u32 len) pc += cnt; } qsort(labels.pcs, labels.cnt, sizeof(*labels.pcs), cmp_u32); - for (i = 0; i < labels.cnt; ++i) - /* gcc is unable to infer upper bound for labels.cnt and assumes - * it to be U32_MAX. U32_MAX takes 10 decimal digits. - * snprintf below prints into labels.names[*], - * which has space only for two digits and a letter. - * To avoid truncation warning use (i % MAX_LOCAL_LABELS), - * which informs gcc about printed value upper bound. - */ - snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i % MAX_LOCAL_LABELS); + /* gcc is unable to infer upper bound for labels.cnt and + * assumes it to be U32_MAX. U32_MAX takes 10 decimal digits. + * snprintf below prints into labels.names[*], which has space + * only for two digits and a letter. To avoid truncation + * warning use (i < MAX_LOCAL_LABELS), which informs gcc about + * printed value upper bound. + */ + for (i = 0; i < labels.cnt && i < MAX_LOCAL_LABELS; ++i) + snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i); /* now print with labels */ labels.print_phase = true; From ad90ecedad755e2f3e31364ec3130e5bb2f4b64a Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:11:16 -0800 Subject: [PATCH 288/828] selftests/bpf: Fix out-of-bounds array access bugs reported by ASAN - kmem_cache_iter: remove unnecessary debug output - lwt_seg6local: change the type of foobar to char[] - the sizeof(foobar) returned the pointer size and not a string length as intended - verifier_log: increase prog_name buffer size in verif_log_subtest() - compiler has a conservative estimate of fixed_log_sz value, making ASAN complain on snprint() call Acked-by: Eduard Zingerman Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223191118.655185-1-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c | 7 ++----- tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c | 2 +- tools/testing/selftests/bpf/prog_tests/verifier_log.c | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 6e35e13c2022..399fe9103f83 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -104,11 +104,8 @@ void test_kmem_cache_iter(void) if (!ASSERT_GE(iter_fd, 0, "iter_create")) goto destroy; - memset(buf, 0, sizeof(buf)); - while (read(iter_fd, buf, sizeof(buf)) > 0) { - /* Read out all contents */ - printf("%s", buf); - } + while (read(iter_fd, buf, sizeof(buf)) > 0) + ; /* Read out all contents */ /* Next reads should return 0 */ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c index 3bc730b7c7fa..1b25d5c5f8fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c @@ -117,7 +117,7 @@ void test_lwt_seg6local(void) const char *ns1 = NETNS_BASE "1"; const char *ns6 = NETNS_BASE "6"; struct nstoken *nstoken = NULL; - const char *foobar = "foobar"; + const char foobar[] = "foobar"; ssize_t bytes; int sfd, cfd; char buf[7]; diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index 8337c6bc5b95..aaa2854974c0 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -47,7 +47,7 @@ static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error) static void verif_log_subtest(const char *name, bool expect_load_error, int log_level) { LIBBPF_OPTS(bpf_prog_load_opts, opts); - char *exp_log, prog_name[16], op_name[32]; + char *exp_log, prog_name[24], op_name[32]; struct test_log_buf *skel; struct bpf_program *prog; size_t fixed_log_sz; From a2714e730304c79bf85d2178387255ef8348b897 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:11:17 -0800 Subject: [PATCH 289/828] selftests/bpf: Check BPFTOOL env var in detect_bpftool_path() The bpftool_maps_access and bpftool_metadata tests may fail on BPF CI with "command not found", depending on a workflow. This happens because detect_bpftool_path() only checks two hardcoded relative paths: - ./tools/sbin/bpftool - ../tools/sbin/bpftool Add support for a BPFTOOL environment variable that allows specifying the exact path to the bpftool binary. Acked-by: Mykyta Yatsenko Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223191118.655185-2-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpftool_helpers.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c index 595a636fa13b..929fc257f431 100644 --- a/tools/testing/selftests/bpf/bpftool_helpers.c +++ b/tools/testing/selftests/bpf/bpftool_helpers.c @@ -14,6 +14,17 @@ static int detect_bpftool_path(char *buffer, size_t size) { char tmp[BPFTOOL_PATH_MAX_LEN]; + const char *env_path; + + /* First, check if BPFTOOL environment variable is set */ + env_path = getenv("BPFTOOL"); + if (env_path && access(env_path, X_OK) == 0) { + strscpy(buffer, env_path, size); + return 0; + } else if (env_path) { + fprintf(stderr, "bpftool '%s' doesn't exist or is not executable\n", env_path); + return 1; + } /* Check default bpftool location (will work if we are running the * default flavor of test_progs) @@ -33,7 +44,7 @@ static int detect_bpftool_path(char *buffer, size_t size) return 0; } - /* Failed to find bpftool binary */ + fprintf(stderr, "Failed to detect bpftool path, use BPFTOOL env var to override\n"); return 1; } From 4c9d07865c06bb9b9faff1ecf6c4b7cc8f7d67a9 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 23 Feb 2026 11:11:18 -0800 Subject: [PATCH 290/828] selftests/bpf: Don't override SIGSEGV handler with ASAN test_progs has custom SIGSEGV handler, which interferes with the address sanitizer [1]. Add an #ifndef to avoid this. Additionally, declare an __asan_on_error() to dump the test logs in the same way it happens in the custom SIGSEGV handler. [1] https://lore.kernel.org/bpf/73d832948b01dbc0ebc60d85574bdf8537f3a810.camel@gmail.com/ Acked-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Signed-off-by: Ihor Solodrai Link: https://lore.kernel.org/r/20260223191118.655185-3-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 36 +++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index d1418ec1f351..0929f4a7bda4 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1261,14 +1261,8 @@ int get_bpf_max_tramp_links(void) return ret; } -#define MAX_BACKTRACE_SZ 128 -void crash_handler(int signum) +static void dump_crash_log(void) { - void *bt[MAX_BACKTRACE_SZ]; - size_t sz; - - sz = backtrace(bt, ARRAY_SIZE(bt)); - fflush(stdout); stdout = env.stdout_saved; stderr = env.stderr_saved; @@ -1277,12 +1271,32 @@ void crash_handler(int signum) env.test_state->error_cnt++; dump_test_log(env.test, env.test_state, true, false, NULL); } +} + +#define MAX_BACKTRACE_SZ 128 + +void crash_handler(int signum) +{ + void *bt[MAX_BACKTRACE_SZ]; + size_t sz; + + sz = backtrace(bt, ARRAY_SIZE(bt)); + + dump_crash_log(); + if (env.worker_id != -1) fprintf(stderr, "[%d]: ", env.worker_id); fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); backtrace_symbols_fd(bt, sz, STDERR_FILENO); } +#ifdef __SANITIZE_ADDRESS__ +void __asan_on_error(void) +{ + dump_crash_log(); +} +#endif + void hexdump(const char *prefix, const void *buf, size_t len) { for (int i = 0; i < len; i++) { @@ -1944,13 +1958,15 @@ int main(int argc, char **argv) .parser = parse_arg, .doc = argp_program_doc, }; + int err, i; + +#ifndef __SANITIZE_ADDRESS__ struct sigaction sigact = { .sa_handler = crash_handler, .sa_flags = SA_RESETHAND, - }; - int err, i; - + }; sigaction(SIGSEGV, &sigact, NULL); +#endif env.stdout_saved = stdout; env.stderr_saved = stderr; From 56e0a838277b12d48477b9c7478ceb5ae528ae2c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 24 Feb 2026 15:57:53 +0800 Subject: [PATCH 291/828] MAINTAINERS: Update Shawn Guo's address for HiSilicon PCIe controller driver Shawn is no longer with Linaro. Use his korg email address instead. Signed-off-by: Shawn Guo Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260224075753.122091-1-shawnguo@kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 55af015174a5..f02f50c9d695 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20509,7 +20509,7 @@ F: Documentation/devicetree/bindings/pci/hisilicon,kirin-pcie.yaml F: drivers/pci/controller/dwc/pcie-kirin.c PCIE DRIVER FOR HISILICON STB -M: Shawn Guo +M: Shawn Guo L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt From 30df81f2228d65bddf492db3929d9fcaffd38fc5 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 23 Feb 2026 14:56:09 +0800 Subject: [PATCH 292/828] scsi: ufs: core: Fix possible NULL pointer dereference in ufshcd_add_command_trace() The kernel log indicates a crash in ufshcd_add_command_trace, due to a NULL pointer dereference when accessing hwq->id. This can happen if ufshcd_mcq_req_to_hwq() returns NULL. This patch adds a NULL check for hwq before accessing its id field to prevent a kernel crash. Kernel log excerpt: [] notify_die+0x4c/0x8c [] __die+0x60/0xb0 [] die+0x4c/0xe0 [] die_kernel_fault+0x74/0x88 [] __do_kernel_fault+0x314/0x318 [] do_page_fault+0xa4/0x5f8 [] do_translation_fault+0x34/0x54 [] do_mem_abort+0x50/0xa8 [] el1_abort+0x3c/0x64 [] el1h_64_sync_handler+0x44/0xcc [] el1h_64_sync+0x80/0x88 [] ufshcd_add_command_trace+0x23c/0x320 [] ufshcd_compl_one_cqe+0xa4/0x404 [] ufshcd_mcq_poll_cqe_lock+0xac/0x104 [] ufs_mtk_mcq_intr+0x54/0x74 [ufs_mediatek_mod] [] __handle_irq_event_percpu+0xc8/0x348 [] handle_irq_event+0x3c/0xa8 [] handle_fasteoi_irq+0xf8/0x294 [] generic_handle_domain_irq+0x54/0x80 [] gic_handle_irq+0x1d4/0x330 [] call_on_irq_stack+0x44/0x68 [] do_interrupt_handler+0x78/0xd8 [] el1_interrupt+0x48/0xa8 [] el1h_64_irq_handler+0x14/0x24 [] el1h_64_irq+0x80/0x88 [] arch_local_irq_enable+0x4/0x1c [] cpuidle_enter+0x34/0x54 [] do_idle+0x1dc/0x2f8 [] cpu_startup_entry+0x30/0x3c [] secondary_start_kernel+0x134/0x1ac [] __secondary_switched+0xc4/0xcc Signed-off-by: Peter Wang Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260223065657.2432447-1-peter.wang@mediatek.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 571a73860561..4dc7fbc2a6db 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -518,8 +518,8 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, struct scsi_cmnd *cmd, if (hba->mcq_enabled) { struct ufs_hw_queue *hwq = ufshcd_mcq_req_to_hwq(hba, rq); - - hwq_id = hwq->id; + if (hwq) + hwq_id = hwq->id; } else { doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); } From 62c015373e1cdb1cdca824bd2dbce2dac0819467 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 23 Feb 2026 18:37:57 +0800 Subject: [PATCH 293/828] scsi: ufs: core: Move link recovery for hibern8 exit failure to wl_resume Move the link recovery trigger from ufshcd_uic_pwr_ctrl() to __ufshcd_wl_resume(). Ensure link recovery is only attempted when hibern8 exit fails during resume, not during hibern8 enter in suspend. Improve error handling and prevent unnecessary link recovery attempts. Fixes: 35dabf4503b9 ("scsi: ufs: core: Use link recovery when h8 exit fails during runtime resume") Signed-off-by: Peter Wang Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260223103906.2533654-1-peter.wang@mediatek.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4dc7fbc2a6db..85987373b961 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4390,14 +4390,6 @@ out_unlock: spin_unlock_irqrestore(hba->host->host_lock, flags); mutex_unlock(&hba->uic_cmd_mutex); - /* - * If the h8 exit fails during the runtime resume process, it becomes - * stuck and cannot be recovered through the error handler. To fix - * this, use link recovery instead of the error handler. - */ - if (ret && hba->pm_op_in_progress) - ret = ufshcd_link_recovery(hba); - return ret; } @@ -10200,7 +10192,15 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) } else { dev_err(hba->dev, "%s: hibern8 exit failed %d\n", __func__, ret); - goto vendor_suspend; + /* + * If the h8 exit fails during the runtime resume + * process, it becomes stuck and cannot be recovered + * through the error handler. To fix this, use link + * recovery instead of the error handler. + */ + ret = ufshcd_link_recovery(hba); + if (ret) + goto vendor_suspend; } } else if (ufshcd_is_link_off(hba)) { /* From 74b6e83942dcc9f3cca9e561b205a5b19940a344 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 19 Feb 2026 12:50:29 -0800 Subject: [PATCH 294/828] drm/gpusvm: Fix drm_gpusvm_pages_valid_unlocked() kernel-doc The kernel-doc for drm_gpusvm_pages_valid_unlocked() was stale and still referenced old range-based arguments and naming. Update the documentation to match the current function arguments and signature. Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/20260219205029.1011336-1-matthew.brost@intel.com --- drivers/gpu/drm/drm_gpusvm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 24180bfdf5a2..9ef9e52c0547 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1338,14 +1338,14 @@ bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm, EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); /** - * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked + * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: Pointer to the GPU SVM pages structure * - * This function determines if a GPU SVM range pages are valid. Expected be - * called without holding gpusvm->notifier_lock. + * This function determines if a GPU SVM pages are valid. Expected be called + * without holding gpusvm->notifier_lock. * - * Return: True if GPU SVM range has valid pages, False otherwise + * Return: True if GPU SVM pages are valid, False otherwise */ static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm, struct drm_gpusvm_pages *svm_pages) From 0902010c8d163f7b62e655efda1a843529152c7c Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 18:07:59 +0800 Subject: [PATCH 295/828] regulator: fp9931: Fix PM runtime reference leak in fp9931_hwmon_read() In fp9931_hwmon_read(), if regmap_read() failed, the function returned the error code without calling pm_runtime_put_autosuspend(), causing a PM reference leak. Fixes: 12d821bd13d4 ("regulator: Add FP9931/JD9930 driver") Signed-off-by: Felix Gu Reviewed-by: Andreas Kemnade Link: https://patch.msgid.link/20260224-fp9931-v1-1-1cf05cabef4a@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/fp9931.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/fp9931.c b/drivers/regulator/fp9931.c index 7fbcc6327cc6..abea3b69d8a0 100644 --- a/drivers/regulator/fp9931.c +++ b/drivers/regulator/fp9931.c @@ -144,13 +144,12 @@ static int fp9931_hwmon_read(struct device *dev, enum hwmon_sensor_types type, return ret; ret = regmap_read(data->regmap, FP9931_REG_TMST_VALUE, &val); - if (ret) - return ret; + if (!ret) + *temp = (s8)val * 1000; pm_runtime_put_autosuspend(data->dev); - *temp = (s8)val * 1000; - return 0; + return ret; } static umode_t fp9931_hwmon_is_visible(const void *data, From 4baaddaa44af01cd4ce239493060738fd0881835 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 19:19:03 +0800 Subject: [PATCH 296/828] regulator: bq257xx: Fix device node reference leak in bq257xx_reg_dt_parse_gpio() In bq257xx_reg_dt_parse_gpio(), if fails to get subchild, it returns without calling of_node_put(child), causing the device node reference leak. Fixes: 981dd162b635 ("regulator: bq257xx: Add bq257xx boost regulator driver") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260224-bq257-v1-1-8ebbc731c1c3@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/bq257xx-regulator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/regulator/bq257xx-regulator.c b/drivers/regulator/bq257xx-regulator.c index fc1ccede4468..dab8f1ab4450 100644 --- a/drivers/regulator/bq257xx-regulator.c +++ b/drivers/regulator/bq257xx-regulator.c @@ -115,11 +115,10 @@ static void bq257xx_reg_dt_parse_gpio(struct platform_device *pdev) return; subchild = of_get_child_by_name(child, pdata->desc.of_match); + of_node_put(child); if (!subchild) return; - of_node_put(child); - pdata->otg_en_gpio = devm_fwnode_gpiod_get_index(&pdev->dev, of_fwnode_handle(subchild), "enable", 0, From bfd7db781e2e7a99b086d645a104d16e368f58ff Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 20:23:30 +0800 Subject: [PATCH 297/828] regulator: Kconfig: fix a typo Fixes a typo in Kconfig, HWWON -> HWMON Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260224-kconfig-v1-1-b0c5459ed7a0@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index a708fc63f581..d10b6f9243d5 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -508,7 +508,7 @@ config REGULATOR_FP9931 This driver supports the FP9931/JD9930 voltage regulator chip which is used to provide power to Electronic Paper Displays so it is found in E-Book readers. - If HWWON is enabled, it also provides temperature measurement. + If HWMON is enabled, it also provides temperature measurement. config REGULATOR_LM363X tristate "TI LM363X voltage regulators" From e08f2adcf990b8cf272e90898401a9e481c1c667 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 24 Feb 2026 13:36:09 +0200 Subject: [PATCH 298/828] Revert "irqchip/ls-extirq: Use for_each_of_imap_item iterator" This reverts commit 3ac6dfe3d7a2396602b67667249b146504dfbd2a. The ls-extirq uses interrupt-map but it's a non-standard use documented in fsl,ls-extirq.yaml: # The driver(drivers/irqchip/irq-ls-extirq.c) have not use standard DT # function to parser interrupt-map. So it doesn't consider '#address-size' # in parent interrupt controller, such as GIC. # # When dt-binding verify interrupt-map, item data matrix is spitted at # incorrect position. Remove interrupt-map restriction because it always # wrong. This means that by using for_each_of_imap_item and the underlying of_irq_parse_imap_parent() on its interrupt-map property will effectively break its functionality Revert the patch making use of for_each_of_imap_item() in ls-extirq. Fixes: 3ac6dfe3d7a2 ("irqchip/ls-extirq: Use for_each_of_imap_item iterator") Signed-off-by: Ioana Ciornei Signed-off-by: Thomas Gleixner Acked-by: Herve Codina Link: https://patch.msgid.link/20260224113610.1129022-2-ioana.ciornei@nxp.com --- drivers/irqchip/irq-ls-extirq.c | 47 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index a7e9c3885b09..96f9c20621cf 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c @@ -125,32 +125,45 @@ static const struct irq_domain_ops extirq_domain_ops = { static int ls_extirq_parse_map(struct ls_extirq_data *priv, struct device_node *node) { - struct of_imap_parser imap_parser; - struct of_imap_item imap_item; + const __be32 *map; + u32 mapsize; int ret; - ret = of_imap_parser_init(&imap_parser, node, &imap_item); - if (ret) - return ret; + map = of_get_property(node, "interrupt-map", &mapsize); + if (!map) + return -ENOENT; + if (mapsize % sizeof(*map)) + return -EINVAL; + mapsize /= sizeof(*map); - for_each_of_imap_item(&imap_parser, &imap_item) { + while (mapsize) { struct device_node *ipar; - u32 hwirq; - int i; + u32 hwirq, intsize, j; - hwirq = imap_item.child_imap[0]; - if (hwirq >= MAXIRQ) { - of_node_put(imap_item.parent_args.np); + if (mapsize < 3) + return -EINVAL; + hwirq = be32_to_cpup(map); + if (hwirq >= MAXIRQ) return -EINVAL; - } priv->nirq = max(priv->nirq, hwirq + 1); - ipar = of_node_get(imap_item.parent_args.np); - priv->map[hwirq].fwnode = of_fwnode_handle(ipar); + ipar = of_find_node_by_phandle(be32_to_cpup(map + 2)); + map += 3; + mapsize -= 3; + if (!ipar) + return -EINVAL; + priv->map[hwirq].fwnode = &ipar->fwnode; + ret = of_property_read_u32(ipar, "#interrupt-cells", &intsize); + if (ret) + return ret; - priv->map[hwirq].param_count = imap_item.parent_args.args_count; - for (i = 0; i < priv->map[hwirq].param_count; i++) - priv->map[hwirq].param[i] = imap_item.parent_args.args[i]; + if (intsize > mapsize) + return -EINVAL; + + priv->map[hwirq].param_count = intsize; + for (j = 0; j < intsize; ++j) + priv->map[hwirq].param[j] = be32_to_cpup(map++); + mapsize -= intsize; } return 0; } From fe5669e363b129cde285bfb4d45abb72d1d77cfc Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 24 Feb 2026 13:36:10 +0200 Subject: [PATCH 299/828] irqchip/ls-extirq: Fix devm_of_iomap() error check The devm_of_iomap() function returns an ERR_PTR() encoded error code on failure. Replace the incorrect check against NULL with IS_ERR(). Fixes: 05cd654829dd ("irqchip/ls-extirq: Convert to a platform driver to make it work again") Reported-by: Dan Carpenter Signed-off-by: Ioana Ciornei Signed-off-by: Thomas Gleixner Reviewed-by: Herve Codina Link: https://patch.msgid.link/20260224113610.1129022-3-ioana.ciornei@nxp.com Closes: https://lore.kernel.org/all/aYXvfbfT6w0TMsXS@stanley.mountain/ --- drivers/irqchip/irq-ls-extirq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index 96f9c20621cf..d724fe843980 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c @@ -190,8 +190,10 @@ static int ls_extirq_probe(struct platform_device *pdev) return dev_err_probe(dev, -ENOMEM, "Failed to allocate memory\n"); priv->intpcr = devm_of_iomap(dev, node, 0, NULL); - if (!priv->intpcr) - return dev_err_probe(dev, -ENOMEM, "Cannot ioremap OF node %pOF\n", node); + if (IS_ERR(priv->intpcr)) { + return dev_err_probe(dev, PTR_ERR(priv->intpcr), + "Cannot ioremap OF node %pOF\n", node); + } ret = ls_extirq_parse_map(priv, node); if (ret) From 095f5693329221ffd9dfe5fcd0079263d462e441 Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Tue, 17 Feb 2026 15:25:59 +0800 Subject: [PATCH 300/828] tools/sched_ext: Sync README.md Kconfig with upstream scx Sync the documentation with the upstream scx repository to reflect the current recommended configuration. Ref: https://github.com/sched-ext/scx/blob/main/README.md#build--install Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- tools/sched_ext/README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/sched_ext/README.md b/tools/sched_ext/README.md index 56a9d1557ac4..6e282bce453c 100644 --- a/tools/sched_ext/README.md +++ b/tools/sched_ext/README.md @@ -58,14 +58,8 @@ CONFIG_SCHED_CLASS_EXT=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_DEBUG_INFO_BTF=y -``` - -It's also recommended that you also include the following Kconfig options: - -``` CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y -CONFIG_PAHOLE_HAS_BTF_TAG=y ``` There is a `Kconfig` file in this directory whose contents you can append to From ee0ff6690f2641b8f6ba8026ec17f6bc48f86649 Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Tue, 17 Feb 2026 15:26:00 +0800 Subject: [PATCH 301/828] tools/sched_ext: Add Kconfig to sync with upstream Add the missing Kconfig file to tools/sched_ext/ as referenced in the README. Ref: https://github.com/sched-ext/scx/blob/main/kernel.config Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- tools/sched_ext/Kconfig | 61 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tools/sched_ext/Kconfig diff --git a/tools/sched_ext/Kconfig b/tools/sched_ext/Kconfig new file mode 100644 index 000000000000..275bd97ae62b --- /dev/null +++ b/tools/sched_ext/Kconfig @@ -0,0 +1,61 @@ +# sched-ext mandatory options +# +CONFIG_BPF=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_SCHED_CLASS_EXT=y + +# Required by some rust schedulers (e.g. scx_p2dq) +# +CONFIG_KALLSYMS_ALL=y + +# Required on arm64 +# +# CONFIG_DEBUG_INFO_REDUCED is not set + +# LAVD tracks futex to give an additional time slice for futex holder +# (i.e., avoiding lock holder preemption) for better system-wide progress. +# LAVD first tries to use ftrace to trace futex function calls. +# If that is not available, it tries to use a tracepoint. +CONFIG_FUNCTION_TRACER=y + +# Enable scheduling debugging +# +CONFIG_SCHED_DEBUG=y + +# Enable extra scheduling features (for a better code coverage while testing +# the schedulers) +# +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_CORE=y +CONFIG_SCHED_MC=y + +# Enable fully preemptible kernel for a better test coverage of the schedulers +# +# CONFIG_PREEMPT_NONE is not set +# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_PREEMPT=y +CONFIG_PREEMPT_DYNAMIC=y + +# Additional debugging information (useful to catch potential locking issues) +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_PROVE_LOCKING=y + +# Bpftrace headers (for additional debug info) +CONFIG_BPF_EVENTS=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_KPROBES=y +CONFIG_KPROBE_EVENTS=y +CONFIG_UPROBES=y +CONFIG_UPROBE_EVENTS=y +CONFIG_DEBUG_FS=y + +# Enable access to kernel configuration and headers at runtime +CONFIG_IKHEADERS=y +CONFIG_IKCONFIG_PROC=y +CONFIG_IKCONFIG=y From a46435537a844d0f7b4b620baf962cad136422de Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Feb 2026 11:36:09 -0700 Subject: [PATCH 302/828] io_uring/cmd_net: use READ_ONCE() for ->addr3 read Any SQE read should use READ_ONCE(), to ensure the result is read once and only once. Doesn't really matter for this case, but it's better to keep these 100% consistent and always use READ_ONCE() for the prep side of SQE handling. Fixes: 5d24321e4c15 ("io_uring: Introduce getsockname io_uring cmd") Signed-off-by: Jens Axboe --- io_uring/cmd_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c index 57ddaf874611..125a81c520a6 100644 --- a/io_uring/cmd_net.c +++ b/io_uring/cmd_net.c @@ -146,7 +146,7 @@ static int io_uring_cmd_getsockname(struct socket *sock, return -EINVAL; uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); - ulen = u64_to_user_ptr(sqe->addr3); + ulen = u64_to_user_ptr(READ_ONCE(sqe->addr3)); peer = READ_ONCE(sqe->optlen); if (peer > 1) return -EINVAL; From 09833d99db36d74456a4d13eb29c32d56ff8f2b6 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 13 Feb 2026 10:54:10 +0100 Subject: [PATCH 303/828] mm/kfence: disable KFENCE upon KASAN HW tags enablement KFENCE does not currently support KASAN hardware tags. As a result, the two features are incompatible when enabled simultaneously. Given that MTE provides deterministic protection and KFENCE is a sampling-based debugging tool, prioritize the stronger hardware protections. Disable KFENCE initialization and free the pre-allocated pool if KASAN hardware tags are detected to ensure the system maintains the security guarantees provided by MTE. Link: https://lkml.kernel.org/r/20260213095410.1862978-1-glider@google.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Alexander Potapenko Suggested-by: Marco Elver Reviewed-by: Marco Elver Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Ernesto Martinez Garcia Cc: Greg KH Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- mm/kfence/core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index b4ea3262c925..b5aedf505cec 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -916,6 +917,20 @@ void __init kfence_alloc_pool_and_metadata(void) if (!kfence_sample_interval) return; + /* + * If KASAN hardware tags are enabled, disable KFENCE, because it + * does not support MTE yet. + */ + if (kasan_hw_tags_enabled()) { + pr_info("disabled as KASAN HW tags are enabled\n"); + if (__kfence_pool) { + memblock_free(__kfence_pool, KFENCE_POOL_SIZE); + __kfence_pool = NULL; + } + kfence_sample_interval = 0; + return; + } + /* * If the pool has already been initialized by arch, there is no need to * re-allocate the memory pool. From eb9549346f7578eda3755683ac2cfb4d94c0675f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Feb 2026 13:17:44 +0100 Subject: [PATCH 304/828] mm: change vma_alloc_folio_noprof() macro to inline function In a few rare configurations with extra warnings eanbled, the new drm_pagemap_migrate_populate_ram_pfn() calls vma_alloc_folio_noprof() but that does not use all the arguments, leading to a harmless warning: drivers/gpu/drm/drm_pagemap.c: In function 'drm_pagemap_migrate_populate_ram_pfn': drivers/gpu/drm/drm_pagemap.c:701:63: error: parameter 'addr' set but not used [-Werror=unused-but-set-parameter=] 701 | unsigned long addr) | ~~~~~~~~~~~~~~^~~~ Replace the macro with an inline function so the compiler can see how the argument would be used, but is still able to optimize out the assignments. Link: https://lkml.kernel.org/r/20260216121751.2378374-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Lorenzo Stoakes Acked-by: Zi Yan Reviewed-by: Suren Baghdasaryan Cc: Alexei Starovoitov Cc: Brendan Jackman Cc: David Hildenbrand Cc: Johannes Weiner Cc: Joshua Hahn Cc: Kefeng Wang Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Shakeel Butt Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/gfp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2b30a0529d48..f82d74a77cad 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -339,8 +339,11 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde { return folio_alloc_noprof(gfp, order); } -#define vma_alloc_folio_noprof(gfp, order, vma, addr) \ - folio_alloc_noprof(gfp, order) +static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, + struct vm_area_struct *vma, unsigned long addr) +{ + return folio_alloc_noprof(gfp, order); +} #endif #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) From dd085fe9a8ebfc5d10314c60452db38d2b75e609 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sat, 14 Feb 2026 05:45:35 +0530 Subject: [PATCH 305/828] mm: thp: deny THP for files on anonymous inodes file_thp_enabled() incorrectly allows THP for files on anonymous inodes (e.g. guest_memfd and secretmem). These files are created via alloc_file_pseudo(), which does not call get_write_access() and leaves inode->i_writecount at 0. Combined with S_ISREG(inode->i_mode) being true, they appear as read-only regular files when CONFIG_READ_ONLY_THP_FOR_FS is enabled, making them eligible for THP collapse. Anonymous inodes can never pass the inode_is_open_for_write() check since their i_writecount is never incremented through the normal VFS open path. The right thing to do is to exclude them from THP eligibility altogether, since CONFIG_READ_ONLY_THP_FOR_FS was designed for real filesystem files (e.g. shared libraries), not for pseudo-filesystem inodes. For guest_memfd, this allows khugepaged and MADV_COLLAPSE to create large folios in the page cache via the collapse path, but the guest_memfd fault handler does not support large folios. This triggers WARN_ON_ONCE(folio_test_large(folio)) in kvm_gmem_fault_user_mapping(). For secretmem, collapse_file() tries to copy page contents through the direct map, but secretmem pages are removed from the direct map. This can result in a kernel crash: BUG: unable to handle page fault for address: ffff88810284d000 RIP: 0010:memcpy_orig+0x16/0x130 Call Trace: collapse_file hpage_collapse_scan_file madvise_collapse Secretmem is not affected by the crash on upstream as the memory failure recovery handles the failed copy gracefully, but it still triggers confusing false memory failure reports: Memory failure: 0x106d96f: recovery action for clean unevictable LRU page: Recovered Check IS_ANON_FILE(inode) in file_thp_enabled() to deny THP for all anonymous inode files. Link: https://syzkaller.appspot.com/bug?extid=33a04338019ac7e43a44 Link: https://lore.kernel.org/linux-mm/CAEvNRgHegcz3ro35ixkDw39ES8=U6rs6S7iP0gkR9enr7HoGtA@mail.gmail.com Link: https://lkml.kernel.org/r/20260214001535.435626-1-kartikey406@gmail.com Fixes: 7fbb5e188248 ("mm: remove VM_EXEC requirement for THP eligibility") Signed-off-by: Deepanshu Kartikey Reported-by: syzbot+33a04338019ac7e43a44@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=33a04338019ac7e43a44 Tested-by: syzbot+33a04338019ac7e43a44@syzkaller.appspotmail.com Tested-by: Lance Yang Acked-by: David Hildenbrand (Arm) Reviewed-by: Barry Song Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: Lorenzo Stoakes Cc: Baolin Wang Cc: Dev Jain Cc: Fangrui Song Cc: Liam Howlett Cc: Nico Pache Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d4ca8cfd7f9d..8e2746ea74ad 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -94,6 +94,9 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) inode = file_inode(vma->vm_file); + if (IS_ANON_FILE(inode)) + return false; + return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } From f85b1c6af5bc3872f994df0a5688c1162de07a62 Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Mon, 16 Feb 2026 14:22:19 +0100 Subject: [PATCH 306/828] liveupdate: luo_file: remember retrieve() status LUO keeps track of successful retrieve attempts on a LUO file. It does so to avoid multiple retrievals of the same file. Multiple retrievals cause problems because once the file is retrieved, the serialized data structures are likely freed and the file is likely in a very different state from what the code expects. The retrieve boolean in struct luo_file keeps track of this, and is passed to the finish callback so it knows what work was already done and what it has left to do. All this works well when retrieve succeeds. When it fails, luo_retrieve_file() returns the error immediately, without ever storing anywhere that a retrieve was attempted or what its error code was. This results in an errored LIVEUPDATE_SESSION_RETRIEVE_FD ioctl to userspace, but nothing prevents it from trying this again. The retry is problematic for much of the same reasons listed above. The file is likely in a very different state than what the retrieve logic normally expects, and it might even have freed some serialization data structures. Attempting to access them or free them again is going to break things. For example, if memfd managed to restore 8 of its 10 folios, but fails on the 9th, a subsequent retrieve attempt will try to call kho_restore_folio() on the first folio again, and that will fail with a warning since it is an invalid operation. Apart from the retry, finish() also breaks. Since on failure the retrieved bool in luo_file is never touched, the finish() call on session close will tell the file handler that retrieve was never attempted, and it will try to access or free the data structures that might not exist, much in the same way as the retry attempt. There is no sane way of attempting the retrieve again. Remember the error retrieve returned and directly return it on a retry. Also pass this status code to finish() so it can make the right decision on the work it needs to do. This is done by changing the bool to an integer. A value of 0 means retrieve was never attempted, a positive value means it succeeded, and a negative value means it failed and the error code is the value. Link: https://lkml.kernel.org/r/20260216132221.987987-1-pratyush@kernel.org Fixes: 7c722a7f44e0 ("liveupdate: luo_file: implement file systems callbacks") Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Mike Rapoport (Microsoft) Cc: Pasha Tatashin Cc: Signed-off-by: Andrew Morton --- include/linux/liveupdate.h | 9 +++++--- kernel/liveupdate/luo_file.c | 41 ++++++++++++++++++++++-------------- mm/memfd_luo.c | 7 +++++- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h index fe82a6c3005f..dd11fdc76a5f 100644 --- a/include/linux/liveupdate.h +++ b/include/linux/liveupdate.h @@ -23,8 +23,11 @@ struct file; /** * struct liveupdate_file_op_args - Arguments for file operation callbacks. * @handler: The file handler being called. - * @retrieved: The retrieve status for the 'can_finish / finish' - * operation. + * @retrieve_status: The retrieve status for the 'can_finish / finish' + * operation. A value of 0 means the retrieve has not been + * attempted, a positive value means the retrieve was + * successful, and a negative value means the retrieve failed, + * and the value is the error code of the call. * @file: The file object. For retrieve: [OUT] The callback sets * this to the new file. For other ops: [IN] The caller sets * this to the file being operated on. @@ -40,7 +43,7 @@ struct file; */ struct liveupdate_file_op_args { struct liveupdate_file_handler *handler; - bool retrieved; + int retrieve_status; struct file *file; u64 serialized_data; void *private_data; diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index 8c79058253e1..5acee4174bf0 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -134,9 +134,12 @@ static LIST_HEAD(luo_file_handler_list); * state that is not preserved. Set by the handler's .preserve() * callback, and must be freed in the handler's .unpreserve() * callback. - * @retrieved: A flag indicating whether a user/kernel in the new kernel has + * @retrieve_status: Status code indicating whether a user/kernel in the new kernel has * successfully called retrieve() on this file. This prevents - * multiple retrieval attempts. + * multiple retrieval attempts. A value of 0 means a retrieve() + * has not been attempted, a positive value means the retrieve() + * was successful, and a negative value means the retrieve() + * failed, and the value is the error code of the call. * @mutex: A mutex that protects the fields of this specific instance * (e.g., @retrieved, @file), ensuring that operations like * retrieving or finishing a file are atomic. @@ -161,7 +164,7 @@ struct luo_file { struct file *file; u64 serialized_data; void *private_data; - bool retrieved; + int retrieve_status; struct mutex mutex; struct list_head list; u64 token; @@ -298,7 +301,6 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) luo_file->file = file; luo_file->fh = fh; luo_file->token = token; - luo_file->retrieved = false; mutex_init(&luo_file->mutex); args.handler = fh; @@ -577,7 +579,12 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token, return -ENOENT; guard(mutex)(&luo_file->mutex); - if (luo_file->retrieved) { + if (luo_file->retrieve_status < 0) { + /* Retrieve was attempted and it failed. Return the error code. */ + return luo_file->retrieve_status; + } + + if (luo_file->retrieve_status > 0) { /* * Someone is asking for this file again, so get a reference * for them. @@ -590,16 +597,19 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token, args.handler = luo_file->fh; args.serialized_data = luo_file->serialized_data; err = luo_file->fh->ops->retrieve(&args); - if (!err) { - luo_file->file = args.file; - - /* Get reference so we can keep this file in LUO until finish */ - get_file(luo_file->file); - *filep = luo_file->file; - luo_file->retrieved = true; + if (err) { + /* Keep the error code for later use. */ + luo_file->retrieve_status = err; + return err; } - return err; + luo_file->file = args.file; + /* Get reference so we can keep this file in LUO until finish */ + get_file(luo_file->file); + *filep = luo_file->file; + luo_file->retrieve_status = 1; + + return 0; } static int luo_file_can_finish_one(struct luo_file_set *file_set, @@ -615,7 +625,7 @@ static int luo_file_can_finish_one(struct luo_file_set *file_set, args.handler = luo_file->fh; args.file = luo_file->file; args.serialized_data = luo_file->serialized_data; - args.retrieved = luo_file->retrieved; + args.retrieve_status = luo_file->retrieve_status; can_finish = luo_file->fh->ops->can_finish(&args); } @@ -632,7 +642,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set, args.handler = luo_file->fh; args.file = luo_file->file; args.serialized_data = luo_file->serialized_data; - args.retrieved = luo_file->retrieved; + args.retrieve_status = luo_file->retrieve_status; luo_file->fh->ops->finish(&args); luo_flb_file_finish(luo_file->fh); @@ -788,7 +798,6 @@ int luo_file_deserialize(struct luo_file_set *file_set, luo_file->file = NULL; luo_file->serialized_data = file_ser[i].data; luo_file->token = file_ser[i].token; - luo_file->retrieved = false; mutex_init(&luo_file->mutex); list_add_tail(&luo_file->list, &file_set->files_list); } diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index 5c17da3880c5..e485b828d173 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -326,7 +326,12 @@ static void memfd_luo_finish(struct liveupdate_file_op_args *args) struct memfd_luo_folio_ser *folios_ser; struct memfd_luo_ser *ser; - if (args->retrieved) + /* + * If retrieve was successful, nothing to do. If it failed, retrieve() + * already cleaned up everything it could. So nothing to do there + * either. Only need to clean up when retrieve was not called. + */ + if (args->retrieve_status) return; ser = phys_to_virt(args->serialized_data); From 319d0bff22f3dd7a982c289e8336da69f0581299 Mon Sep 17 00:00:00 2001 From: "Vlastimil Babka (SUSE)" Date: Tue, 17 Feb 2026 11:21:52 +0100 Subject: [PATCH 307/828] MAINTAINERS, mailmap: update e-mail address for Vlastimil Babka Hopefully improve e-mail performance. Link: https://lkml.kernel.org/r/20260217102151.10425-2-vbabka@kernel.org Signed-off-by: Vlastimil Babka (SUSE) Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.mailmap b/.mailmap index e1cf6bb85d33..b3785362f73f 100644 --- a/.mailmap +++ b/.mailmap @@ -876,6 +876,7 @@ Vivien Didelot Vlad Dogaru Vladimir Davydov Vladimir Davydov +Vlastimil Babka WangYuli WangYuli Weiwen Hu diff --git a/MAINTAINERS b/MAINTAINERS index 55af015174a5..02fe14782533 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16656,7 +16656,7 @@ M: Andrew Morton M: David Hildenbrand R: Lorenzo Stoakes R: Liam R. Howlett -R: Vlastimil Babka +R: Vlastimil Babka R: Mike Rapoport R: Suren Baghdasaryan R: Michal Hocko @@ -16786,7 +16786,7 @@ M: Andrew Morton M: David Hildenbrand R: Lorenzo Stoakes R: Liam R. Howlett -R: Vlastimil Babka +R: Vlastimil Babka R: Mike Rapoport R: Suren Baghdasaryan R: Michal Hocko @@ -16841,7 +16841,7 @@ F: mm/oom_kill.c MEMORY MANAGEMENT - PAGE ALLOCATOR M: Andrew Morton -M: Vlastimil Babka +M: Vlastimil Babka R: Suren Baghdasaryan R: Michal Hocko R: Brendan Jackman @@ -16887,7 +16887,7 @@ M: David Hildenbrand M: Lorenzo Stoakes R: Rik van Riel R: Liam R. Howlett -R: Vlastimil Babka +R: Vlastimil Babka R: Harry Yoo R: Jann Horn L: linux-mm@kvack.org @@ -16986,7 +16986,7 @@ MEMORY MAPPING M: Andrew Morton M: Liam R. Howlett M: Lorenzo Stoakes -R: Vlastimil Babka +R: Vlastimil Babka R: Jann Horn R: Pedro Falcato L: linux-mm@kvack.org @@ -17016,7 +17016,7 @@ M: Andrew Morton M: Suren Baghdasaryan M: Liam R. Howlett M: Lorenzo Stoakes -R: Vlastimil Babka +R: Vlastimil Babka R: Shakeel Butt L: linux-mm@kvack.org S: Maintained @@ -17032,7 +17032,7 @@ M: Andrew Morton M: Liam R. Howlett M: Lorenzo Stoakes M: David Hildenbrand -R: Vlastimil Babka +R: Vlastimil Babka R: Jann Horn L: linux-mm@kvack.org S: Maintained @@ -23174,7 +23174,7 @@ K: \b(?i:rust)\b RUST [ALLOC] M: Danilo Krummrich R: Lorenzo Stoakes -R: Vlastimil Babka +R: Vlastimil Babka R: Liam R. Howlett R: Uladzislau Rezki L: rust-for-linux@vger.kernel.org @@ -24350,7 +24350,7 @@ F: Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml F: drivers/nvmem/layouts/sl28vpd.c SLAB ALLOCATOR -M: Vlastimil Babka +M: Vlastimil Babka M: Andrew Morton R: Christoph Lameter R: David Rientjes From fdb24a820a5832ec4532273282cbd4f22c291a0d Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 17 Feb 2026 05:09:55 +0000 Subject: [PATCH 308/828] Squashfs: check metadata block offset is within range Syzkaller reports a "general protection fault in squashfs_copy_data" This is ultimately caused by a corrupted index look-up table, which produces a negative metadata block offset. This is subsequently passed to squashfs_copy_data (via squashfs_read_metadata) where the negative offset causes an out of bounds access. The fix is to check that the offset is within range in squashfs_read_metadata. This will trap this and other cases. Link: https://lkml.kernel.org/r/20260217050955.138351-1-phillip@squashfs.org.uk Fixes: f400e12656ab ("Squashfs: cache operations") Reported-by: syzbot+a9747fe1c35a5b115d3f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/699234e2.a70a0220.2c38d7.00e2.GAE@google.com/ Signed-off-by: Phillip Lougher Cc: Christian Brauner Cc: Signed-off-by: Andrew Morton --- fs/squashfs/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 8e958db5f786..67abd4dff222 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -344,6 +344,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, if (unlikely(length < 0)) return -EIO; + if (unlikely(*offset < 0 || *offset >= SQUASHFS_METADATA_SIZE)) + return -EIO; + while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) { From c80f46ac228b48403866d65391ad09bdf0e8562a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 14 Feb 2026 13:41:21 -0800 Subject: [PATCH 309/828] mm/damon/core: disallow non-power of two min_region_sz DAMON core uses min_region_sz parameter value as the DAMON region alignment. The alignment is made using ALIGN() and ALIGN_DOWN(), which support only the power of two alignments. But DAMON core API callers can set min_region_sz to an arbitrary number. Users can also set it indirectly, using addr_unit. When the alignment is not properly set, DAMON behavior becomes difficult to expect and understand, makes it effectively broken. It doesn't cause a kernel crash-like significant issue, though. Fix the issue by disallowing min_region_sz input that is not a power of two. Add the check to damon_commit_ctx(), as all DAMON API callers who set min_region_sz uses the function. This can be a sort of behavioral change, but it does not break users, for the following reasons. As the symptom is making DAMON effectively broken, it is not reasonable to believe there are real use cases of non-power of two min_region_sz. There is no known use case or issue reports from the setup, either. In future, if we find real use cases of non-power of two alignments and we can support it with low enough overhead, we can consider moving the restriction. But, for now, simply disallowing the corner case should be good enough as a hot fix. Link: https://lkml.kernel.org/r/20260214214124.87689-1-sj@kernel.org Fixes: d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region") Signed-off-by: SeongJae Park Cc: Quanmin Yan Cc: [6.18+] Signed-off-by: Andrew Morton --- mm/damon/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 01eba1a547d4..adfc52fee9dc 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1252,6 +1252,9 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) { int err; + if (!is_power_of_2(src->min_region_sz)) + return -EINVAL; + err = damon_commit_schemes(dst, src); if (err) return err; From d155aab90fffa00f93cea1f107aef0a3d548b2ff Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 20 Feb 2026 15:49:40 +0100 Subject: [PATCH 310/828] mm/kfence: fix KASAN hardware tag faults during late enablement When KASAN hardware tags are enabled, re-enabling KFENCE late (via /sys/module/kfence/parameters/sample_interval) causes KASAN faults. This happens because the KFENCE pool and metadata are allocated via the page allocator, which tags the memory, while KFENCE continues to access it using untagged pointers during initialization. Use __GFP_SKIP_KASAN for late KFENCE pool and metadata allocations to ensure the memory remains untagged, consistent with early allocations from memblock. To support this, add __GFP_SKIP_KASAN to the allowlist in __alloc_contig_verify_gfp_mask(). Link: https://lkml.kernel.org/r/20260220144940.2779209-1-glider@google.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Alexander Potapenko Suggested-by: Ernesto Martinez Garcia Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Greg KH Cc: Kees Cook Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- mm/kfence/core.c | 14 ++++++++------ mm/page_alloc.c | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index b5aedf505cec..7393957f9a20 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -1004,14 +1004,14 @@ static int kfence_init_late(void) #ifdef CONFIG_CONTIG_ALLOC struct page *pages; - pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node, - NULL); + pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL | __GFP_SKIP_KASAN, + first_online_node, NULL); if (!pages) return -ENOMEM; __kfence_pool = page_to_virt(pages); - pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node, - NULL); + pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL | __GFP_SKIP_KASAN, + first_online_node, NULL); if (pages) kfence_metadata_init = page_to_virt(pages); #else @@ -1021,11 +1021,13 @@ static int kfence_init_late(void) return -EINVAL; } - __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL); + __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, + GFP_KERNEL | __GFP_SKIP_KASAN); if (!__kfence_pool) return -ENOMEM; - kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL); + kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, + GFP_KERNEL | __GFP_SKIP_KASAN); #endif if (!kfence_metadata_init) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fcc32737f451..2d4b6f1a554e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6928,7 +6928,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask) { const gfp_t reclaim_mask = __GFP_IO | __GFP_FS | __GFP_RECLAIM; const gfp_t action_mask = __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | - __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO; + __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO | + __GFP_SKIP_KASAN; const gfp_t cc_action_mask = __GFP_RETRY_MAYFAIL | __GFP_NOWARN; /* From 079c24d5690262e83ee476e2a548e416f3237511 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 19 Feb 2026 15:36:56 -0800 Subject: [PATCH 311/828] mm/tracing: rss_stat: ensure curr is false from kthread context The rss_stat trace event allows userspace tools, like Perfetto [1], to inspect per-process RSS metric changes over time. The curr field was introduced to rss_stat in commit e4dcad204d3a ("rss_stat: add support to detect RSS updates of external mm"). Its intent is to indicate whether the RSS update is for the mm_struct of the current execution context; and is set to false when operating on a remote mm_struct (e.g., via kswapd or a direct reclaimer). However, an issue arises when a kernel thread temporarily adopts a user process's mm_struct. Kernel threads do not have their own mm_struct and normally have current->mm set to NULL. To operate on user memory, they can "borrow" a memory context using kthread_use_mm(), which sets current->mm to the user process's mm. This can be observed, for example, in the USB Function Filesystem (FFS) driver. The ffs_user_copy_worker() handles AIO completions and uses kthread_use_mm() to copy data to a user-space buffer. If a page fault occurs during this copy, the fault handler executes in the kthread's context. At this point, current is the kthread, but current->mm points to the user process's mm. Since the rss_stat event (from the page fault) is for that same mm, the condition current->mm == mm becomes true, causing curr to be incorrectly set to true when the trace event is emitted. This is misleading because it suggests the mm belongs to the kthread, confusing userspace tools that track per-process RSS changes and corrupting their mm_id-to-process association. Fix this by ensuring curr is always false when the trace event is emitted from a kthread context by checking for the PF_KTHREAD flag. Link: https://lkml.kernel.org/r/20260219233708.1971199-1-kaleshsingh@google.com Link: https://perfetto.dev/ [1] Fixes: e4dcad204d3a ("rss_stat: add support to detect RSS updates of external mm") Signed-off-by: Kalesh Singh Acked-by: Zi Yan Acked-by: SeongJae Park Reviewed-by: Pedro Falcato Cc: "David Hildenbrand (Arm)" Cc: Joel Fernandes Cc: Lorenzo Stoakes Cc: Minchan Kim Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: [5.10+] Signed-off-by: Andrew Morton --- include/trace/events/kmem.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 7f93e754da5c..cd7920c81f85 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -440,7 +440,13 @@ TRACE_EVENT(rss_stat, TP_fast_assign( __entry->mm_id = mm_ptr_to_hash(mm); - __entry->curr = !!(current->mm == mm); + /* + * curr is true if the mm matches the current task's mm_struct. + * Since kthreads (PF_KTHREAD) have no mm_struct of their own + * but can borrow one via kthread_use_mm(), we must filter them + * out to avoid incorrectly attributing the RSS update to them. + */ + __entry->curr = current->mm == mm && !(current->flags & PF_KTHREAD); __entry->member = member; __entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member]) << PAGE_SHIFT); From a4ab97e34bb687a2ca63fc70b47e8762e689797f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 22 Feb 2026 19:57:02 +0800 Subject: [PATCH 312/828] mm: fix NULL NODE_DATA dereference for memoryless nodes on boot Commit d49004c5f0c1 ("arch, mm: consolidate initialization of nodes, zones and memory map") moved free_area_init() from setup_arch() to mm_core_init_early(), which runs after setup_arch() returns. This changed the ordering relative to init_cpu_to_node() on x86. Before the commit, free_area_init() ran during paging_init() (called from setup_arch()) *before* init_cpu_to_node(). After the commit, it runs *after* init_cpu_to_node(). On machines with memoryless NUMA nodes (e.g., node 0 has CPUs but no memory), this causes a NULL pointer dereference: 1. numa_register_nodes() skips memoryless nodes: no alloc_node_data() and no node_set_online() for them. 2. init_cpu_to_node() sets memoryless nodes online (they have CPUs) but does not allocate NODE_DATA. 3. free_area_init() checks "if (!node_online(nid))" to decide whether to call alloc_offline_node_data(). Since the memoryless node is now online, the allocation is skipped, leaving NODE_DATA(nid) == NULL. 4. The immediate "pgdat = NODE_DATA(nid)" dereferences NULL. The crash happens before console_init(), so no output is visible without earlyprintk. With earlyprintk enabled, the following panic is observed: BUG: unable to handle page fault for address: 000000000002a1e0 Oops: Oops: 0000 [#1] SMP NOPTI RIP: 0010:free_area_init_node+0x3a/0x540 Call Trace: free_area_init+0x331/0x4e0 start_kernel+0x69/0x4a0 x86_64_start_reservations+0x24/0x30 x86_64_start_kernel+0x125/0x130 common_startup_64+0x13e/0x148 Kernel panic - not syncing: Attempted to kill the idle task! Fix this by checking "if (!NODE_DATA(nid))" instead of "if (!node_online(nid))". This directly tests whether the per-node data structure needs to be allocated, regardless of the node's online status. This change is also safe for non-x86 architectures as they all allocate NODE_DATA for every node including memoryless ones, so the check simply evaluates to false with no change in behavior. Link: https://lkml.kernel.org/r/20260222115702.3659-1-ming.lei@redhat.com Fixes: d49004c5f0c1 ("arch, mm: consolidate initialization of nodes, zones and memory map") Signed-off-by: Ming Lei Reviewed-by: Mike Rapoport (Microsoft) Signed-off-by: Andrew Morton --- mm/mm_init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 61d983d23f55..df34797691bd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1896,7 +1896,11 @@ static void __init free_area_init(void) for_each_node(nid) { pg_data_t *pgdat; - if (!node_online(nid)) + /* + * If an architecture has not allocated node data for + * this node, presume the node is memoryless or offline. + */ + if (!NODE_DATA(nid)) alloc_offline_node_data(nid); pgdat = NODE_DATA(nid); From 37a012c5c10c3364c5cba5def30dd7a17a6b587a Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Mon, 23 Feb 2026 17:09:05 +0000 Subject: [PATCH 313/828] mailmap: add entry for Daniele Alessandrelli My Intel email is going to bounce soon. Map it to my personal Gmail address. Link: https://lkml.kernel.org/r/20260223170905.278956-1-daniele.alessandrelli@intel.com Signed-off-by: Daniele Alessandrelli Cc: Daniele Alessandrelli Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index b3785362f73f..cd09bc545586 100644 --- a/.mailmap +++ b/.mailmap @@ -211,6 +211,7 @@ Daniel Borkmann Daniel Borkmann Daniel Borkmann Daniel Thompson +Daniele Alessandrelli Danilo Krummrich David Brownell David Collins From 410aed670cddac1de4f0c2865f30ec623fd20f78 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Mon, 23 Feb 2026 16:00:26 +0000 Subject: [PATCH 314/828] MAINTAINERS: update Yosry Ahmed's email address Use my kernel.org email address. Link: https://lkml.kernel.org/r/20260223160027.122307-1-yosry@kernel.org Signed-off-by: Yosry Ahmed Cc: Johannes Weiner Cc: Nhat Pham Signed-off-by: Andrew Morton --- .mailmap | 3 ++- MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index cd09bc545586..c124a1306d26 100644 --- a/.mailmap +++ b/.mailmap @@ -892,7 +892,8 @@ Yanteng Si Ying Huang Yixun Lan Yixun Lan -Yosry Ahmed +Yosry Ahmed +Yosry Ahmed Yu-Chun Lin Yusuke Goda Zack Rusin diff --git a/MAINTAINERS b/MAINTAINERS index 02fe14782533..e4572a36afd2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -29186,7 +29186,7 @@ K: zstd ZSWAP COMPRESSED SWAP CACHING M: Johannes Weiner -M: Yosry Ahmed +M: Yosry Ahmed M: Nhat Pham R: Chengming Zhou L: linux-mm@kvack.org From 2f38fd99c0004676d835ae96ac4f3b54edc02c82 Mon Sep 17 00:00:00 2001 From: wangshuaiwei Date: Tue, 24 Feb 2026 14:32:28 +0800 Subject: [PATCH 315/828] scsi: ufs: core: Fix shift out of bounds when MAXQ=32 According to JESD223F, the maximum number of queues (MAXQ) is 32. When MCQ is enabled and ESI is disabled, nr_hw_queues=32 causes a shift overflow problem. Fix this by using 64-bit intermediate values to handle the nr_hw_queues=32 case safely. Signed-off-by: wangshuaiwei Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260224063228.50112-1-wangshuaiwei1@xiaomi.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 85987373b961..899e663fea6e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7110,7 +7110,7 @@ static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba) ret = ufshcd_vops_get_outstanding_cqs(hba, &outstanding_cqs); if (ret) - outstanding_cqs = (1U << hba->nr_hw_queues) - 1; + outstanding_cqs = (1ULL << hba->nr_hw_queues) - 1; /* Exclude the poll queues */ nr_queues = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]; From bfbc0b5b32a8f28ce284add619bf226716a59bc0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Feb 2026 11:51:16 -0700 Subject: [PATCH 316/828] media: dvb-core: fix wrong reinitialization of ringbuffer on reopen dvb_dvr_open() calls dvb_ringbuffer_init() when a new reader opens the DVR device. dvb_ringbuffer_init() calls init_waitqueue_head(), which reinitializes the waitqueue list head to empty. Since dmxdev->dvr_buffer.queue is a shared waitqueue (all opens of the same DVR device share it), this orphans any existing waitqueue entries from io_uring poll or epoll, leaving them with stale prev/next pointers while the list head is reset to {self, self}. The waitqueue and spinlock in dvr_buffer are already properly initialized once in dvb_dmxdev_init(). The open path only needs to reset the buffer data pointer, size, and read/write positions. Replace the dvb_ringbuffer_init() call in dvb_dvr_open() with direct assignment of data/size and a call to dvb_ringbuffer_reset(), which properly resets pread, pwrite, and error with correct memory ordering without touching the waitqueue or spinlock. Cc: stable@vger.kernel.org Fixes: 34731df288a5f ("V4L/DVB (3501): Dmxdev: use dvb_ringbuffer") Reported-by: syzbot+ab12f0c08dd7ab8d057c@syzkaller.appspotmail.com Tested-by: syzbot+ab12f0c08dd7ab8d057c@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/698a26d3.050a0220.3b3015.007d.GAE@google.com/ Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- drivers/media/dvb-core/dmxdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index 7cfed24054d0..3c8bc75e4d6c 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -168,7 +168,9 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) mutex_unlock(&dmxdev->mutex); return -ENOMEM; } - dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE); + dmxdev->dvr_buffer.data = mem; + dmxdev->dvr_buffer.size = DVR_BUFFER_SIZE; + dvb_ringbuffer_reset(&dmxdev->dvr_buffer); if (dmxdev->may_do_mmap) dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr", &dmxdev->mutex, From 6acf7860dcc79ed045cc9e6a79c8a8bb6959dba7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Feb 2026 06:21:44 -0800 Subject: [PATCH 317/828] zloop: advertise a volatile write cache Zloop is file system backed and thus needs to sync the underlying file system to persist data. Set BLK_FEAT_WRITE_CACHE so that the block layer actually send flush commands, and fix the flush implementation as sync_filesystem requires s_umount to be held and the code currently misses that. Fixes: eb0570c7df23 ("block: new zoned loop block device driver") Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/zloop.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 8e334f5025fc..ae9bf2a85c21 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -542,6 +542,21 @@ out: zloop_put_cmd(cmd); } +/* + * Sync the entire FS containing the zone files instead of walking all files. + */ +static int zloop_flush(struct zloop_device *zlo) +{ + struct super_block *sb = file_inode(zlo->data_dir)->i_sb; + int ret; + + down_read(&sb->s_umount); + ret = sync_filesystem(sb); + up_read(&sb->s_umount); + + return ret; +} + static void zloop_handle_cmd(struct zloop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); @@ -562,11 +577,7 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd) zloop_rw(cmd); return; case REQ_OP_FLUSH: - /* - * Sync the entire FS containing the zone files instead of - * walking all files - */ - cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb); + cmd->ret = zloop_flush(zlo); break; case REQ_OP_ZONE_RESET: cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq)); @@ -981,7 +992,8 @@ static int zloop_ctl_add(struct zloop_options *opts) struct queue_limits lim = { .max_hw_sectors = SZ_1M >> SECTOR_SHIFT, .chunk_sectors = opts->zone_size, - .features = BLK_FEAT_ZONED, + .features = BLK_FEAT_ZONED | BLK_FEAT_WRITE_CACHE, + }; unsigned int nr_zones, i, j; struct zloop_device *zlo; From 3c4617117a2b7682cf037be5e5533e379707f050 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Feb 2026 06:21:45 -0800 Subject: [PATCH 318/828] zloop: check for spurious options passed to remove Zloop uses a command option parser for all control commands, but most options are only valid for adding a new device. Check for incorrectly specified options in the remove handler. Fixes: eb0570c7df23 ("block: new zoned loop block device driver") Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/zloop.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index ae9bf2a85c21..9e3bb538d5fc 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -1174,7 +1174,12 @@ static int zloop_ctl_remove(struct zloop_options *opts) int ret; if (!(opts->mask & ZLOOP_OPT_ID)) { - pr_err("No ID specified\n"); + pr_err("No ID specified for remove\n"); + return -EINVAL; + } + + if (opts->mask & ~ZLOOP_OPT_ID) { + pr_err("Invalid option specified for remove\n"); return -EINVAL; } From 201ceb94aa1def0024a7c18ce643e5f65026be06 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 23 Feb 2026 19:37:51 -0800 Subject: [PATCH 319/828] kunit: irq: Ensure timer doesn't fire too frequently Fix a bug where kunit_run_irq_test() could hang if the system is too slow. This was noticed with the crypto library tests in certain VMs. Specifically, if kunit_irq_test_timer_func() and the associated hrtimer code took over 5us to run, then the CPU would spend all its time executing that code in hardirq context. As a result, the task executing kunit_run_irq_test() never had a chance to run, exit the loop, and cancel the timer. To fix it, make kunit_irq_test_timer_func() increase the timer interval when the other contexts aren't having a chance to run. Fixes: 950a81224e8b ("lib/crypto: tests: Add hash-test-template.h and gen-hash-testvecs.py") Cc: stable@vger.kernel.org Reviewed-by: David Gow Link: https://lore.kernel.org/r/20260224033751.97615-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/kunit/run-in-irq-context.h | 44 +++++++++++++++++++----------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/include/kunit/run-in-irq-context.h b/include/kunit/run-in-irq-context.h index c89b1b1b12dd..bfe60d6cf28d 100644 --- a/include/kunit/run-in-irq-context.h +++ b/include/kunit/run-in-irq-context.h @@ -12,16 +12,16 @@ #include #include -#define KUNIT_IRQ_TEST_HRTIMER_INTERVAL us_to_ktime(5) - struct kunit_irq_test_state { bool (*func)(void *test_specific_state); void *test_specific_state; bool task_func_reported_failure; bool hardirq_func_reported_failure; bool softirq_func_reported_failure; + atomic_t task_func_calls; atomic_t hardirq_func_calls; atomic_t softirq_func_calls; + ktime_t interval; struct hrtimer timer; struct work_struct bh_work; }; @@ -30,14 +30,25 @@ static enum hrtimer_restart kunit_irq_test_timer_func(struct hrtimer *timer) { struct kunit_irq_test_state *state = container_of(timer, typeof(*state), timer); + int task_calls, hardirq_calls, softirq_calls; WARN_ON_ONCE(!in_hardirq()); - atomic_inc(&state->hardirq_func_calls); + task_calls = atomic_read(&state->task_func_calls); + hardirq_calls = atomic_inc_return(&state->hardirq_func_calls); + softirq_calls = atomic_read(&state->softirq_func_calls); + + /* + * If the timer is firing too often for the softirq or task to ever have + * a chance to run, increase the timer interval. This is needed on very + * slow systems. + */ + if (hardirq_calls >= 20 && (softirq_calls == 0 || task_calls == 0)) + state->interval = ktime_add_ns(state->interval, 250); if (!state->func(state->test_specific_state)) state->hardirq_func_reported_failure = true; - hrtimer_forward_now(&state->timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL); + hrtimer_forward_now(&state->timer, state->interval); queue_work(system_bh_wq, &state->bh_work); return HRTIMER_RESTART; } @@ -86,10 +97,14 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), struct kunit_irq_test_state state = { .func = func, .test_specific_state = test_specific_state, + /* + * Start with a 5us timer interval. If the system can't keep + * up, kunit_irq_test_timer_func() will increase it. + */ + .interval = us_to_ktime(5), }; unsigned long end_jiffies; - int hardirq_calls, softirq_calls; - bool allctx = false; + int task_calls, hardirq_calls, softirq_calls; /* * Set up a hrtimer (the way we access hardirq context) and a work @@ -104,21 +119,18 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), * and hardirq), or 1 second, whichever comes first. */ end_jiffies = jiffies + HZ; - hrtimer_start(&state.timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL, - HRTIMER_MODE_REL_HARD); - for (int task_calls = 0, calls = 0; - ((calls < max_iterations) || !allctx) && - !time_after(jiffies, end_jiffies); - task_calls++) { + hrtimer_start(&state.timer, state.interval, HRTIMER_MODE_REL_HARD); + do { if (!func(test_specific_state)) state.task_func_reported_failure = true; + task_calls = atomic_inc_return(&state.task_func_calls); hardirq_calls = atomic_read(&state.hardirq_func_calls); softirq_calls = atomic_read(&state.softirq_func_calls); - calls = task_calls + hardirq_calls + softirq_calls; - allctx = (task_calls > 0) && (hardirq_calls > 0) && - (softirq_calls > 0); - } + } while ((task_calls + hardirq_calls + softirq_calls < max_iterations || + (task_calls == 0 || hardirq_calls == 0 || + softirq_calls == 0)) && + !time_after(jiffies, end_jiffies)); /* Cancel the timer and work. */ hrtimer_cancel(&state.timer); From 4b44cbb264d0ed3f2f2bc2659db6ce45882f4670 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Feb 2026 15:24:52 -0800 Subject: [PATCH 320/828] overflow: Make sure size helpers are always inlined With kmalloc_obj() performing implicit size calculations, the embedded size_mul() calls, while marked inline, were not always being inlined. I noticed a couple places where allocations were making a call out for things that would otherwise be compile-time calculated. Force the compilers to always inline these calculations. Reviewed-by: Gustavo A. R. Silva Link: https://patch.msgid.link/20260224232451.work.614-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/overflow.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index eddd987a8513..a8cb6319b4fb 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -42,7 +42,7 @@ * both the type-agnostic benefits of the macros while also being able to * enforce that the return value is, in fact, checked. */ -static inline bool __must_check __must_check_overflow(bool overflow) +static __always_inline bool __must_check __must_check_overflow(bool overflow) { return unlikely(overflow); } @@ -327,7 +327,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_mul(size_t factor1, size_t factor2) +static __always_inline size_t __must_check size_mul(size_t factor1, size_t factor2) { size_t bytes; @@ -346,7 +346,7 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2) * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_add(size_t addend1, size_t addend2) +static __always_inline size_t __must_check size_add(size_t addend1, size_t addend2) { size_t bytes; @@ -367,7 +367,7 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2) * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX). * The lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) +static __always_inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) { size_t bytes; From 2f61f38a217462411fed950e843b82bc119884cf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 12:19:08 +0000 Subject: [PATCH 321/828] net: stmmac: fix timestamping configuration after suspend/resume When stmmac_init_timestamping() is called, it clears the receive and transmit path booleans that allow timestamps to be read. These are never re-initialised until after userspace requests timestamping features to be enabled. However, our copy of the timestamp configuration is not cleared, which means we return the old configuration to userspace when requested. This is inconsistent. Fix this by clearing the timestamp configuration. Fixes: d6228b7cdd6e ("net: stmmac: implement the SIOCGHWTSTAMP ioctl") Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://patch.msgid.link/E1vuUu4-0000000Afea-0j9B@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 82375d34ad57..91f638ce132e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -853,6 +853,7 @@ static int stmmac_init_timestamping(struct stmmac_priv *priv) netdev_info(priv->dev, "IEEE 1588-2008 Advanced Timestamp supported\n"); + memset(&priv->tstamp_config, 0, sizeof(priv->tstamp_config)); priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; From c601fd5414315fc515f746b499110e46272e7243 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 24 Feb 2026 22:12:28 +0000 Subject: [PATCH 322/828] drm/client: Do not destroy NULL modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'modes' in drm_client_modeset_probe may fail to kcalloc. If this occurs, we jump to 'out', calling modes_destroy on it, which dereferences it. This may result in a NULL pointer dereference in the error case. Prevent that. Fixes: 3039cc0c0653 ("drm/client: Make copies of modes") Signed-off-by: Jonathan Cavitt Cc: Ville Syrjälä Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20260224221227.69126-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/drm_client_modeset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 262b1b8773c5..bb49b8361271 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -930,7 +930,8 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, mutex_unlock(&client->modeset_mutex); out: kfree(crtcs); - modes_destroy(dev, modes, connector_count); + if (modes) + modes_destroy(dev, modes, connector_count); kfree(modes); kfree(offsets); kfree(enabled); From 83236b2e43dba00bee5b82eb5758816b1a674f6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2026 21:39:58 -1000 Subject: [PATCH 323/828] sched_ext: Disable preemption between scx_claim_exit() and kicking helper work scx_claim_exit() atomically sets exit_kind, which prevents scx_error() from triggering further error handling. After claiming exit, the caller must kick the helper kthread work which initiates bypass mode and teardown. If the calling task gets preempted between claiming exit and kicking the helper work, and the BPF scheduler fails to schedule it back (since error handling is now disabled), the helper work is never queued, bypass mode never activates, tasks stop being dispatched, and the system wedges. Disable preemption across scx_claim_exit() and the subsequent work kicking in all callers - scx_disable() and scx_vexit(). Add lockdep_assert_preemption_disabled() to scx_claim_exit() to enforce the requirement. Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class") Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index c18e81e8ef51..9280381f8923 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4423,10 +4423,19 @@ done: scx_bypass(false); } +/* + * Claim the exit on @sch. The caller must ensure that the helper kthread work + * is kicked before the current task can be preempted. Once exit_kind is + * claimed, scx_error() can no longer trigger, so if the current task gets + * preempted and the BPF scheduler fails to schedule it back, the helper work + * will never be kicked and the whole system can wedge. + */ static bool scx_claim_exit(struct scx_sched *sch, enum scx_exit_kind kind) { int none = SCX_EXIT_NONE; + lockdep_assert_preemption_disabled(); + if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind)) return false; @@ -4449,6 +4458,7 @@ static void scx_disable(enum scx_exit_kind kind) rcu_read_lock(); sch = rcu_dereference(scx_root); if (sch) { + guard(preempt)(); scx_claim_exit(sch, kind); kthread_queue_work(sch->helper, &sch->disable_work); } @@ -4771,6 +4781,8 @@ static bool scx_vexit(struct scx_sched *sch, { struct scx_exit_info *ei = sch->exit_info; + guard(preempt)(); + if (!scx_claim_exit(sch, kind)) return false; From 29c8b85adb47daefc213381bc1831787f512d89b Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 25 Feb 2026 08:31:40 +0000 Subject: [PATCH 324/828] irqchip/gic-v5: Fix inversion of IRS_IDR0.virt flag It appears that a !! became ! during a cleanup, resulting in inverted logic when detecting if a host GICv5 implementation is capable of virtualization. Re-add the missing !, fixing the behaviour. Fixes: 3227c3a89d65f ("irqchip/gic-v5: Check if impl is virt capable") Signed-off-by: Sascha Bischoff Link: https://patch.msgid.link/20260225083130.3378490-1-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v5-irs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c index eeeb40fb0eaa..4ed2bfa17c01 100644 --- a/drivers/irqchip/irq-gic-v5-irs.c +++ b/drivers/irqchip/irq-gic-v5-irs.c @@ -744,7 +744,7 @@ static int __init gicv5_irs_init(struct device_node *node) */ if (list_empty(&irs_nodes)) { idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR0); - gicv5_global_data.virt_capable = !FIELD_GET(GICV5_IRS_IDR0_VIRT, idr); + gicv5_global_data.virt_capable = !!FIELD_GET(GICV5_IRS_IDR0_VIRT, idr); idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1); irs_setup_pri_bits(idr); From 7fe8dec3f628e9779f1631576f8e693370050348 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Feb 2026 09:52:28 +0100 Subject: [PATCH 325/828] ALSA: usb-audio: Cap the packet size pre-calculations We calculate the possible packet sizes beforehand for adaptive and synchronous endpoints, but we didn't take care of the max frame size for those pre-calculated values. When a device or a bus limits the packet size, a high sample rate or a high number of channels may lead to the packet sizes that are larger than the given limit, which results in an error from the USB core at submitting URBs. As a simple workaround, just add the sanity checks of pre-calculated packet sizes to have the upper boundary of ep->maxframesize. Fixes: f0bd62b64016 ("ALSA: usb-audio: Improve frames size computation") Link: https://bugzilla.kernel.org/show_bug.cgi?id=221076 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260225085233.316306-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 73bce9712dbd..c887c2f5b25d 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -1378,6 +1378,9 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, return -EINVAL; } + ep->packsize[0] = min(ep->packsize[0], ep->maxframesize); + ep->packsize[1] = min(ep->packsize[1], ep->maxframesize); + /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; ep->freqshift = INT_MIN; From 7cb2a5422f5bbdf1cf32eae0eda41000485b9346 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Feb 2026 09:52:29 +0100 Subject: [PATCH 326/828] ALSA: usb-audio: Check max frame size for implicit feedback mode, too When the packet sizes are taken from the capture stream in the implicit feedback mode, the sizes might be larger than the upper boundary defined by the descriptor. As already done for other transfer modes, we have to cap the sizes accordingly at sending, otherwise this would lead to an error in USB core at submission of URBs. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221076 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260225085233.316306-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index c887c2f5b25d..2e70dbc479b4 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -221,6 +221,7 @@ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, packet = ctx->packet_size[idx]; if (packet) { + packet = min(packet, ep->maxframesize); if (avail && packet >= avail) return -EAGAIN; return packet; From c5bf24c8aba1ff711226ee0f039ff01a5754692b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Feb 2026 09:52:30 +0100 Subject: [PATCH 327/828] ALSA: usb-audio: Avoid implicit feedback mode on DIYINHK USB Audio 2.0 Although DIYINHK USB Audio 2.0 (ID 20b1:2009) shows the implicit feedback source for the capture stream, this would cause several problems for the playback. Namely, the device can get wMaxPackSize 1024 for 24/32 bit format with 6 channels, and when a high sample rate like 352.8kHz or 384kHz is played, the packet size overflows the max limit. Also, the device has another two playback altsets, and those aren't properly handled with the implicit feedback. Since the device has been working well even before introducing the implicit feedback, we can assume that it works fine in the async mode. This patch adds the explicit skip of the implicit fb detection to make the playback running in the async mode. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221076 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260225085233.316306-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index fbceed8e8d36..c6a78efbcaa3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2365,6 +2365,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x2040, 0x7281, /* Hauppauge HVR-950Q-MXL */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x20b1, 0x2009, /* XMOS Ltd DIYINHK USB Audio 2.0 */ + QUIRK_FLAG_SKIP_IMPLICIT_FB | QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2040, 0x8200, /* Hauppauge Woodbury */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */ From 4e9113c533acee2ba1f72fd68ee6ecd36b64484e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Feb 2026 09:52:31 +0100 Subject: [PATCH 328/828] ALSA: usb-audio: Use inclusive terms Replace the remaining with inclusive terms; it's only this function name we overlooked at the previous conversion. Fixes: 53837b4ac2bd ("ALSA: usb-audio: Replace slave/master terms") Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260225085233.316306-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 2e70dbc479b4..bf4401aba76c 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -160,8 +160,8 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep) * This won't be used for implicit feedback which takes the packet size * returned from the sync source */ -static int slave_next_packet_size(struct snd_usb_endpoint *ep, - unsigned int avail) +static int synced_next_packet_size(struct snd_usb_endpoint *ep, + unsigned int avail) { unsigned int phase; int ret; @@ -228,7 +228,7 @@ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, } if (ep->sync_source) - return slave_next_packet_size(ep, avail); + return synced_next_packet_size(ep, avail); else return next_packet_size(ep, avail); } From 4a2d046e4b13202a6301a993961f5b30ae4d7119 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 24 Feb 2026 18:31:25 +0800 Subject: [PATCH 329/828] erofs: fix interlaced plain identification for encoded extents Only plain data whose start position and on-disk physical length are both aligned to the block size should be classified as interlaced plain extents. Otherwise, it must be treated as shifted plain extents. This issue was found by syzbot using a crafted compressed image containing plain extents with unaligned physical lengths, which can cause OOB read in z_erofs_transform_plain(). Reported-and-tested-by: syzbot+d988dc155e740d76a331@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/699d5714.050a0220.cdd3c.03e7.GAE@google.com Fixes: 1d191b4ca51d ("erofs: implement encoded extent metadata") Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index c8d8e129eb4b..30775502b56d 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -513,6 +513,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize), recsz); + unsigned int bmask = sb->s_blocksize - 1; bool in_mbox = erofs_inode_in_metabox(inode); erofs_off_t lend = inode->i_size; erofs_off_t l, r, mid, pa, la, lstart; @@ -596,17 +597,17 @@ static int z_erofs_map_blocks_ext(struct inode *inode, map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; + if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) + map->m_flags |= EROFS_MAP_PARTIAL_REF; + map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK; if (fmt) map->m_algorithmformat = fmt - 1; - else if (interlaced && !erofs_blkoff(sb, map->m_pa)) + else if (interlaced && !((map->m_pa | map->m_plen) & bmask)) map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; else map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; - if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) - map->m_flags |= EROFS_MAP_PARTIAL_REF; - map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK; } } map->m_llen = lend - map->m_la; From 54e367cb94d6bef941bbc1132d9959dc73bd4b6f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 Feb 2026 10:47:18 +0000 Subject: [PATCH 330/828] KVM: arm64: Deduplicate ASID retrieval code We currently have three versions of the ASID retrieval code, one in the S1 walker, and two in the VNCR handling (although the last two are limited to the EL2&0 translation regime). Make this code common, and take this opportunity to also simplify the code a bit while switching over to the TTBRx_EL1_ASID macro. Reviewed-by: Joey Gouly Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260225104718.14209-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 2 + arch/arm64/kvm/at.c | 27 +------------ arch/arm64/kvm/nested.c | 60 +++++++++++++++-------------- 3 files changed, 35 insertions(+), 54 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 905c658057a4..091544e6af44 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -397,6 +397,8 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu); void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val); +u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime); + #define vncr_fixmap(c) \ ({ \ u32 __c = (c); \ diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 885bd5bb2f41..6588ea251ed7 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -540,31 +540,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG); - if (wr->nG) { - u64 asid_ttbr, tcr; - - switch (wi->regime) { - case TR_EL10: - tcr = vcpu_read_sys_reg(vcpu, TCR_EL1); - asid_ttbr = ((tcr & TCR_A1) ? - vcpu_read_sys_reg(vcpu, TTBR1_EL1) : - vcpu_read_sys_reg(vcpu, TTBR0_EL1)); - break; - case TR_EL20: - tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); - asid_ttbr = ((tcr & TCR_A1) ? - vcpu_read_sys_reg(vcpu, TTBR1_EL2) : - vcpu_read_sys_reg(vcpu, TTBR0_EL2)); - break; - default: - BUG(); - } - - wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr); - if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || - !(tcr & TCR_ASID16)) - wr->asid &= GENMASK(7, 0); - } + if (wr->nG) + wr->asid = get_asid_by_regime(vcpu, wi->regime); return 0; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 7f1ea85dc67a..787776aaf4ad 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -854,6 +854,33 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2) return kvm_inject_nested_sync(vcpu, esr_el2); } +u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + enum vcpu_sysreg ttbr_elx; + u64 tcr; + u16 asid; + + switch (regime) { + case TR_EL10: + tcr = vcpu_read_sys_reg(vcpu, TCR_EL1); + ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL1 : TTBR0_EL1; + break; + case TR_EL20: + tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL2 : TTBR0_EL2; + break; + default: + BUG(); + } + + asid = FIELD_GET(TTBRx_EL1_ASID, vcpu_read_sys_reg(vcpu, ttbr_elx)); + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || + !(tcr & TCR_ASID16)) + asid &= GENMASK(7, 0); + + return asid; +} + static void invalidate_vncr(struct vncr_tlb *vt) { vt->valid = false; @@ -1333,20 +1360,8 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu) if (read_vncr_el2(vcpu) != vt->gva) return false; - if (vt->wr.nG) { - u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); - u64 ttbr = ((tcr & TCR_A1) ? - vcpu_read_sys_reg(vcpu, TTBR1_EL2) : - vcpu_read_sys_reg(vcpu, TTBR0_EL2)); - u16 asid; - - asid = FIELD_GET(TTBR_ASID_MASK, ttbr); - if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || - !(tcr & TCR_ASID16)) - asid &= GENMASK(7, 0); - - return asid == vt->wr.asid; - } + if (vt->wr.nG) + return get_asid_by_regime(vcpu, TR_EL20) == vt->wr.asid; return true; } @@ -1449,21 +1464,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) if (read_vncr_el2(vcpu) != vt->gva) return; - if (vt->wr.nG) { - u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); - u64 ttbr = ((tcr & TCR_A1) ? - vcpu_read_sys_reg(vcpu, TTBR1_EL2) : - vcpu_read_sys_reg(vcpu, TTBR0_EL2)); - u16 asid; - - asid = FIELD_GET(TTBR_ASID_MASK, ttbr); - if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || - !(tcr & TCR_ASID16)) - asid &= GENMASK(7, 0); - - if (asid != vt->wr.asid) - return; - } + if (vt->wr.nG && get_asid_by_regime(vcpu, TR_EL20) != vt->wr.asid) + return; vt->cpu = smp_processor_id(); From 93a4a9b732fbb479f95d327aa867d094aed3f712 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 24 Feb 2026 17:59:52 +0100 Subject: [PATCH 331/828] RDMA/core: Check id_priv->restricted_node_type in cma_listen_on_dev() When listening on wildcard addresses we have a global list for the application layer rdma_cm_id and for any existing device or any device added in future we try to listen on any wildcard listener. When the listener has a restricted_node_type we should prevent listening on devices with a different node type. While there fix the documentation comment of rdma_restrict_node_type() to include rdma_resolve_addr() instead of having rdma_bind_addr() twice. Fixes: a760e80e90f5 ("RDMA/core: introduce rdma_restrict_node_type()") Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Steve French Cc: Namjae Jeon Cc: Tom Talpey Cc: Long Li Cc: linux-rdma@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Link: https://patch.msgid.link/20260224165951.3582093-2-metze@samba.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 6 +++++- include/rdma/rdma_cm.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e54c07c74575..9480d1a51c11 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2729,6 +2729,9 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return 0; + if (id_priv->restricted_node_type != RDMA_NODE_UNSPECIFIED && + id_priv->restricted_node_type != cma_dev->device->node_type) + return 0; dev_id_priv = __rdma_create_id(net, cma_listen_handler, id_priv, @@ -2736,6 +2739,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, if (IS_ERR(dev_id_priv)) return PTR_ERR(dev_id_priv); + dev_id_priv->restricted_node_type = id_priv->restricted_node_type; dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); @@ -4194,7 +4198,7 @@ int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type) } mutex_lock(&lock); - if (id_priv->cma_dev) + if (READ_ONCE(id_priv->state) != RDMA_CM_IDLE) ret = -EALREADY; else id_priv->restricted_node_type = node_type; diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 6de6fd8bd15e..d639ff889e64 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -181,7 +181,7 @@ void rdma_destroy_id(struct rdma_cm_id *id); * * It needs to be called before the RDMA identifier is bound * to an device, which mean it should be called before - * rdma_bind_addr(), rdma_bind_addr() and rdma_listen(). + * rdma_bind_addr(), rdma_resolve_addr() and rdma_listen(). */ int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type); From 104016eb671e19709721c1b0048dd912dc2e96be Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 24 Feb 2026 23:41:53 +0000 Subject: [PATCH 332/828] RDMA/umem: Fix double dma_buf_unpin in failure path In ib_umem_dmabuf_get_pinned_with_dma_device(), the call to ib_umem_dmabuf_map_pages() can fail. If this occurs, the dmabuf is immediately unpinned but the umem_dmabuf->pinned flag is still set. Then, when ib_umem_release() is called, it calls ib_umem_dmabuf_revoke() which will call dma_buf_unpin() again. Fix this by removing the immediate unpin upon failure and just let the ib_umem_release/revoke path handle it. This also ensures the proper unmap-unpin unwind ordering if the dmabuf_map_pages call happened to fail due to dma_resv_wait_timeout (and therefore has a non-NULL umem_dmabuf->sgt). Fixes: 1e4df4a21c5a ("RDMA/umem: Allow pinned dmabuf umem usage") Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20260224234153.1207849-1-jmoroni@google.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_dmabuf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index f5298c33e581..d30f24b90bca 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -218,13 +218,11 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, err = ib_umem_dmabuf_map_pages(umem_dmabuf); if (err) - goto err_unpin; + goto err_release; dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); return umem_dmabuf; -err_unpin: - dma_buf_unpin(umem_dmabuf->attach); err_release: dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); ib_umem_release(&umem_dmabuf->umem); From 18c16f602a67782f5eb4b5ab9ba73350b9f711ec Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Wed, 4 Feb 2026 20:36:26 +0530 Subject: [PATCH 333/828] xfs: Replace ASSERT with XFS_IS_CORRUPT in xfs_rtcopy_summary() Replace ASSERT(sum > 0) with an XFS_IS_CORRUPT() and place it just after the call to xfs_rtget_summary() so that we don't end up using an illegal value of sum. Signed-off-by: Nirjhar Roy (IBM) Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_rtalloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 90a94a5b6f7e..aab59f66384e 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -112,6 +112,10 @@ xfs_rtcopy_summary( error = xfs_rtget_summary(oargs, log, bbno, &sum); if (error) goto out; + if (XFS_IS_CORRUPT(oargs->mp, sum < 0)) { + error = -EFSCORRUPTED; + goto out; + } if (sum == 0) continue; error = xfs_rtmodify_summary(oargs, log, bbno, -sum); @@ -120,7 +124,6 @@ xfs_rtcopy_summary( error = xfs_rtmodify_summary(nargs, log, bbno, sum); if (error) goto out; - ASSERT(sum > 0); } } error = 0; From a49b7ff63f98ba1c4503869c568c99ecffa478f2 Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Wed, 11 Feb 2026 19:25:13 +0530 Subject: [PATCH 334/828] xfs: Refactoring the nagcount and delta calculation Introduce xfs_growfs_compute_delta() to calculate the nagcount and delta blocks and refactor the code from xfs_growfs_data_private(). No functional changes. Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Nirjhar Roy (IBM) Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_ag.c | 28 ++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_ag.h | 3 +++ fs/xfs/xfs_fsops.c | 17 ++--------------- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 9c6765cc2d44..bd8fbb40b49e 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -872,6 +872,34 @@ resv_err: return err2; } +void +xfs_growfs_compute_deltas( + struct xfs_mount *mp, + xfs_rfsblock_t nb, + int64_t *deltap, + xfs_agnumber_t *nagcountp) +{ + xfs_rfsblock_t nb_div, nb_mod; + int64_t delta; + xfs_agnumber_t nagcount; + + nb_div = nb; + nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); + if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS) + nb_div++; + else if (nb_mod) + nb = nb_div * mp->m_sb.sb_agblocks; + + if (nb_div > XFS_MAX_AGNUMBER + 1) { + nb_div = XFS_MAX_AGNUMBER + 1; + nb = nb_div * mp->m_sb.sb_agblocks; + } + nagcount = nb_div; + delta = nb - mp->m_sb.sb_dblocks; + *deltap = delta; + *nagcountp = nagcount; +} + /* * Extent the AG indicated by the @id by the length passed in */ diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 1f24cfa27321..3cd4790768ff 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -331,6 +331,9 @@ struct aghdr_init_data { int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id); int xfs_ag_shrink_space(struct xfs_perag *pag, struct xfs_trans **tpp, xfs_extlen_t delta); +void +xfs_growfs_compute_deltas(struct xfs_mount *mp, xfs_rfsblock_t nb, + int64_t *deltap, xfs_agnumber_t *nagcountp); int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp, xfs_extlen_t len); int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 17255c41786b..8d64d904d73c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -95,18 +95,17 @@ xfs_growfs_data_private( struct xfs_growfs_data *in) /* growfs data input struct */ { xfs_agnumber_t oagcount = mp->m_sb.sb_agcount; + xfs_rfsblock_t nb = in->newblocks; struct xfs_buf *bp; int error; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; - xfs_rfsblock_t nb, nb_div, nb_mod; int64_t delta; bool lastag_extended = false; struct xfs_trans *tp; struct aghdr_init_data id = {}; struct xfs_perag *last_pag; - nb = in->newblocks; error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); if (error) return error; @@ -125,20 +124,8 @@ xfs_growfs_data_private( mp->m_sb.sb_rextsize); if (error) return error; + xfs_growfs_compute_deltas(mp, nb, &delta, &nagcount); - nb_div = nb; - nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); - if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS) - nb_div++; - else if (nb_mod) - nb = nb_div * mp->m_sb.sb_agblocks; - - if (nb_div > XFS_MAX_AGNUMBER + 1) { - nb_div = XFS_MAX_AGNUMBER + 1; - nb = nb_div * mp->m_sb.sb_agblocks; - } - nagcount = nb_div; - delta = nb - mp->m_sb.sb_dblocks; /* * Reject filesystems with a single AG because they are not * supported, and reject a shrink operation that would cause a From 4ad85e633bc576a5cc8c8310aab141af7ed20efa Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Wed, 11 Feb 2026 19:25:14 +0530 Subject: [PATCH 335/828] xfs: Replace &rtg->rtg_group with rtg_group() Use the already existing rtg_group() wrapper instead of directly accessing the struct xfs_group member in struct xfs_rtgroup. Reviewed-by: Christoph Hellwig Signed-off-by: Nirjhar Roy (IBM) [cem: Conflict resolution against 06873dbd940d] Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 6 +++--- fs/xfs/xfs_zone_gc.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 67e0c8f5800f..e3d19b6dc64a 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -78,7 +78,7 @@ xfs_zone_account_reclaimable( struct xfs_rtgroup *rtg, uint32_t freed) { - struct xfs_group *xg = &rtg->rtg_group; + struct xfs_group *xg = rtg_group(rtg); struct xfs_mount *mp = rtg_mount(rtg); struct xfs_zone_info *zi = mp->m_zone_info; uint32_t used = rtg_rmap(rtg)->i_used_blocks; @@ -759,7 +759,7 @@ xfs_zone_alloc_blocks( trace_xfs_zone_alloc_blocks(oz, allocated, count_fsb); - *sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); + *sector = xfs_gbno_to_daddr(rtg_group(rtg), 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *sector); if (!*is_seq) *sector += XFS_FSB_TO_BB(mp, allocated); @@ -1080,7 +1080,7 @@ xfs_init_zone( if (write_pointer == 0) { /* zone is empty */ atomic_inc(&zi->zi_nr_free_zones); - xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE); + xfs_group_set_mark(rtg_group(rtg), XFS_RTG_FREE); iz->available += rtg_blocks(rtg); } else if (write_pointer < rtg_blocks(rtg)) { /* zone is open */ diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 48c6cf584447..7efeecd2d85f 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -627,7 +627,7 @@ xfs_zone_gc_alloc_blocks( if (!*count_fsb) return NULL; - *daddr = xfs_gbno_to_daddr(&oz->oz_rtg->rtg_group, 0); + *daddr = xfs_gbno_to_daddr(rtg_group(oz->oz_rtg), 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr); if (!*is_seq) *daddr += XFS_FSB_TO_BB(mp, oz->oz_allocated); @@ -702,7 +702,7 @@ xfs_zone_gc_start_chunk( chunk->data = data; chunk->oz = oz; chunk->victim_rtg = iter->victim_rtg; - atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref); atomic_inc(&chunk->victim_rtg->rtg_gccount); bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); @@ -788,7 +788,7 @@ xfs_zone_gc_split_write( atomic_inc(&chunk->oz->oz_ref); split_chunk->victim_rtg = chunk->victim_rtg; - atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref); atomic_inc(&chunk->victim_rtg->rtg_gccount); chunk->offset += split_len; @@ -888,7 +888,7 @@ xfs_zone_gc_finish_reset( goto out; } - xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE); + xfs_group_set_mark(rtg_group(rtg), XFS_RTG_FREE); atomic_inc(&zi->zi_nr_free_zones); xfs_zoned_add_available(mp, rtg_blocks(rtg)); @@ -917,7 +917,7 @@ xfs_submit_zone_reset_bio( XFS_STATS_INC(mp, xs_gc_zone_reset_calls); - bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); + bio->bi_iter.bi_sector = xfs_gbno_to_daddr(rtg_group(rtg), 0); if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { /* * Also use the bio to drive the state machine when neither From fd81d3fd01a5ee4bd26a7dc440e7a2209277d14b Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Fri, 13 Feb 2026 08:50:06 +1000 Subject: [PATCH 336/828] xfs: fix code alignment issues in xfs_ondisk.c Fixup some code alignment issues in xfs_ondisk.c Signed-off-by: Wilfred Mallawa Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_ondisk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 2e9715cc1641..70605019383c 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -73,7 +73,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64); XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64); XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64); - XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_hdr, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_map, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_local, 4); @@ -116,7 +116,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_da_intnode, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_entry, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_hdr, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16); XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, freetag, 0); XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, length, 2); From 03a6d6c4c85d2758534638fb2bb5f72e0f8877d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 15:14:31 +0100 Subject: [PATCH 337/828] xfs: cleanup inode counter stats Most of them are unused, so mark them as such. Give the remaining ones names that match their use instead of the historic IRIX ones based on vnodes. Note that the names are purely internal to the XFS code, the user interface is based on section names and arrays of counters. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_icache.c | 6 +++--- fs/xfs/xfs_stats.c | 10 +++++----- fs/xfs/xfs_stats.h | 16 ++++++++-------- fs/xfs/xfs_super.c | 4 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index dbaab4ae709f..f76c6decdaa3 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -106,7 +106,7 @@ xfs_inode_alloc( mapping_set_folio_min_order(VFS_I(ip)->i_mapping, M_IGEO(mp)->min_folio_order); - XFS_STATS_INC(mp, vn_active); + XFS_STATS_INC(mp, xs_inodes_active); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(ip->i_ino == 0); @@ -172,7 +172,7 @@ __xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list)); - XFS_STATS_DEC(ip->i_mount, vn_active); + XFS_STATS_DEC(ip->i_mount, xs_inodes_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } @@ -2234,7 +2234,7 @@ xfs_inode_mark_reclaimable( struct xfs_mount *mp = ip->i_mount; bool need_inactive; - XFS_STATS_INC(mp, vn_reclaim); + XFS_STATS_INC(mp, xs_inode_mark_reclaimable); /* * We should never get here with any of the reclaim flags already set. diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 017db0361cd8..bc4a5d6dc795 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -42,7 +42,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { "xstrat", xfsstats_offset(xs_write_calls) }, { "rw", xfsstats_offset(xs_attr_get) }, { "attr", xfsstats_offset(xs_iflush_count)}, - { "icluster", xfsstats_offset(vn_active) }, + { "icluster", xfsstats_offset(xs_inodes_active) }, { "vnodes", xfsstats_offset(xb_get) }, { "buf", xfsstats_offset(xs_abtb_2) }, { "abtb2", xfsstats_offset(xs_abtc_2) }, @@ -100,15 +100,15 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) void xfs_stats_clearall(struct xfsstats __percpu *stats) { int c; - uint32_t vn_active; + uint32_t xs_inodes_active; xfs_notice(NULL, "Clearing xfsstats"); for_each_possible_cpu(c) { preempt_disable(); - /* save vn_active, it's a universal truth! */ - vn_active = per_cpu_ptr(stats, c)->s.vn_active; + /* save xs_inodes_active, it's a universal truth! */ + xs_inodes_active = per_cpu_ptr(stats, c)->s.xs_inodes_active; memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); - per_cpu_ptr(stats, c)->s.vn_active = vn_active; + per_cpu_ptr(stats, c)->s.xs_inodes_active = xs_inodes_active; preempt_enable(); } } diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 153d2381d0a8..64bc0cc18126 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -100,14 +100,14 @@ struct __xfsstats { uint32_t xs_iflush_count; uint32_t xs_icluster_flushcnt; uint32_t xs_icluster_flushinode; - uint32_t vn_active; /* # vnodes not on free lists */ - uint32_t vn_alloc; /* # times vn_alloc called */ - uint32_t vn_get; /* # times vn_get called */ - uint32_t vn_hold; /* # times vn_hold called */ - uint32_t vn_rele; /* # times vn_rele called */ - uint32_t vn_reclaim; /* # times vn_reclaim called */ - uint32_t vn_remove; /* # times vn_remove called */ - uint32_t vn_free; /* # times vn_free called */ + uint32_t xs_inodes_active; + uint32_t __unused_vn_alloc; + uint32_t __unused_vn_get; + uint32_t __unused_vn_hold; + uint32_t xs_inode_destroy; + uint32_t xs_inode_destroy2; /* same as xs_inode_destroy */ + uint32_t xs_inode_mark_reclaimable; + uint32_t __unused_vn_free; uint32_t xb_get; uint32_t xb_create; uint32_t xb_get_locked; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index abc45f860a73..f8de44443e81 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -712,8 +712,8 @@ xfs_fs_destroy_inode( trace_xfs_destroy_inode(ip); ASSERT(!rwsem_is_locked(&inode->i_rwsem)); - XFS_STATS_INC(ip->i_mount, vn_rele); - XFS_STATS_INC(ip->i_mount, vn_remove); + XFS_STATS_INC(ip->i_mount, xs_inode_destroy); + XFS_STATS_INC(ip->i_mount, xs_inode_destroy2); xfs_inode_mark_reclaimable(ip); } From 47553dd60b1da88df2354f841a4f71dd4de6478a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2026 15:14:32 +0100 Subject: [PATCH 338/828] xfs: remove metafile inodes from the active inode stat The active inode (or active vnode until recently) stat can get much larger than expected on file systems with a lot of metafile inodes like zoned file systems on SMR hard disks with 10.000s of rtg rmap inodes. Remove all metafile inodes from the active counter to make it more useful to track actual workloads and add a separate counter for active metafile inodes. This fixes xfs/177 on SMR hard drives. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_inode_buf.c | 4 ++++ fs/xfs/libxfs/xfs_metafile.c | 5 +++++ fs/xfs/xfs_icache.c | 5 ++++- fs/xfs/xfs_stats.c | 11 ++++++++--- fs/xfs/xfs_stats.h | 3 ++- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index a017016e9075..3794e5412eba 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -268,6 +268,10 @@ xfs_inode_from_disk( } if (xfs_is_reflink_inode(ip)) xfs_ifork_init_cow(ip); + if (xfs_is_metadir_inode(ip)) { + XFS_STATS_DEC(ip->i_mount, xs_inodes_active); + XFS_STATS_INC(ip->i_mount, xs_inodes_meta); + } return 0; out_destroy_data_fork: diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c index cf239f862212..71f004e9dc64 100644 --- a/fs/xfs/libxfs/xfs_metafile.c +++ b/fs/xfs/libxfs/xfs_metafile.c @@ -61,6 +61,9 @@ xfs_metafile_set_iflag( ip->i_diflags2 |= XFS_DIFLAG2_METADATA; ip->i_metatype = metafile_type; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_DEC(ip->i_mount, xs_inodes_active); + XFS_STATS_INC(ip->i_mount, xs_inodes_meta); } /* Clear the metadata directory inode flag. */ @@ -74,6 +77,8 @@ xfs_metafile_clear_iflag( ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_STATS_INC(ip->i_mount, xs_inodes_active); + XFS_STATS_DEC(ip->i_mount, xs_inodes_meta); } /* diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f76c6decdaa3..f2d4294efd37 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -172,7 +172,10 @@ __xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list)); - XFS_STATS_DEC(ip->i_mount, xs_inodes_active); + if (xfs_is_metadir_inode(ip)) + XFS_STATS_DEC(ip->i_mount, xs_inodes_meta); + else + XFS_STATS_DEC(ip->i_mount, xs_inodes_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index bc4a5d6dc795..c13d600732c9 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -59,7 +59,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { "rtrefcntbt", xfsstats_offset(xs_qm_dqreclaims)}, /* we print both series of quota information together */ { "qm", xfsstats_offset(xs_gc_read_calls)}, - { "zoned", xfsstats_offset(__pad1)}, + { "zoned", xfsstats_offset(xs_inodes_meta)}, + { "metafile", xfsstats_offset(xs_xstrat_bytes)}, }; /* Loop over all stats groups */ @@ -99,16 +100,20 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) void xfs_stats_clearall(struct xfsstats __percpu *stats) { + uint32_t xs_inodes_active, xs_inodes_meta; int c; - uint32_t xs_inodes_active; xfs_notice(NULL, "Clearing xfsstats"); for_each_possible_cpu(c) { preempt_disable(); - /* save xs_inodes_active, it's a universal truth! */ + /* + * Save the active / meta inode counters, as they are stateful. + */ xs_inodes_active = per_cpu_ptr(stats, c)->s.xs_inodes_active; + xs_inodes_meta = per_cpu_ptr(stats, c)->s.xs_inodes_meta; memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); per_cpu_ptr(stats, c)->s.xs_inodes_active = xs_inodes_active; + per_cpu_ptr(stats, c)->s.xs_inodes_meta = xs_inodes_meta; preempt_enable(); } } diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 64bc0cc18126..57c32b86c358 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -142,7 +142,8 @@ struct __xfsstats { uint32_t xs_gc_read_calls; uint32_t xs_gc_write_calls; uint32_t xs_gc_zone_reset_calls; - uint32_t __pad1; +/* Metafile counters */ + uint32_t xs_inodes_meta; /* Extra precision counters */ uint64_t xs_xstrat_bytes; uint64_t xs_write_bytes; From cddfa648f1ab99e30e91455be19cd5ade26338c2 Mon Sep 17 00:00:00 2001 From: Ethan Tidmore Date: Thu, 19 Feb 2026 21:38:25 -0600 Subject: [PATCH 339/828] xfs: Fix error pointer dereference The function try_lookup_noperm() can return an error pointer and is not checked for one. Add checks for error pointer in xrep_adoption_check_dcache() and xrep_adoption_zap_dcache(). Detected by Smatch: fs/xfs/scrub/orphanage.c:449 xrep_adoption_check_dcache() error: 'd_child' dereferencing possible ERR_PTR() fs/xfs/scrub/orphanage.c:485 xrep_adoption_zap_dcache() error: 'd_child' dereferencing possible ERR_PTR() Fixes: 73597e3e42b4 ("xfs: ensure dentry consistency when the orphanage adopts a file") Cc: stable@vger.kernel.org # v6.16 Signed-off-by: Ethan Tidmore Reviewed-by: Darrick J. Wong Reviewed-by: Nirjhar Roy (IBM) Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/orphanage.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 52a108f6d5f4..33c6db6b4498 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -442,6 +442,11 @@ xrep_adoption_check_dcache( return 0; d_child = try_lookup_noperm(&qname, d_orphanage); + if (IS_ERR(d_child)) { + dput(d_orphanage); + return PTR_ERR(d_child); + } + if (d_child) { trace_xrep_adoption_check_child(sc->mp, d_child); @@ -479,7 +484,7 @@ xrep_adoption_zap_dcache( return; d_child = try_lookup_noperm(&qname, d_orphanage); - while (d_child != NULL) { + while (!IS_ERR_OR_NULL(d_child)) { trace_xrep_adoption_invalidate_child(sc->mp, d_child); ASSERT(d_is_negative(d_child)); From e764dd439d68cfc16724e469db390d779ab49521 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 15:25:35 -0800 Subject: [PATCH 340/828] xfs: fix copy-paste error in previous fix Chris Mason noticed that there is a copy-paste error in a recent change to xrep_dir_teardown that nulls out pointers after freeing the resources. Fixes: ba408d299a3bb3c ("xfs: only call xf{array,blob}_destroy if we have a valid pointer") Link: https://lore.kernel.org/linux-xfs/20260205194211.2307232-1-clm@meta.com/ Reported-by: Chris Mason Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/dir_repair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 9dc55c918c78..23b80c54aa60 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -177,7 +177,7 @@ xrep_dir_teardown( rd->dir_names = NULL; if (rd->dir_entries) xfarray_destroy(rd->dir_entries); - rd->dir_names = NULL; + rd->dir_entries = NULL; } /* Set up for a directory repair. */ From 161456987a1fe4ad73c3f36dec1f684316ac9bdd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 15:25:35 -0800 Subject: [PATCH 341/828] xfs: fix xfs_group release bug in xfs_verify_report_losses Chris Mason reports that his AI tools noticed that we were using xfs_perag_put and xfs_group_put to release the group reference returned by xfs_group_next_range. However, the iterator function returns an object with an active refcount, which means that we must use the correct function to release the active refcount, which is _rele. Fixes: b8accfd65d31f2 ("xfs: add media verification ioctl") Reported-by: Chris Mason Link: https://lore.kernel.org/linux-xfs/20260206030527.2506821-1-clm@meta.com/ Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_verify_media.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c index 069cd371619d..8bbd4ec567f8 100644 --- a/fs/xfs/xfs_verify_media.c +++ b/fs/xfs/xfs_verify_media.c @@ -122,7 +122,7 @@ xfs_verify_report_losses( error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); if (error) { - xfs_perag_put(pag); + xfs_perag_rele(pag); break; } @@ -158,7 +158,7 @@ xfs_verify_report_losses( if (rtg) xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); if (error) { - xfs_group_put(xg); + xfs_group_rele(xg); break; } } From eb8550fb75a875657dc29e3925a40244ec6b6bd6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 15:25:36 -0800 Subject: [PATCH 342/828] xfs: fix xfs_group release bug in xfs_dax_notify_dev_failure Chris Mason reports that his AI tools noticed that we were using xfs_perag_put and xfs_group_put to release the group reference returned by xfs_group_next_range. However, the iterator function returns an object with an active refcount, which means that we must use the correct function to release the active refcount, which is _rele. Cc: # v6.0 Fixes: 6f643c57d57c56 ("xfs: implement ->notify_failure() for XFS") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_notify_failure.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 6be19fa1ebe2..64c8afb935c2 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -304,7 +304,7 @@ xfs_dax_notify_dev_failure( error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); if (error) { - xfs_perag_put(pag); + xfs_perag_rele(pag); break; } @@ -340,7 +340,7 @@ xfs_dax_notify_dev_failure( if (rtg) xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); if (error) { - xfs_group_put(xg); + xfs_group_rele(xg); break; } } From 94014a23e91a3944947048169ccf38b4561cfd0c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 15:25:37 -0800 Subject: [PATCH 343/828] xfs: fix potential pointer access race in xfs_healthmon_get Pankaj Raghav asks about this code in xfs_healthmon_get: hm = mp->m_healthmon; if (hm && !refcount_inc_not_zero(&hm->ref)) hm = NULL; rcu_read_unlock(); return hm; (slightly edited to compress a mailing list thread) "Nit: Should we do a READ_ONCE(mp->m_healthmon) here to avoid any compiler tricks that can result in an undefined behaviour? I am not sure if I am being paranoid here. "So this is my understanding: RCU guarantees that we get a valid object (actual data of m_healthmon) but does not guarantee the compiler will not reread the pointer between checking if hm is !NULL and accessing the pointer as we are doing it lockless. "So just a barrier() call in rcu_read_lock is enough to make sure this doesn't happen and probably adding a READ_ONCE() is not needed?" After some initial confusion I concluded that he's correct. The compiler could very well eliminate the hm variable in favor of walking the pointers twice, turning the code into: if (mp->m_healthmon && !refcount_inc_not_zero(&mp->m_healthmon->ref)) If this happens, then xfs_healthmon_detach can sneak in between the two sides of the && expression and set mp->m_healthmon to NULL, and thereby cause a null pointer dereference crash. Fix this by using the rcu pointer assignment and dereference functions, which ensure that the proper reordering barriers are in place. Practically speaking, gcc seems to allocate an actual variable for hm and only reads mp->m_healthmon once (as intended), but we ought to be more explicit about requiring this. Reported-by: Pankaj Raghav Fixes: a48373e7d35a89f6f ("xfs: start creating infrastructure for health monitoring") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Pankaj Raghav Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_healthmon.c | 11 +++++++---- fs/xfs/xfs_mount.h | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index e37c18cec372..4a06d6632f65 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -69,7 +69,7 @@ xfs_healthmon_get( struct xfs_healthmon *hm; rcu_read_lock(); - hm = mp->m_healthmon; + hm = rcu_dereference(mp->m_healthmon); if (hm && !refcount_inc_not_zero(&hm->ref)) hm = NULL; rcu_read_unlock(); @@ -110,13 +110,13 @@ xfs_healthmon_attach( struct xfs_healthmon *hm) { spin_lock(&xfs_healthmon_lock); - if (mp->m_healthmon != NULL) { + if (rcu_access_pointer(mp->m_healthmon) != NULL) { spin_unlock(&xfs_healthmon_lock); return -EEXIST; } refcount_inc(&hm->ref); - mp->m_healthmon = hm; + rcu_assign_pointer(mp->m_healthmon, hm); hm->mount_cookie = (uintptr_t)mp->m_super; spin_unlock(&xfs_healthmon_lock); @@ -128,13 +128,16 @@ STATIC void xfs_healthmon_detach( struct xfs_healthmon *hm) { + struct xfs_mount *mp; + spin_lock(&xfs_healthmon_lock); if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) { spin_unlock(&xfs_healthmon_lock); return; } - XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL; + mp = XFS_M((struct super_block *)hm->mount_cookie); + rcu_assign_pointer(mp->m_healthmon, NULL); hm->mount_cookie = DETACHED_MOUNT_COOKIE; spin_unlock(&xfs_healthmon_lock); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 61c71128d171..ddd4028be8d6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -345,7 +345,7 @@ typedef struct xfs_mount { struct xfs_hooks m_dir_update_hooks; /* Private data referring to a health monitor object. */ - struct xfs_healthmon *m_healthmon; + struct xfs_healthmon __rcu *m_healthmon; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) From 75690e5fdd74fc4d2a4aec58be9a82aec7cee721 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 15:25:38 -0800 Subject: [PATCH 344/828] xfs: don't report metadata inodes to fserror Internal metadata inodes are not exposed to userspace programs, so it makes no sense to pass them to the fserror functions (aka fsnotify). Instead, report metadata file problems as general filesystem corruption. Fixes: 5eb4cb18e445d0 ("xfs: convey metadata health events to the health monitor") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_health.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 169123772cb3..6475159eb930 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -314,6 +314,18 @@ xfs_rgno_mark_sick( xfs_rtgroup_put(rtg); } +static inline void xfs_inode_report_fserror(struct xfs_inode *ip) +{ + /* Report metadata inodes as general filesystem corruption */ + if (xfs_is_internal_inode(ip)) { + fserror_report_metadata(ip->i_mount->m_super, -EFSCORRUPTED, + GFP_NOFS); + return; + } + + fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); +} + /* Mark the unhealthy parts of an inode. */ void xfs_inode_mark_sick( @@ -339,7 +351,7 @@ xfs_inode_mark_sick( inode_state_clear(VFS_I(ip), I_DONTCACHE); spin_unlock(&VFS_I(ip)->i_lock); - fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + xfs_inode_report_fserror(ip); if (mask) xfs_healthmon_report_inode(ip, XFS_HEALTHMON_SICK, old_mask, mask); @@ -371,7 +383,7 @@ xfs_inode_mark_corrupt( inode_state_clear(VFS_I(ip), I_DONTCACHE); spin_unlock(&VFS_I(ip)->i_lock); - fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + xfs_inode_report_fserror(ip); if (mask) xfs_healthmon_report_inode(ip, XFS_HEALTHMON_CORRUPT, old_mask, mask); From 115ea07b94d2f13942fbd93c6acde376db36b16a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 18 Feb 2026 15:25:38 -0800 Subject: [PATCH 345/828] xfs: don't report half-built inodes to fserror Sam Sun apparently found a syzbot way to fuzz a filesystem such that xfs_iget_cache_miss would free the inode before the fserror code could catch up. Frustratingly he doesn't use the syzbot dashboard so there's no C reproducer and not even a full error report, so I'm guessing that: Inodes that are being constructed or torn down inside XFS are not visible to the VFS. They should never be reported to fserror. Also, any inode that has been freshly allocated in _cache_miss should be marked INEW immediately because, well, it's an incompletely constructed inode that isn't yet visible to the VFS. Reported-by: Sam Sun Fixes: 5eb4cb18e445d0 ("xfs: convey metadata health events to the health monitor") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_health.c | 8 ++++++-- fs/xfs/xfs_icache.c | 9 ++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 6475159eb930..239b843e83d4 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -316,8 +316,12 @@ xfs_rgno_mark_sick( static inline void xfs_inode_report_fserror(struct xfs_inode *ip) { - /* Report metadata inodes as general filesystem corruption */ - if (xfs_is_internal_inode(ip)) { + /* + * Do not report inodes being constructed or freed, or metadata inodes, + * to fsnotify. + */ + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIM) || + xfs_is_internal_inode(ip)) { fserror_report_metadata(ip->i_mount->m_super, -EFSCORRUPTED, GFP_NOFS); return; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f2d4294efd37..a7a09e7eec81 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -639,6 +639,14 @@ xfs_iget_cache_miss( if (!ip) return -ENOMEM; + /* + * Set XFS_INEW as early as possible so that the health code won't pass + * the inode to the fserror code if the ondisk inode cannot be loaded. + * We're going to free the xfs_inode immediately if that happens, which + * would lead to UAF problems. + */ + xfs_iflags_set(ip, XFS_INEW); + error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags); if (error) goto out_destroy; @@ -716,7 +724,6 @@ xfs_iget_cache_miss( ip->i_udquot = NULL; ip->i_gdquot = NULL; ip->i_pdquot = NULL; - xfs_iflags_set(ip, XFS_INEW); /* insert the new inode */ spin_lock(&pag->pag_ici_lock); From 8baa9bccc0156d6952d337bf17f57ce15902dfe4 Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Fri, 20 Feb 2026 12:23:58 +0530 Subject: [PATCH 346/828] xfs: Fix xfs_last_rt_bmblock() Bug description: If the size of the last rtgroup i.e, the rtg passed to xfs_last_rt_bmblock() is such that the last rtextent falls in 0th word offset of a bmblock of the bitmap file tracking this (last) rtgroup, then in that case xfs_last_rt_bmblock() incorrectly returns the next bmblock number instead of the current/last used bmblock number. When xfs_last_rt_bmblock() incorrectly returns the next bmblock, the loop to grow/modify the bmblocks in xfs_growfs_rtg() doesn't execute and xfs_growfs basically does a nop in certain cases. xfs_growfs will do a nop when the new size of the fs will have the same number of rtgroups i.e, we are only growing the last rtgroup. Reproduce: $ mkfs.xfs -m metadir=0 -r rtdev=/dev/loop1 /dev/loop0 \ -r size=32769b -f $ mount -o rtdev=/dev/loop1 /dev/loop0 /mnt/scratch $ xfs_growfs -R $(( 32769 + 1 )) /mnt/scratch $ xfs_info /mnt/scratch | grep rtextents $ # We can see that rtextents hasn't changed Fix: Fix this by returning the current/last used bmblock when the last rtgroup size is not a multiple xfs_rtbitmap_rtx_per_rbmblock() and the next bmblock when the rtgroup size is a multiple of xfs_rtbitmap_rtx_per_rbmblock() i.e, the existing blocks are completely used up. Also, I have renamed xfs_last_rt_bmblock() to xfs_last_rt_bmblock_to_extend() to signify that this function returns the bmblock number to extend and NOT always the last used bmblock number. Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Nirjhar Roy (IBM) Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_rtalloc.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index aab59f66384e..ae53ba2093b2 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1082,17 +1082,27 @@ xfs_last_rtgroup_extents( } /* - * Calculate the last rbmblock currently used. + * This will return the bitmap block number (indexed at 0) that will be + * extended/modified. There are 2 cases here: + * 1. The size of the rtg is such that it is a multiple of + * xfs_rtbitmap_rtx_per_rbmblock() i.e, an integral number of bitmap blocks + * are completely filled up. In this case, we should return + * 1 + (the last used bitmap block number). + * 2. The size of the rtg is not an multiple of xfs_rtbitmap_rtx_per_rbmblock(). + * Here we will return the block number of last used block number. In this + * case, we will modify the last used bitmap block to extend the size of the + * rtgroup. * * This also deals with the case where there were no rtextents before. */ static xfs_fileoff_t -xfs_last_rt_bmblock( +xfs_last_rt_bmblock_to_extend( struct xfs_rtgroup *rtg) { struct xfs_mount *mp = rtg_mount(rtg); xfs_rgnumber_t rgno = rtg_rgno(rtg); xfs_fileoff_t bmbno = 0; + unsigned int mod = 0; ASSERT(!mp->m_sb.sb_rgcount || rgno >= mp->m_sb.sb_rgcount - 1); @@ -1100,9 +1110,16 @@ xfs_last_rt_bmblock( xfs_rtxnum_t nrext = xfs_last_rtgroup_extents(mp); /* Also fill up the previous block if not entirely full. */ - bmbno = xfs_rtbitmap_blockcount_len(mp, nrext); - if (xfs_rtx_to_rbmword(mp, nrext) != 0) - bmbno--; + /* We are doing a -1 to convert it to a 0 based index */ + bmbno = xfs_rtbitmap_blockcount_len(mp, nrext) - 1; + div_u64_rem(nrext, xfs_rtbitmap_rtx_per_rbmblock(mp), &mod); + /* + * mod = 0 means that all the current blocks are full. So + * return the next block number to be used for the rtgroup + * growth. + */ + if (mod == 0) + bmbno++; } return bmbno; @@ -1207,7 +1224,8 @@ xfs_growfs_rtg( goto out_rele; } - for (bmbno = xfs_last_rt_bmblock(rtg); bmbno < bmblocks; bmbno++) { + for (bmbno = xfs_last_rt_bmblock_to_extend(rtg); bmbno < bmblocks; + bmbno++) { error = xfs_growfs_rt_bmblock(rtg, nrblocks, rextsize, bmbno); if (error) goto out_error; From ac1d977096a17d56c55bd7f90be48e81ac4cec3f Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Fri, 20 Feb 2026 12:23:59 +0530 Subject: [PATCH 347/828] xfs: Add a comment in xfs_log_sb() Add a comment explaining why the sb_frextents are updated outside the if (xfs_has_lazycount(mp) check even though it is a lazycounter. RT groups are supported only in v5 filesystems which always have lazycounter enabled - so putting it inside the if(xfs_has_lazycount(mp) check is redundant. Suggested-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Nirjhar Roy (IBM) Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_sb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 38d16fe1f6d8..47322adb7690 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1347,6 +1347,9 @@ xfs_log_sb( * feature was introduced. This counter can go negative due to the way * we handle nearly-lockless reservations, so we must use the _positive * variant here to avoid writing out nonsense frextents. + * + * RT groups are only supported on v5 file systems, which always + * have lazy SB counters. */ if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp)) { mp->m_sb.sb_frextents = From c2368fc89a684be2900daaa2bbf68cbc147e8d3d Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Fri, 20 Feb 2026 12:24:00 +0530 Subject: [PATCH 348/828] xfs: Update lazy counters in xfs_growfs_rt_bmblock() Update lazy counters in xfs_growfs_rt_bmblock() similar to the way it is done xfs_growfs_data_private(). This is because the lazy counters are not always updated and synching the counters will avoid inconsistencies between frexents and rtextents(total realtime extent count). This will be more useful once realtime shrink is implemented as this will prevent some transient state to occur where frexents might be greater than total rtextents. Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Nirjhar Roy (IBM) Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_rtalloc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ae53ba2093b2..153f3c378f9f 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1050,6 +1050,15 @@ xfs_growfs_rt_bmblock( */ xfs_trans_resv_calc(mp, &mp->m_resv); + /* + * Sync sb counters now to reflect the updated values. Lazy counters are + * not always updated and in order to avoid inconsistencies between + * frextents and rtextents, it is better to sync the counters. + */ + + if (xfs_has_lazysbcount(mp)) + xfs_log_sb(args.tp); + error = xfs_trans_commit(args.tp); if (error) goto out_free; From 9a654a8fa3191e9ea32c4494b943c0872a3f5d27 Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Fri, 20 Feb 2026 12:24:01 +0530 Subject: [PATCH 349/828] xfs: Add comments for usages of some macros. Add comments explaining when to use XFS_IS_CORRUPT() and ASSERT() Suggested-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Nirjhar Roy (IBM) Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_platform.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/xfs/xfs_platform.h b/fs/xfs/xfs_platform.h index 1e59bf94d1f2..59a33c60e0ca 100644 --- a/fs/xfs/xfs_platform.h +++ b/fs/xfs/xfs_platform.h @@ -235,6 +235,10 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, #ifdef XFS_WARN +/* + * Please note that this ASSERT doesn't kill the kernel. It will if the kernel + * has panic_on_warn set. + */ #define ASSERT(expr) \ (likely(expr) ? (void)0 : asswarn(NULL, #expr, __FILE__, __LINE__)) @@ -245,6 +249,11 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, #endif /* XFS_WARN */ #endif /* DEBUG */ +/* + * Use this to catch metadata corruptions that are not caught by block or + * structure verifiers. The reason is that the verifiers check corruptions only + * within the scope of the object being verified. + */ #define XFS_IS_CORRUPT(mp, expr) \ (unlikely(expr) ? xfs_corruption_error(#expr, XFS_ERRLEVEL_LOW, (mp), \ NULL, 0, __FILE__, __LINE__, \ From e97cbf863d8918452c9f81bebdade8d04e2e7b60 Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Wed, 11 Feb 2026 13:29:02 +1000 Subject: [PATCH 350/828] xfs: remove duplicate static size checks In libxfs/xfs_ondisk.h, remove some duplicate entries of XFS_CHECK_STRUCT_SIZE(). Signed-off-by: Wilfred Mallawa Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_ondisk.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 70605019383c..7bccfa7b695c 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -136,16 +136,7 @@ xfs_check_ondisk_structs(void) /* ondisk dir/attr structures from xfs/122 */ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_entry, 3); XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_unused, 6); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10); /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); From 650b774cf94495465d6a38c31bb1a6ce697b6b37 Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Wed, 11 Feb 2026 13:29:04 +1000 Subject: [PATCH 351/828] xfs: add static size checks for ioctl UABI The ioctl structures in libxfs/xfs_fs.h are missing static size checks. It is useful to have static size checks for these structures as adding new fields to them could cause issues (e.g. extra padding that may be inserted by the compiler). So add these checks to xfs/xfs_ondisk.h. Due to different padding/alignment requirements across different architectures, to avoid build failures, some structures are ommited from the size checks. For example, structures with "compat_" definitions in xfs/xfs_ioctl32.h are ommited. Signed-off-by: Wilfred Mallawa Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_ondisk.h | 39 +++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 7bccfa7b695c..23cde1248f01 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -208,11 +208,6 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); - XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); - XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); - XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); - XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); - /* * Make sure the incore inode timestamp range corresponds to hand * converted values based on the ondisk format specification. @@ -292,6 +287,40 @@ xfs_check_ondisk_structs(void) XFS_CHECK_SB_OFFSET(sb_pad, 281); XFS_CHECK_SB_OFFSET(sb_rtstart, 288); XFS_CHECK_SB_OFFSET(sb_rtreserved, 296); + + /* + * ioctl UABI + * + * Due to different padding/alignment requirements across + * different architectures, some structures are ommited from + * the size checks. In addition, structures with architecture + * dependent size fields are also ommited (e.g. __kernel_long_t). + */ + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); + XFS_CHECK_STRUCT_SIZE(struct dioattr, 12); + XFS_CHECK_STRUCT_SIZE(struct getbmap, 32); + XFS_CHECK_STRUCT_SIZE(struct getbmapx, 48); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist_cursor, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist_ent, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_ag_geometry, 128); + XFS_CHECK_STRUCT_SIZE(struct xfs_rtgroup_geometry, 128); + XFS_CHECK_STRUCT_SIZE(struct xfs_error_injection, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_geom, 256); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_geom_v4, 112); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_counts, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_resblks, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_growfs_log, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_bulk_ireq, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_fs_eofblocks, 128); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsid, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_metadata, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_vec, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_vec_head, 40); } #endif /* __XFS_ONDISK_H */ From 6b050482ec40569429d963ac52afa878691b04c9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 24 Feb 2026 16:17:52 -0800 Subject: [PATCH 352/828] cpufreq: intel_pstate: Fix crash during turbo disable When the system is booted with kernel command line argument "nosmt" or "maxcpus" to limit the number of CPUs, disabling turbo via: echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo results in a crash: PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI ... RIP: 0010:store_no_turbo+0x100/0x1f0 ... This occurs because for_each_possible_cpu() returns CPUs even if they are not online. For those CPUs, all_cpu_data[] will be NULL. Since commit 973207ae3d7c ("cpufreq: intel_pstate: Rearrange max frequency updates handling code"), all_cpu_data[] is dereferenced even for CPUs which are not online, causing the NULL pointer dereference. To fix that, pass CPU number to intel_pstate_update_max_freq() and use all_cpu_data[] for those CPUs for which there is a valid cpufreq policy. Fixes: 973207ae3d7c ("cpufreq: intel_pstate: Rearrange max frequency updates handling code") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221068 Signed-off-by: Srinivas Pandruvada Cc: 6.16+ # 6.16+ Link: https://patch.msgid.link/20260225001752.890164-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index bdc37080d319..11c58af41900 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1476,13 +1476,13 @@ static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy, refresh_frequency_limits(policy); } -static bool intel_pstate_update_max_freq(struct cpudata *cpudata) +static bool intel_pstate_update_max_freq(int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) return false; - __intel_pstate_update_max_freq(policy, cpudata); + __intel_pstate_update_max_freq(policy, all_cpu_data[cpu]); return true; } @@ -1501,7 +1501,7 @@ static void intel_pstate_update_limits_for_all(void) int cpu; for_each_possible_cpu(cpu) - intel_pstate_update_max_freq(all_cpu_data[cpu]); + intel_pstate_update_max_freq(cpu); mutex_lock(&hybrid_capacity_lock); @@ -1910,7 +1910,7 @@ static void intel_pstate_notify_work(struct work_struct *work) struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); - if (intel_pstate_update_max_freq(cpudata)) { + if (intel_pstate_update_max_freq(cpudata->cpu)) { /* * The driver will not be unregistered while this function is * running, so update the capacity without acquiring the driver From c9bc1753b3cc41d0e01fbca7f035258b5f4db0ae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 Feb 2026 13:29:09 +0100 Subject: [PATCH 353/828] perf: Fix __perf_event_overflow() vs perf_remove_from_context() race Make sure that __perf_event_overflow() runs with IRQs disabled for all possible callchains. Specifically the software events can end up running it with only preemption disabled. This opens up a race vs perf_event_exit_event() and friends that will go and free various things the overflow path expects to be present, like the BPF program. Fixes: 592903cdcbf6 ("perf_counter: add an event_list") Reported-by: Simond Hu Signed-off-by: Peter Zijlstra (Intel) Tested-by: Simond Hu Link: https://patch.msgid.link/20260224122909.GV1395416@noisy.programming.kicks-ass.net --- kernel/events/core.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 22a0f405585b..1f5699b339ec 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10777,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { + /* + * Entry point from hardware PMI, interrupts should be disabled here. + * This serializes us against perf_event_remove_from_context() in + * things like perf_event_release_kernel(). + */ + lockdep_assert_irqs_disabled(); + return __perf_event_overflow(event, 1, data, regs); } @@ -10853,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, { struct hw_perf_event *hwc = &event->hw; + /* + * This is: + * - software preempt + * - tracepoint preempt + * - tp_target_task irq (ctx->lock) + * - uprobes preempt/irq + * - kprobes preempt/irq + * - hw_breakpoint irq + * + * Any of these are sufficient to hold off RCU and thus ensure @event + * exists. + */ + lockdep_assert_preemption_disabled(); local64_add(nr, &event->count); if (!regs) @@ -10861,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, if (!is_sampling_event(event)) return; + /* + * Serialize against event_function_call() IPIs like normal overflow + * event handling. Specifically, must not allow + * perf_event_release_kernel() -> perf_remove_from_context() to make + * progress and 'release' the event from under us. + */ + guard(irqsave)(); + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) { data->period = nr; return perf_swevent_overflow(event, 1, data, regs); @@ -11359,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct perf_sample_data data; struct perf_event *event; + /* + * Per being a tracepoint, this runs with preemption disabled. + */ + lockdep_assert_preemption_disabled(); + struct perf_raw_record raw = { .frag = { .size = entry_size, @@ -11691,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data) struct perf_sample_data sample; struct pt_regs *regs = data; + /* + * Exception context, will have interrupts disabled. + */ + lockdep_assert_irqs_disabled(); + perf_sample_data_init(&sample, bp->attr.bp_addr, 0); if (!bp->hw.state && !perf_exclude_event(bp, regs)) @@ -12155,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) if (regs && !perf_exclude_event(event, regs)) { if (!(event->attr.exclude_idle && is_idle_task(current))) - if (__perf_event_overflow(event, 1, &data, regs)) + if (perf_event_overflow(event, &data, regs)) ret = HRTIMER_NORESTART; } From ab6088e7a95943af3452b20e3b96caaaef3eeebd Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Feb 2026 16:16:30 -0700 Subject: [PATCH 354/828] lib/Kconfig.debug: Require a release version of LLVM 22 for context analysis Using a prerelease version as a minimum supported version for CONFIG_WARN_CONTEXT_ANALYSIS was reasonable to do while LLVM 22 was the development version so that people could immediately build from main and start testing and validating this in their own code. However, it can be problematic when using prerelease versions of LLVM 22, such as Android clang 22.0.1 (the current android mainline compiler) or when bisecting LLVM between llvmorg-22-init and llvmorg-23-init, to build the kernel, as all compiler fixes for the context analysis may not be present, potentially resulting in warnings that can easily turn into errors. Now that LLVM 22 is released as 22.1.0, upgrade the check to require at least this version to ensure that a user's toolchain actually has all the changes needed for a smooth experience with context analysis. Fixes: 3269701cb256 ("compiler-context-analysis: Add infrastructure for Context Analysis with Clang") Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://patch.msgid.link/20260224-bump-clang-ver-context-analysis-v1-1-16cc7a90a040@kernel.org --- lib/Kconfig.debug | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4e2dfbbd3d78..8e2b858078e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -630,7 +630,7 @@ config DEBUG_FORCE_WEAK_PER_CPU config WARN_CONTEXT_ANALYSIS bool "Compiler context-analysis warnings" - depends on CC_IS_CLANG && CLANG_VERSION >= 220000 + depends on CC_IS_CLANG && CLANG_VERSION >= 220100 # Branch profiling re-defines "if", which messes with the compiler's # ability to analyze __cond_acquires(..), resulting in false positives. depends on !TRACE_BRANCH_PROFILING @@ -641,7 +641,7 @@ config WARN_CONTEXT_ANALYSIS and releasing user-definable "context locks". Clang's name of the feature is "Thread Safety Analysis". Requires - Clang 22 or later. + Clang 22.1.0 or later. Produces warnings by default. Select CONFIG_WERROR if you wish to turn these warnings into errors. From e9fb2028f1eb563e653cff3b0d1c87c5e0203d45 Mon Sep 17 00:00:00 2001 From: Panagiotis Foliadis Date: Wed, 25 Feb 2026 14:53:43 +0000 Subject: [PATCH 355/828] ALSA: hda/intel: increase default bdl_pos_adj for Nvidia controllers The default bdl_pos_adj of 32 for Nvidia HDA controllers is insufficient on GA102 (and likely other recent Nvidia GPUs) after S3 suspend/resume. The controller's DMA timing degrades after resume, causing premature IRQ detection in azx_position_ok() which results in silent HDMI/DP audio output despite userspace reporting a valid playback state and correct ELD data. Increase bdl_pos_adj to 64 for AZX_DRIVER_NVIDIA, matching the value already used by Intel Apollo Lake for the same class of timing issue. Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221069 Suggested-by: Charalampos Mitrodimas Signed-off-by: Panagiotis Foliadis Link: https://patch.msgid.link/20260225-nvidia-audio-fix-v1-1-b1383c37ec49@posteo.net Signed-off-by: Takashi Iwai --- sound/hda/controllers/intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 6fddf400c4a3..3f434994c18d 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -1751,6 +1751,8 @@ static int default_bdl_pos_adj(struct azx *chip) return 1; case AZX_DRIVER_ZHAOXINHDMI: return 128; + case AZX_DRIVER_NVIDIA: + return 64; default: return 32; } From 85f6c439a69afe4fa8a688512e586971e97e273a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 25 Feb 2026 10:35:57 +0000 Subject: [PATCH 356/828] io_uring/timeout: READ_ONCE sqe->addr We should use READ_ONCE when reading from a SQE, make sure timeout gets a stable timespec address. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/timeout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 84dda24f3eb2..cb61d4862fc6 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -462,7 +462,7 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) tr->ltimeout = true; if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) return -EINVAL; - if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) + if (get_timespec64(&tr->ts, u64_to_user_ptr(READ_ONCE(sqe->addr2)))) return -EFAULT; if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) return -EINVAL; @@ -557,7 +557,7 @@ static int __io_timeout_prep(struct io_kiocb *req, data->req = req; data->flags = flags; - if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) + if (get_timespec64(&data->ts, u64_to_user_ptr(READ_ONCE(sqe->addr)))) return -EFAULT; if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) From 0d785e2c324c90662baa4fe07a0d02233ff92824 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:04 +0100 Subject: [PATCH 357/828] s390/idle: Fix cpu idle exit cpu time accounting With the conversion to generic entry [1] cpu idle exit cpu time accounting was converted from assembly to C. This introduced an reversed order of cpu time accounting. On cpu idle exit the current accounting happens with the following call chain: -> do_io_irq()/do_ext_irq() -> irq_enter_rcu() -> account_hardirq_enter() -> vtime_account_irq() -> vtime_account_kernel() vtime_account_kernel() accounts the passed cpu time since last_update_timer as system time, and updates last_update_timer to the current cpu timer value. However the subsequent call of -> account_idle_time_irq() will incorrectly subtract passed cpu time from timer_idle_enter to the updated last_update_timer value from system_timer. Then last_update_timer is updated to a sys_enter_timer, which means that last_update_timer goes back in time. Subsequently account_hardirq_exit() will account too much cpu time as hardirq time. The sum of all accounted cpu times is still correct, however some cpu time which was previously accounted as system time is now accounted as hardirq time, plus there is the oddity that last_update_timer goes back in time. Restore previous behavior by extracting cpu time accounting code from account_idle_time_irq() into a new update_timer_idle() function and call it before irq_enter_rcu(). Fixes: 56e62a737028 ("s390: convert to generic entry") [1] Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/idle.h | 1 + arch/s390/kernel/idle.c | 13 +++++++++---- arch/s390/kernel/irq.c | 10 ++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 09f763b9eb40..133059d9a949 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -23,5 +23,6 @@ extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; void psw_idle(struct s390_idle_data *data, unsigned long psw_mask); +void update_timer_idle(void); #endif /* _S390_IDLE_H */ diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 39cb8d0ae348..0f9e53f0a068 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -21,11 +21,10 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); -void account_idle_time_irq(void) +void update_timer_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); struct lowcore *lc = get_lowcore(); - unsigned long idle_time; u64 cycles_new[8]; int i; @@ -35,13 +34,19 @@ void account_idle_time_irq(void) this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); } - idle_time = lc->int_clock - idle->clock_idle_enter; - lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; lc->last_update_clock = lc->int_clock; lc->system_timer += lc->last_update_timer - idle->timer_idle_enter; lc->last_update_timer = lc->sys_enter_timer; +} + +void account_idle_time_irq(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + unsigned long idle_time; + + idle_time = get_lowcore()->int_clock - idle->clock_idle_enter; /* Account time spent with enabled wait psw loaded as idle time. */ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f81723bc8856..d10a17e6531d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -146,6 +146,10 @@ void noinstr do_io_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); bool from_idle; + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + update_timer_idle(); + irq_enter_rcu(); if (user_mode(regs)) { @@ -154,7 +158,6 @@ void noinstr do_io_irq(struct pt_regs *regs) current->thread.last_break = regs->last_break; } - from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); if (from_idle) account_idle_time_irq(); @@ -182,6 +185,10 @@ void noinstr do_ext_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); bool from_idle; + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + update_timer_idle(); + irq_enter_rcu(); if (user_mode(regs)) { @@ -194,7 +201,6 @@ void noinstr do_ext_irq(struct pt_regs *regs) regs->int_parm = get_lowcore()->ext_params; regs->int_parm_long = get_lowcore()->ext_params2; - from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); if (from_idle) account_idle_time_irq(); From dbc0fb35679ed5d0adecf7d02137ac2c77244b3b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:05 +0100 Subject: [PATCH 358/828] s390/vtime: Fix virtual timer forwarding Since delayed accounting of system time [1] the virtual timer is forwarded by do_account_vtime() but also vtime_account_kernel(), vtime_account_softirq(), and vtime_account_hardirq(). This leads to double accounting of system, guest, softirq, and hardirq time. Remove accounting from the vtime_account*() family to restore old behavior. There is only one user of the vtimer interface, which might explain why nobody noticed this so far. Fixes: b7394a5f4ce9 ("sched/cputime, s390: Implement delayed accounting of system time") [1] Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vtime.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 234a0ba30510..122d30b10440 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -225,10 +225,6 @@ static u64 vtime_delta(void) return timer - lc->last_update_timer; } -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ void vtime_account_kernel(struct task_struct *tsk) { struct lowcore *lc = get_lowcore(); @@ -238,27 +234,17 @@ void vtime_account_kernel(struct task_struct *tsk) lc->guest_timer += delta; else lc->system_timer += delta; - - virt_timer_forward(delta); } EXPORT_SYMBOL_GPL(vtime_account_kernel); void vtime_account_softirq(struct task_struct *tsk) { - u64 delta = vtime_delta(); - - get_lowcore()->softirq_timer += delta; - - virt_timer_forward(delta); + get_lowcore()->softirq_timer += vtime_delta(); } void vtime_account_hardirq(struct task_struct *tsk) { - u64 delta = vtime_delta(); - - get_lowcore()->hardirq_timer += delta; - - virt_timer_forward(delta); + get_lowcore()->hardirq_timer += vtime_delta(); } /* From aefa6ec890f0d39a89fc80e95908ec1996337eee Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:06 +0100 Subject: [PATCH 359/828] s390/idle: Add comment for non obvious code Add a comment to update_timer_idle() which describes why wall time (not steal time) is added to steal_timer. This is not obvious and was reported by Frederic Weisbecker. Reported-by: Frederic Weisbecker Closes: https://lore.kernel.org/all/aXEVM-04lj0lntMr@localhost.localdomain/ Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/idle.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 0f9e53f0a068..4e09f126d4fc 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -34,6 +34,15 @@ void update_timer_idle(void) this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); } + /* + * This is a bit subtle: Forward last_update_clock so it excludes idle + * time. For correct steal time calculation in do_account_vtime() add + * passed wall time before idle_enter to steal_timer: + * During the passed wall time before idle_enter CPU time may have + * been accounted to system, hardirq, softirq, etc. lowcore fields. + * The accounted CPU times will be subtracted again from steal_timer + * when accumulated steal time is calculated in do_account_vtime(). + */ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; lc->last_update_clock = lc->int_clock; From 00d8b035eb71262abb547d47717b19f4a55ad4f2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:07 +0100 Subject: [PATCH 360/828] s390/idle: Slightly optimize idle time accounting Slightly optimize account_idle_time_irq() and update_timer_idle(): - Use fast single instruction __atomic64() primitives to update per cpu idle_time and idle_count, instead of READ_ONCE() / WRITE_ONCE() pairs - stcctm() is an inline assembly with a full memory barrier. This leads to a not necessary extra dereference of smp_cpu_mtid in update_timer_idle(). Avoid this and read smp_cpu_mtid into a variable - Use __this_cpu_add() instead of this_cpu_add() to avoid disabling / enabling of preemption several times in a loop in update_timer_idle(). Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/idle.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 4e09f126d4fc..627d82dd900e 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -26,12 +26,13 @@ void update_timer_idle(void) struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); struct lowcore *lc = get_lowcore(); u64 cycles_new[8]; - int i; + int i, mtid; - if (smp_cpu_mtid) { - stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); - for (i = 0; i < smp_cpu_mtid; i++) - this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + mtid = smp_cpu_mtid; + if (mtid) { + stcctm(MT_DIAG, mtid, cycles_new); + for (i = 0; i < mtid; i++) + __this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); } /* @@ -58,8 +59,8 @@ void account_idle_time_irq(void) idle_time = get_lowcore()->int_clock - idle->clock_idle_enter; /* Account time spent with enabled wait psw loaded as idle time. */ - WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); - WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1); + __atomic64_add(idle_time, &idle->idle_time); + __atomic64_add_const(1, &idle->idle_count); account_idle_time(cputime_to_nsecs(idle_time)); } From 257c14e5a1d8de40fded80fd8b525af55a6ded26 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:08 +0100 Subject: [PATCH 361/828] s390/idle: Inline update_timer_idle() Inline update_timer_idle() again to avoid an extra function call. This way the generated code is close to old assembler version again. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/idle.h | 3 ++- arch/s390/include/asm/vtime.h | 34 ++++++++++++++++++++++++++++++++++ arch/s390/kernel/entry.h | 2 -- arch/s390/kernel/idle.c | 34 ++-------------------------------- 4 files changed, 38 insertions(+), 35 deletions(-) diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 133059d9a949..8c5aa7329461 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -19,10 +19,11 @@ struct s390_idle_data { unsigned long mt_cycles_enter[8]; }; +DECLARE_PER_CPU(struct s390_idle_data, s390_idle); + extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; void psw_idle(struct s390_idle_data *data, unsigned long psw_mask); -void update_timer_idle(void); #endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index 9d25fb35a042..b1db75d14e9d 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -2,6 +2,12 @@ #ifndef _S390_VTIME_H #define _S390_VTIME_H +#include +#include +#include + +DECLARE_PER_CPU(u64, mt_cycles[8]); + static inline void update_timer_sys(void) { struct lowcore *lc = get_lowcore(); @@ -20,4 +26,32 @@ static inline void update_timer_mcck(void) lc->last_update_timer = lc->mcck_enter_timer; } +static inline void update_timer_idle(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + struct lowcore *lc = get_lowcore(); + u64 cycles_new[8]; + int i, mtid; + + mtid = smp_cpu_mtid; + if (mtid) { + stcctm(MT_DIAG, mtid, cycles_new); + for (i = 0; i < mtid; i++) + __this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + } + /* + * This is a bit subtle: Forward last_update_clock so it excludes idle + * time. For correct steal time calculation in do_account_vtime() add + * passed wall time before idle_enter to steal_timer: + * During the passed wall time before idle_enter CPU time may have + * been accounted to system, hardirq, softirq, etc. lowcore fields. + * The accounted CPU times will be subtracted again from steal_timer + * when accumulated steal time is calculated in do_account_vtime(). + */ + lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; + lc->last_update_clock = lc->int_clock; + lc->system_timer += lc->last_update_timer - idle->timer_idle_enter; + lc->last_update_timer = lc->sys_enter_timer; +} + #endif /* _S390_VTIME_H */ diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index dd55cc6bbc28..fb67b4abe68c 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -56,8 +56,6 @@ long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags); -DECLARE_PER_CPU(u64, mt_cycles[8]); - unsigned long stack_alloc(void); void stack_free(unsigned long stack); diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 627d82dd900e..1f1b06b6b4ef 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -15,41 +15,11 @@ #include #include #include +#include #include #include -#include "entry.h" -static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); - -void update_timer_idle(void) -{ - struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); - struct lowcore *lc = get_lowcore(); - u64 cycles_new[8]; - int i, mtid; - - mtid = smp_cpu_mtid; - if (mtid) { - stcctm(MT_DIAG, mtid, cycles_new); - for (i = 0; i < mtid; i++) - __this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); - } - - /* - * This is a bit subtle: Forward last_update_clock so it excludes idle - * time. For correct steal time calculation in do_account_vtime() add - * passed wall time before idle_enter to steal_timer: - * During the passed wall time before idle_enter CPU time may have - * been accounted to system, hardirq, softirq, etc. lowcore fields. - * The accounted CPU times will be subtracted again from steal_timer - * when accumulated steal time is calculated in do_account_vtime(). - */ - lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; - lc->last_update_clock = lc->int_clock; - - lc->system_timer += lc->last_update_timer - idle->timer_idle_enter; - lc->last_update_timer = lc->sys_enter_timer; -} +DEFINE_PER_CPU(struct s390_idle_data, s390_idle); void account_idle_time_irq(void) { From d8b5cf9c63143fae54a734c41e3bb55cf3f365c7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:09 +0100 Subject: [PATCH 362/828] s390/irq/idle: Remove psw bits early Remove wait, io, external interrupt bits early in do_io_irq()/do_ext_irq() when previous context was idle. This saves one conditional branch and is closer to the original old assembly code. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/irq.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index d10a17e6531d..7fdf960191d3 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -147,8 +147,10 @@ void noinstr do_io_irq(struct pt_regs *regs) bool from_idle; from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); - if (from_idle) + if (from_idle) { update_timer_idle(); + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); + } irq_enter_rcu(); @@ -174,9 +176,6 @@ void noinstr do_io_irq(struct pt_regs *regs) set_irq_regs(old_regs); irqentry_exit(regs, state); - - if (from_idle) - regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); } void noinstr do_ext_irq(struct pt_regs *regs) @@ -186,8 +185,10 @@ void noinstr do_ext_irq(struct pt_regs *regs) bool from_idle; from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); - if (from_idle) + if (from_idle) { update_timer_idle(); + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); + } irq_enter_rcu(); @@ -209,9 +210,6 @@ void noinstr do_ext_irq(struct pt_regs *regs) irq_exit_rcu(); set_irq_regs(old_regs); irqentry_exit(regs, state); - - if (from_idle) - regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); } static void show_msi_interrupt(struct seq_file *p, int irq) From e282ccd308de7b2a65eb4596da8030c18b543071 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:10 +0100 Subject: [PATCH 363/828] s390/vtime: Use __this_cpu_read() / get rid of READ_ONCE() do_account_vtime() runs always with interrupts disabled, therefore use __this_cpu_read() instead of this_cpu_read() to get rid of a pointless preempt_disable() / preempt_enable() pair. Also there are no concurrent writers to the cpu time accounting fields in lowcore. Therefore get rid of READ_ONCE() usages. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vtime.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 122d30b10440..4111ff4d727c 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -137,23 +137,16 @@ static int do_account_vtime(struct task_struct *tsk) lc->system_timer += timer; /* Update MT utilization calculation */ - if (smp_cpu_mtid && - time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) + if (smp_cpu_mtid && time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) update_mt_scaling(); /* Calculate cputime delta */ - user = update_tsk_timer(&tsk->thread.user_timer, - READ_ONCE(lc->user_timer)); - guest = update_tsk_timer(&tsk->thread.guest_timer, - READ_ONCE(lc->guest_timer)); - system = update_tsk_timer(&tsk->thread.system_timer, - READ_ONCE(lc->system_timer)); - hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, - READ_ONCE(lc->hardirq_timer)); - softirq = update_tsk_timer(&tsk->thread.softirq_timer, - READ_ONCE(lc->softirq_timer)); - lc->steal_timer += - clock - user - guest - system - hardirq - softirq; + user = update_tsk_timer(&tsk->thread.user_timer, lc->user_timer); + guest = update_tsk_timer(&tsk->thread.guest_timer, lc->guest_timer); + system = update_tsk_timer(&tsk->thread.system_timer, lc->system_timer); + hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, lc->hardirq_timer); + softirq = update_tsk_timer(&tsk->thread.softirq_timer, lc->softirq_timer); + lc->steal_timer += clock - user - guest - system - hardirq - softirq; /* Push account value */ if (user) { From 80f63fd09efe9d9c016fd6378116eb7394edd2f2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:11 +0100 Subject: [PATCH 364/828] s390/vtime: Use lockdep_assert_irqs_disabled() instead of BUG_ON() Use lockdep_assert_irqs_disabled() instead of BUG_ON(). This avoids crashing the kernel, and generates better code if CONFIG_PROVE_LOCKING is disabled. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vtime.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4111ff4d727c..bf48744d0912 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -48,8 +48,7 @@ static inline void set_vtimer(u64 expires) static inline int virt_timer_forward(u64 elapsed) { - BUG_ON(!irqs_disabled()); - + lockdep_assert_irqs_disabled(); if (list_empty(&virt_timer_list)) return 0; elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed); From acbc0437ca3cf6c19005476a4f1f5a7c96ff4256 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Feb 2026 15:20:12 +0100 Subject: [PATCH 365/828] s390/idle: Remove psw_idle() prototype psw_idle() does not exist anymore. Remove its prototype. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/idle.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 8c5aa7329461..32536ee34aa0 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -24,6 +24,4 @@ DECLARE_PER_CPU(struct s390_idle_data, s390_idle); extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; -void psw_idle(struct s390_idle_data *data, unsigned long psw_mask); - #endif /* _S390_IDLE_H */ From 1623a554c68f352c17d0a358bc62580dc187f06b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 23 Feb 2026 23:33:52 +0100 Subject: [PATCH 366/828] s390/kexec: Disable stack protector in s390_reset_system() s390_reset_system() calls set_prefix(0), which switches back to the absolute lowcore. At that point the stack protector canary no longer matches the canary from the lowcore the function was entered with, so the stack check fails. Mark s390_reset_system() __no_stack_protector. This is safe here since its callers (__do_machine_kdump() and __do_machine_kexec()) are effectively no-return and fall back to disabled_wait() on failure. Fixes: f5730d44e05e ("s390: Add stackprotector support") Reported-by: Nikita Dubrovskii Reviewed-by: Heiko Carstens Acked-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index dcdc7e274848..049c557c452f 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2377,7 +2377,7 @@ void __init setup_ipl(void) atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); } -void s390_reset_system(void) +void __no_stack_protector s390_reset_system(void) { /* Disable prefixing */ set_prefix(0); From d879ac6756b662a085a743e76023c768c3241579 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 24 Feb 2026 07:41:07 +0100 Subject: [PATCH 367/828] s390/pfault: Fix virtual vs physical address confusion When Linux is running as guest, runs a user space process and the user space process accesses a page that the host has paged out, the guest gets a pfault interrupt and schedules a different process. Without this mechanism the host would have to suspend the whole virtual CPU until the page has been paged in. To setup the pfault interrupt the real address of parameter list should be passed to DIAGNOSE 0x258, but a virtual address is passed instead. That has a performance impact, since the pfault setup never succeeds, the interrupt is never delivered to a guest and the whole virtual CPU is suspended as result. Cc: stable@vger.kernel.org Fixes: c98d2ecae08f ("s390/mm: Uncouple physical vs virtual address spaces") Reported-by: Claudio Imbrenda Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/mm/pfault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 2f829448c719..6ecd6b0a22a8 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -62,7 +62,7 @@ int __pfault_init(void) "0: nopr %%r7\n" EX_TABLE(0b, 0b) : [rc] "+d" (rc) - : [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk) + : [refbk] "a" (virt_to_phys(&pfault_init_refbk)), "m" (pfault_init_refbk) : "cc"); return rc; } @@ -84,7 +84,7 @@ void __pfault_fini(void) "0: nopr %%r7\n" EX_TABLE(0b, 0b) : - : [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) + : [refbk] "a" (virt_to_phys(&pfault_fini_refbk)), "m" (pfault_fini_refbk) : "cc"); } From 3ea20672d23b21327266534e08ca10d8f281f4ab Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 24 Feb 2026 13:41:20 +0100 Subject: [PATCH 368/828] MAINTAINERS: Update contact with the kernel.org address Use the kernel.org address as a unified single entry to send patches to. At the same time, update mailmap to group all past contributions. Signed-off-by: Daniel Lezcano --- .mailmap | 4 ++++ MAINTAINERS | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.mailmap b/.mailmap index e1cf6bb85d33..463de4784ecb 100644 --- a/.mailmap +++ b/.mailmap @@ -210,6 +210,10 @@ Daniel Borkmann Daniel Borkmann Daniel Borkmann Daniel Borkmann +Daniel Lezcano +Daniel Lezcano +Daniel Lezcano +Daniel Lezcano Daniel Thompson Danilo Krummrich David Brownell diff --git a/MAINTAINERS b/MAINTAINERS index 55af015174a5..6c8752099924 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6280,7 +6280,7 @@ S: Maintained F: include/linux/clk.h CLOCKSOURCE, CLOCKEVENT DRIVERS -M: Daniel Lezcano +M: Daniel Lezcano M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Supported @@ -6669,7 +6669,7 @@ F: rust/kernel/cpu.rs CPU IDLE TIME MANAGEMENT FRAMEWORK M: "Rafael J. Wysocki" -M: Daniel Lezcano +M: Daniel Lezcano R: Christian Loehle L: linux-pm@vger.kernel.org S: Maintained @@ -6699,7 +6699,7 @@ F: arch/x86/kernel/msr.c CPUIDLE DRIVER - ARM BIG LITTLE M: Lorenzo Pieralisi -M: Daniel Lezcano +M: Daniel Lezcano L: linux-pm@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -6707,7 +6707,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git F: drivers/cpuidle/cpuidle-big_little.c CPUIDLE DRIVER - ARM EXYNOS -M: Daniel Lezcano +M: Daniel Lezcano M: Kukjin Kim R: Krzysztof Kozlowski L: linux-pm@vger.kernel.org @@ -26217,7 +26217,7 @@ F: drivers/media/radio/radio-raremono.c THERMAL M: Rafael J. Wysocki -M: Daniel Lezcano +M: Daniel Lezcano R: Zhang Rui R: Lukasz Luba L: linux-pm@vger.kernel.org @@ -26247,7 +26247,7 @@ F: drivers/thermal/amlogic_thermal.c THERMAL/CPU_COOLING M: Amit Daniel Kachhap -M: Daniel Lezcano +M: Daniel Lezcano M: Viresh Kumar R: Lukasz Luba L: linux-pm@vger.kernel.org From 085f067389d12bd9800c0a9672a174c1de7a8069 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Wed, 25 Feb 2026 01:15:23 +0000 Subject: [PATCH 369/828] cgroup/cpuset: fix null-ptr-deref in rebuild_sched_domains_cpuslocked A null-pointer-dereference bug was reported by syzbot: Oops: general protection fault, probably for address 0xdffffc0000000000: KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:bitmap_subset include/linux/bitmap.h:433 [inline] RIP: 0010:cpumask_subset include/linux/cpumask.h:836 [inline] RIP: 0010:rebuild_sched_domains_locked kernel/cgroup/cpuset.c:967 RSP: 0018:ffffc90003ecfbc0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000020 RDX: ffff888028de0000 RSI: ffffffff8200f003 RDI: ffffffff8df14f28 RBP: 0000000000000000 R08: 0000000000000cc0 R09: 00000000ffffffff R10: ffffffff8e7d95b3 R11: 0000000000000001 R12: 0000000000000000 R13: 00000000000f4240 R14: dffffc0000000000 R15: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2f463fff CR3: 000000003704c000 CR4: 00000000003526f0 Call Trace: rebuild_sched_domains_cpuslocked kernel/cgroup/cpuset.c:983 [inline] rebuild_sched_domains+0x21/0x40 kernel/cgroup/cpuset.c:990 sched_rt_handler+0xb5/0xe0 kernel/sched/rt.c:2911 proc_sys_call_handler+0x47f/0x5a0 fs/proc/proc_sysctl.c:600 new_sync_write fs/read_write.c:595 [inline] vfs_write+0x6ac/0x1070 fs/read_write.c:688 ksys_write+0x12a/0x250 fs/read_write.c:740 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x106/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The issue occurs when generate_sched_domains() returns ndoms = 1 and doms = NULL due to a kmalloc failure. This leads to a null-pointer dereference when accessing doms in rebuild_sched_domains_locked(). Fix this by adding a NULL check for doms before accessing it. Fixes: 6ee43047e8ad ("cpuset: Remove unnecessary checks in rebuild_sched_domains_locked") Reported-by: syzbot+460792609a79c085f79f@syzkaller.appspotmail.com Acked-by: Waiman Long Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index aa45351c6f38..271bb99b1b9d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1002,7 +1002,7 @@ void rebuild_sched_domains_locked(void) * offline CPUs, a warning is emitted and we return directly to * prevent the panic. */ - for (i = 0; i < ndoms; ++i) { + for (i = 0; doms && i < ndoms; i++) { if (WARN_ON_ONCE(!cpumask_subset(doms[i], cpu_active_mask))) return; } From 712896ac4bce38a965a1c175f6e7804ed0381334 Mon Sep 17 00:00:00 2001 From: Li Li Date: Mon, 5 Jan 2026 06:47:28 +0000 Subject: [PATCH 370/828] idpf: increment completion queue next_to_clean in sw marker wait routine Currently, in idpf_wait_for_sw_marker_completion(), when an IDPF_TXD_COMPLT_SW_MARKER packet is found, the routine breaks out of the for loop and does not increment the next_to_clean counter. This causes the subsequent NAPI polls to run into the same IDPF_TXD_COMPLT_SW_MARKER packet again and print out the following: [ 23.261341] idpf 0000:05:00.0 eth1: Unknown TX completion type: 5 Instead, we should increment next_to_clean regardless when an IDPF_TXD_COMPLT_SW_MARKER packet is found. Tested: with the patch applied, we do not see the errors above from NAPI polls anymore. Fixes: 9d39447051a0 ("idpf: remove SW marker handling from NAPI") Signed-off-by: Li Li Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 376050308b06..761a77510467 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2348,7 +2348,7 @@ void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq) do { struct idpf_splitq_4b_tx_compl_desc *tx_desc; - struct idpf_tx_queue *target; + struct idpf_tx_queue *target = NULL; u32 ctype_gen, id; tx_desc = flow ? &complq->comp[ntc].common : @@ -2368,14 +2368,14 @@ void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq) target = complq->txq_grp->txqs[id]; idpf_queue_clear(SW_MARKER, target); - if (target == txq) - break; next: if (unlikely(++ntc == complq->desc_count)) { ntc = 0; gen_flag = !gen_flag; } + if (target == txq) + break; } while (time_before(jiffies, timeout)); idpf_queue_assign(GEN_CHK, complq, gen_flag); From d11e5da2d6d87d123e339dc9af7bc1682fc533ce Mon Sep 17 00:00:00 2001 From: Li Li Date: Mon, 12 Jan 2026 23:09:43 +0000 Subject: [PATCH 371/828] idpf: skip deallocating bufq_sets from rx_qgrp if it is NULL In idpf_rxq_group_alloc(), if rx_qgrp->splitq.bufq_sets failed to get allocated: rx_qgrp->splitq.bufq_sets = kcalloc(vport->num_bufqs_per_qgrp, sizeof(struct idpf_bufq_set), GFP_KERNEL); if (!rx_qgrp->splitq.bufq_sets) { err = -ENOMEM; goto err_alloc; } idpf_rxq_group_rel() would attempt to deallocate it in idpf_rxq_sw_queue_rel(), causing a kernel panic: ``` [ 7.967242] early-network-sshd-n-rexd[3148]: knetbase: Info: [ 8.127804] BUG: kernel NULL pointer dereference, address: 00000000000000c0 ... [ 8.129779] RIP: 0010:idpf_rxq_group_rel+0x101/0x170 ... [ 8.133854] Call Trace: [ 8.133980] [ 8.134092] idpf_vport_queues_alloc+0x286/0x500 [ 8.134313] idpf_vport_open+0x4d/0x3f0 [ 8.134498] idpf_open+0x71/0xb0 [ 8.134668] __dev_open+0x142/0x260 [ 8.134840] netif_open+0x2f/0xe0 [ 8.135004] dev_open+0x3d/0x70 [ 8.135166] bond_enslave+0x5ed/0xf50 [ 8.135345] ? nla_put_ifalias+0x3d/0x90 [ 8.135533] ? kvfree_call_rcu+0xb5/0x3b0 [ 8.135725] ? kvfree_call_rcu+0xb5/0x3b0 [ 8.135916] do_set_master+0x114/0x160 [ 8.136098] do_setlink+0x412/0xfb0 [ 8.136269] ? security_sock_rcv_skb+0x2a/0x50 [ 8.136509] ? sk_filter_trim_cap+0x7c/0x320 [ 8.136714] ? skb_queue_tail+0x20/0x50 [ 8.136899] ? __nla_validate_parse+0x92/0xe50 [ 8.137112] ? security_capable+0x35/0x60 [ 8.137304] rtnl_newlink+0x95c/0xa00 [ 8.137483] ? __rtnl_unlock+0x37/0x70 [ 8.137664] ? netdev_run_todo+0x63/0x530 [ 8.137855] ? allocate_slab+0x280/0x870 [ 8.138044] ? security_capable+0x35/0x60 [ 8.138235] rtnetlink_rcv_msg+0x2e6/0x340 [ 8.138431] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 [ 8.138650] netlink_rcv_skb+0x16a/0x1a0 [ 8.138840] netlink_unicast+0x20a/0x320 [ 8.139028] netlink_sendmsg+0x304/0x3b0 [ 8.139217] __sock_sendmsg+0x89/0xb0 [ 8.139399] ____sys_sendmsg+0x167/0x1c0 [ 8.139588] ? ____sys_recvmsg+0xed/0x150 [ 8.139780] ___sys_sendmsg+0xdd/0x120 [ 8.139960] ? ___sys_recvmsg+0x124/0x1e0 [ 8.140152] ? rcutree_enqueue+0x1f/0xb0 [ 8.140341] ? rcutree_enqueue+0x1f/0xb0 [ 8.140528] ? call_rcu+0xde/0x2a0 [ 8.140695] ? evict+0x286/0x2d0 [ 8.140856] ? rcutree_enqueue+0x1f/0xb0 [ 8.141043] ? kmem_cache_free+0x2c/0x350 [ 8.141236] __x64_sys_sendmsg+0x72/0xc0 [ 8.141424] do_syscall_64+0x6f/0x890 [ 8.141603] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 8.141841] RIP: 0033:0x7f2799d21bd0 ... [ 8.149905] Kernel panic - not syncing: Fatal exception [ 8.175940] Kernel Offset: 0xf800000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 8.176425] Rebooting in 10 seconds.. ``` Tested: With this patch, the kernel panic no longer appears. Fixes: 95af467d9a4e ("idpf: configure resources for RX queues") Signed-off-by: Li Li Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 761a77510467..59aafadae3d0 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1341,6 +1341,9 @@ static void idpf_txq_group_rel(struct idpf_q_vec_rsrc *rsrc) */ static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp) { + if (!rx_qgrp->splitq.bufq_sets) + return; + for (unsigned int i = 0; i < rx_qgrp->splitq.num_bufq_sets; i++) { struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i]; From e403bf4013a665961bdfdf65193e3076c3d155a5 Mon Sep 17 00:00:00 2001 From: Li Li Date: Mon, 12 Jan 2026 23:09:44 +0000 Subject: [PATCH 372/828] idpf: skip deallocating txq group's txqs if it is NULL In idpf_txq_group_alloc(), if any txq group's txqs failed to allocate memory: for (j = 0; j < tx_qgrp->num_txq; j++) { tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]), GFP_KERNEL); if (!tx_qgrp->txqs[j]) goto err_alloc; } It would cause a NULL ptr kernel panic in idpf_txq_group_rel(): for (j = 0; j < txq_grp->num_txq; j++) { if (flow_sch_en) { kfree(txq_grp->txqs[j]->refillq); txq_grp->txqs[j]->refillq = NULL; } kfree(txq_grp->txqs[j]); txq_grp->txqs[j] = NULL; } [ 6.532461] BUG: kernel NULL pointer dereference, address: 0000000000000058 ... [ 6.534433] RIP: 0010:idpf_txq_group_rel+0xc9/0x110 ... [ 6.538513] Call Trace: [ 6.538639] [ 6.538760] idpf_vport_queues_alloc+0x75/0x550 [ 6.538978] idpf_vport_open+0x4d/0x3f0 [ 6.539164] idpf_open+0x71/0xb0 [ 6.539324] __dev_open+0x142/0x260 [ 6.539506] netif_open+0x2f/0xe0 [ 6.539670] dev_open+0x3d/0x70 [ 6.539827] bond_enslave+0x5ed/0xf50 [ 6.540005] ? rcutree_enqueue+0x1f/0xb0 [ 6.540193] ? call_rcu+0xde/0x2a0 [ 6.540375] ? barn_get_empty_sheaf+0x5c/0x80 [ 6.540594] ? __kfree_rcu_sheaf+0xb6/0x1a0 [ 6.540793] ? nla_put_ifalias+0x3d/0x90 [ 6.540981] ? kvfree_call_rcu+0xb5/0x3b0 [ 6.541173] ? kvfree_call_rcu+0xb5/0x3b0 [ 6.541365] do_set_master+0x114/0x160 [ 6.541547] do_setlink+0x412/0xfb0 [ 6.541717] ? security_sock_rcv_skb+0x2a/0x50 [ 6.541931] ? sk_filter_trim_cap+0x7c/0x320 [ 6.542136] ? skb_queue_tail+0x20/0x50 [ 6.542322] ? __nla_validate_parse+0x92/0xe50 ro[o t t o6 .d5e4f2a5u4l0t]- ? security_capable+0x35/0x60 [ 6.542792] rtnl_newlink+0x95c/0xa00 [ 6.542972] ? __rtnl_unlock+0x37/0x70 [ 6.543152] ? netdev_run_todo+0x63/0x530 [ 6.543343] ? allocate_slab+0x280/0x870 [ 6.543531] ? security_capable+0x35/0x60 [ 6.543722] rtnetlink_rcv_msg+0x2e6/0x340 [ 6.543918] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 [ 6.544138] netlink_rcv_skb+0x16a/0x1a0 [ 6.544328] netlink_unicast+0x20a/0x320 [ 6.544516] netlink_sendmsg+0x304/0x3b0 [ 6.544748] __sock_sendmsg+0x89/0xb0 [ 6.544928] ____sys_sendmsg+0x167/0x1c0 [ 6.545116] ? ____sys_recvmsg+0xed/0x150 [ 6.545308] ___sys_sendmsg+0xdd/0x120 [ 6.545489] ? ___sys_recvmsg+0x124/0x1e0 [ 6.545680] ? rcutree_enqueue+0x1f/0xb0 [ 6.545867] ? rcutree_enqueue+0x1f/0xb0 [ 6.546055] ? call_rcu+0xde/0x2a0 [ 6.546222] ? evict+0x286/0x2d0 [ 6.546389] ? rcutree_enqueue+0x1f/0xb0 [ 6.546577] ? kmem_cache_free+0x2c/0x350 [ 6.546784] __x64_sys_sendmsg+0x72/0xc0 [ 6.546972] do_syscall_64+0x6f/0x890 [ 6.547150] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 6.547393] RIP: 0033:0x7fc1a3347bd0 ... [ 6.551375] RIP: 0010:idpf_txq_group_rel+0xc9/0x110 ... [ 6.578856] Rebooting in 10 seconds.. We should skip deallocating txqs[j] if it is NULL in the first place. Tested: with this patch, the kernel panic no longer appears. Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues") Signed-off-by: Li Li Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 59aafadae3d0..7613f020b622 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1316,6 +1316,9 @@ static void idpf_txq_group_rel(struct idpf_q_vec_rsrc *rsrc) struct idpf_txq_group *txq_grp = &rsrc->txq_grps[i]; for (unsigned int j = 0; j < txq_grp->num_txq; j++) { + if (!txq_grp->txqs[j]) + continue; + if (idpf_queue_has(FLOW_SCH_EN, txq_grp->txqs[j])) { kfree(txq_grp->txqs[j]->refillq); txq_grp->txqs[j]->refillq = NULL; From bc3b31977314433e4685ae92853d1b6e91ad7bc2 Mon Sep 17 00:00:00 2001 From: Li Li Date: Fri, 23 Jan 2026 06:58:06 +0000 Subject: [PATCH 373/828] idpf: nullify pointers after they are freed rss_data->rss_key needs to be nullified after it is freed. Checks like "if (!rss_data->rss_key)" in the code could fail if it is not nullified. Tested: built and booted the kernel. Fixes: 83f38f210b85 ("idpf: Fix RSS LUT NULL pointer crash on early ethtool operations") Signed-off-by: Li Li Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 94da5fbd56f1..7ce6a0e4acb6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1320,6 +1320,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, free_rss_key: kfree(rss_data->rss_key); + rss_data->rss_key = NULL; free_qreg_chunks: idpf_vport_deinit_queue_reg_chunks(adapter->vport_config[idx]); free_vector_idxs: From 1500a8662d2d41d6bb03e034de45ddfe6d7d362d Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Mon, 26 Jan 2026 21:55:59 +0000 Subject: [PATCH 374/828] idpf: change IRQ naming to match netdev and ethtool queue numbering The code uses the vidx for the IRQ name but that doesn't match ethtool reporting nor netdev naming, this makes it hard to tune the device and associate queues with IRQs. Sequentially requesting irqs starting from '0' makes the output consistent. This commit changes the interrupt numbering but preserves the name format, maintaining ABI compatibility. Existing tools relying on the old numbering are already non-functional, as they lack a useful correlation to the interrupts. Before: ethtool -L eth1 tx 1 combined 3 grep . /proc/irq/*/*idpf*/../smp_affinity_list /proc/irq/67/idpf-Mailbox-0/../smp_affinity_list:0-55,112-167 /proc/irq/68/idpf-eth1-TxRx-1/../smp_affinity_list:0 /proc/irq/70/idpf-eth1-TxRx-3/../smp_affinity_list:1 /proc/irq/71/idpf-eth1-TxRx-4/../smp_affinity_list:2 /proc/irq/72/idpf-eth1-Tx-5/../smp_affinity_list:3 ethtool -S eth1 | grep -v ': 0' NIC statistics: tx_q-0_pkts: 1002 tx_q-1_pkts: 2679 tx_q-2_pkts: 1113 tx_q-3_pkts: 1192 <----- tx_q-3 vs idpf-eth1-Tx-5 rx_q-0_pkts: 1143 rx_q-1_pkts: 3172 rx_q-2_pkts: 1074 After: ethtool -L eth1 tx 1 combined 3 grep . /proc/irq/*/*idpf*/../smp_affinity_list /proc/irq/67/idpf-Mailbox-0/../smp_affinity_list:0-55,112-167 /proc/irq/68/idpf-eth1-TxRx-0/../smp_affinity_list:0 /proc/irq/70/idpf-eth1-TxRx-1/../smp_affinity_list:1 /proc/irq/71/idpf-eth1-TxRx-2/../smp_affinity_list:2 /proc/irq/72/idpf-eth1-Tx-3/../smp_affinity_list:3 ethtool -S eth1 | grep -v ': 0' NIC statistics: tx_q-0_pkts: 118 tx_q-1_pkts: 134 tx_q-2_pkts: 228 tx_q-3_pkts: 138 <--- tx_q-3 matches idpf-eth1-Tx-3 rx_q-0_pkts: 111 rx_q-1_pkts: 366 rx_q-2_pkts: 120 Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Signed-off-by: Brian Vazquez Reviewed-by: Brett Creeley Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Reviewed-by: Eric Dumazet Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 7613f020b622..b7e922fffc96 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4077,7 +4077,7 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, continue; name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, - vec_name, vidx); + vec_name, vector); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector); From 2c31557336a8e4d209ed8d4513cef2c0f15e7ef4 Mon Sep 17 00:00:00 2001 From: Sreedevi Joshi Date: Tue, 13 Jan 2026 12:01:13 -0600 Subject: [PATCH 375/828] idpf: Fix flow rule delete failure due to invalid validation When deleting a flow rule using "ethtool -N delete ", idpf_sideband_action_ena() incorrectly validates fsp->ring_cookie even though ethtool doesn't populate this field for delete operations. The uninitialized ring_cookie may randomly match RX_CLS_FLOW_DISC or RX_CLS_FLOW_WAKE, causing validation to fail and preventing legitimate rule deletions. Remove the unnecessary sideband action enable check and ring_cookie validation during delete operations since action validation is not required when removing existing rules. Fixes: ada3e24b84a0 ("idpf: add flow steering support") Signed-off-by: Sreedevi Joshi Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_ethtool.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 1d78a621d65b..3275b0215655 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -307,9 +307,6 @@ static int idpf_del_flow_steer(struct net_device *netdev, vport_config = vport->adapter->vport_config[np->vport_idx]; user_config = &vport_config->user_config; - if (!idpf_sideband_action_ena(vport, fsp)) - return -EOPNOTSUPP; - rule = kzalloc(struct_size(rule, rule_info, 1), GFP_KERNEL); if (!rule) return -ENOMEM; From 6aa07e23dd3ccd35a0100c06fcb6b6c3b01e7965 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Thu, 29 Jan 2026 12:00:26 +0800 Subject: [PATCH 376/828] ice: recap the VSI and QoS info after rebuild Fix IRDMA hardware initialization timeout (-110) after resume by separating VSI-dependent configuration from RDMA resource allocation, ensuring VSI is rebuilt before IRDMA accesses it. After resume from suspend, IRDMA hardware initialization fails: ice: IRDMA hardware initialization FAILED init_state=4 status=-110 Separate RDMA initialization into two phases: 1. ice_init_rdma() - Allocate resources only (no VSI/QoS access, no plug) 2. ice_rdma_finalize_setup() - Assign VSI/QoS info and plug device This allows: - ice_init_rdma() to stay in ice_resume() (mirrors ice_deinit_rdma() in ice_suspend()) - VSI assignment deferred until after ice_vsi_rebuild() completes - QoS info updated after ice_dcb_rebuild() completes - Device plugged only when control queues, VSI, and DCB are all ready Fixes: bc69ad74867db ("ice: avoid IRQ collision to fix init failure on ACPI S3 resume") Reviewed-by: Aleksandr Loktionov Signed-off-by: Aaron Ma Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_idc.c | 44 +++++++++++++++++------ drivers/net/ethernet/intel/ice/ice_main.c | 7 +++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index def7efa15447..2b2b22af42be 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -987,6 +987,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); int ice_plug_aux_dev(struct ice_pf *pf); void ice_unplug_aux_dev(struct ice_pf *pf); +void ice_rdma_finalize_setup(struct ice_pf *pf); int ice_init_rdma(struct ice_pf *pf); void ice_deinit_rdma(struct ice_pf *pf); bool ice_is_wol_supported(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 420d45c2558b..ded029aa71d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -360,6 +360,39 @@ void ice_unplug_aux_dev(struct ice_pf *pf) auxiliary_device_uninit(adev); } +/** + * ice_rdma_finalize_setup - Complete RDMA setup after VSI is ready + * @pf: ptr to ice_pf + * + * Sets VSI-dependent information and plugs aux device. + * Must be called after ice_init_rdma(), ice_vsi_rebuild(), and + * ice_dcb_rebuild() complete. + */ +void ice_rdma_finalize_setup(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct iidc_rdma_priv_dev_info *privd; + int ret; + + if (!ice_is_rdma_ena(pf) || !pf->cdev_info) + return; + + privd = pf->cdev_info->iidc_priv; + if (!privd || !pf->vsi || !pf->vsi[0] || !pf->vsi[0]->netdev) + return; + + /* Assign VSI info now that VSI is valid */ + privd->netdev = pf->vsi[0]->netdev; + privd->vport_id = pf->vsi[0]->vsi_num; + + /* Update QoS info after DCB has been rebuilt */ + ice_setup_dcb_qos_info(pf, &privd->qos_info); + + ret = ice_plug_aux_dev(pf); + if (ret) + dev_warn(dev, "Failed to plug RDMA aux device: %d\n", ret); +} + /** * ice_init_rdma - initializes PF for RDMA use * @pf: ptr to ice_pf @@ -398,22 +431,14 @@ int ice_init_rdma(struct ice_pf *pf) } cdev->iidc_priv = privd; - privd->netdev = pf->vsi[0]->netdev; privd->hw_addr = (u8 __iomem *)pf->hw.hw_addr; cdev->pdev = pf->pdev; - privd->vport_id = pf->vsi[0]->vsi_num; pf->cdev_info->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2; - ice_setup_dcb_qos_info(pf, &privd->qos_info); - ret = ice_plug_aux_dev(pf); - if (ret) - goto err_plug_aux_dev; + return 0; -err_plug_aux_dev: - pf->cdev_info->adev = NULL; - xa_erase(&ice_aux_id, pf->aux_idx); err_alloc_xa: kfree(privd); err_privd_alloc: @@ -432,7 +457,6 @@ void ice_deinit_rdma(struct ice_pf *pf) if (!ice_is_rdma_ena(pf)) return; - ice_unplug_aux_dev(pf); xa_erase(&ice_aux_id, pf->aux_idx); kfree(pf->cdev_info->iidc_priv); kfree(pf->cdev_info); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4da37caa3ec9..62914167d121 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5138,6 +5138,9 @@ int ice_load(struct ice_pf *pf) if (err) goto err_init_rdma; + /* Finalize RDMA: VSI already created, assign info and plug device */ + ice_rdma_finalize_setup(pf); + ice_service_task_restart(pf); clear_bit(ICE_DOWN, pf->state); @@ -5169,6 +5172,7 @@ void ice_unload(struct ice_pf *pf) devl_assert_locked(priv_to_devlink(pf)); + ice_unplug_aux_dev(pf); ice_deinit_rdma(pf); ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); @@ -5595,6 +5599,7 @@ static int ice_suspend(struct device *dev) */ disabled = ice_service_task_stop(pf); + ice_unplug_aux_dev(pf); ice_deinit_rdma(pf); /* Already suspended?, then there is nothing to do */ @@ -7859,7 +7864,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_health_clear(pf); - ice_plug_aux_dev(pf); + ice_rdma_finalize_setup(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf); From a9c354e656597aededa027d63d2ff0973f6b033f Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 2 Feb 2026 11:17:54 +0100 Subject: [PATCH 377/828] ice: fix crash in ethtool offline loopback test Since the conversion of ice to page pool, the ethtool loopback test crashes: BUG: kernel NULL pointer dereference, address: 000000000000000c #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 1100f1067 P4D 0 Oops: Oops: 0002 [#1] SMP NOPTI CPU: 23 UID: 0 PID: 5904 Comm: ethtool Kdump: loaded Not tainted 6.19.0-0.rc7.260128g1f97d9dcf5364.49.eln154.x86_64 #1 PREEMPT(lazy) Hardware name: [...] RIP: 0010:ice_alloc_rx_bufs+0x1cd/0x310 [ice] Code: 83 6c 24 30 01 66 41 89 47 08 0f 84 c0 00 00 00 41 0f b7 dc 48 8b 44 24 18 48 c1 e3 04 41 bb 00 10 00 00 48 8d 2c 18 8b 04 24 <89> 45 0c 41 8b 4d 00 49 d3 e3 44 3b 5c 24 24 0f 83 ac fe ff ff 44 RSP: 0018:ff7894738aa1f768 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000700 RDI: 0000000000000000 RBP: 0000000000000000 R08: ff16dcae79880200 R09: 0000000000000019 R10: 0000000000000001 R11: 0000000000001000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ff16dcae6c670000 FS: 00007fcf428850c0(0000) GS:ff16dcb149710000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000000c CR3: 0000000121227005 CR4: 0000000000773ef0 PKRU: 55555554 Call Trace: ice_vsi_cfg_rxq+0xca/0x460 [ice] ice_vsi_cfg_rxqs+0x54/0x70 [ice] ice_loopback_test+0xa9/0x520 [ice] ice_self_test+0x1b9/0x280 [ice] ethtool_self_test+0xe5/0x200 __dev_ethtool+0x1106/0x1a90 dev_ethtool+0xbe/0x1a0 dev_ioctl+0x258/0x4c0 sock_do_ioctl+0xe3/0x130 __x64_sys_ioctl+0xb9/0x100 do_syscall_64+0x7c/0x700 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] It crashes because we have not initialized libeth for the rx ring. Fix it by treating ICE_VSI_LB VSIs slightly more like normal PF VSIs and letting them have a q_vector. It's just a dummy, because the loopback test does not use interrupts, but it contains a napi struct that can be passed to libeth_rx_fq_create() called from ice_vsi_cfg_rxq() -> ice_rxq_pp_create(). Fixes: 93f53db9f9dc ("ice: switch to Page Pool") Signed-off-by: Michal Schmidt Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 5 ++++- drivers/net/ethernet/intel/ice/ice_ethtool.c | 4 ++++ drivers/net/ethernet/intel/ice/ice_lib.c | 15 ++++++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index afbff8aa9ceb..49b4304c4975 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -124,6 +124,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) if (vsi->type == ICE_VSI_VF) { ice_calc_vf_reg_idx(vsi->vf, q_vector); goto out; + } else if (vsi->type == ICE_VSI_LB) { + goto skip_alloc; } else if (vsi->type == ICE_VSI_CTRL && vsi->vf) { struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); @@ -662,7 +664,8 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) u32 rx_buf_len; int err; - if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { + if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF || + ring->vsi->type == ICE_VSI_LB) { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index c6bc29cfb8e6..ff003529beba 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1289,6 +1289,10 @@ static u64 ice_loopback_test(struct net_device *netdev) test_vsi->netdev = netdev; tx_ring = test_vsi->tx_rings[0]; rx_ring = test_vsi->rx_rings[0]; + /* Dummy q_vector and napi. Fill the minimum required for + * ice_rxq_pp_create(). + */ + rx_ring->q_vector->napi.dev = netdev; if (ice_lbtest_prepare_rings(test_vsi)) { ret = 2; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index d921269e1fe7..3cd6296a1fbe 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -107,10 +107,6 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) if (!vsi->rxq_map) goto err_rxq_map; - /* There is no need to allocate q_vectors for a loopback VSI. */ - if (vsi->type == ICE_VSI_LB) - return 0; - /* allocate memory for q_vector pointers */ vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors, sizeof(*vsi->q_vectors), GFP_KERNEL); @@ -241,6 +237,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) case ICE_VSI_LB: vsi->alloc_txq = 1; vsi->alloc_rxq = 1; + /* A dummy q_vector, no actual IRQ. */ + vsi->num_q_vectors = 1; break; default: dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type); @@ -2428,14 +2426,21 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi) } break; case ICE_VSI_LB: - ret = ice_vsi_alloc_rings(vsi); + ret = ice_vsi_alloc_q_vectors(vsi); if (ret) goto unroll_vsi_init; + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto unroll_alloc_q_vector; + ret = ice_vsi_alloc_ring_stats(vsi); if (ret) goto unroll_vector_base; + /* Simply map the dummy q_vector to the only rx_ring */ + vsi->rx_rings[0]->q_vector = vsi->q_vectors[0]; + break; default: /* clean up the resources and exit */ From 4b3d54a85bd37ebf2d9836f0d0de775c0ff21af9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 7 Feb 2026 11:50:23 +0100 Subject: [PATCH 378/828] i40e: Fix preempt count leak in napi poll tracepoint Using get_cpu() in the tracepoint assignment causes an obvious preempt count leak because nothing invokes put_cpu() to undo it: softirq: huh, entered softirq 3 NET_RX with preempt_count 00000100, exited with 00000101? This clearly has seen a lot of testing in the last 3+ years... Use smp_processor_id() instead. Fixes: 6d4d584a7ea8 ("i40e: Add i40e_napi_poll tracepoint") Signed-off-by: Thomas Gleixner Cc: Tony Nguyen Cc: Przemek Kitszel Cc: intel-wired-lan@lists.osuosl.org Cc: netdev@vger.kernel.org Reviewed-by: Joe Damato Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h index 759f3d1c4c8f..dde0ccd789ed 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h @@ -88,7 +88,7 @@ TRACE_EVENT(i40e_napi_poll, __entry->rx_clean_complete = rx_clean_complete; __entry->tx_clean_complete = tx_clean_complete; __entry->irq_num = q->irq_num; - __entry->curr_cpu = get_cpu(); + __entry->curr_cpu = smp_processor_id(); __assign_str(qname); __assign_str(dev_name); __assign_bitmask(irq_affinity, cpumask_bits(&q->affinity_mask), From feae40a6a178bb525a15f19288016e5778102a99 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Wed, 10 Dec 2025 12:26:51 +0100 Subject: [PATCH 379/828] ixgbevf: fix link setup issue It may happen that VF spawned for E610 adapter has problem with setting link up. This happens when ixgbevf supporting mailbox API 1.6 cooperates with PF driver which doesn't support this version of API, and hence doesn't support new approach for getting PF link data. In that case VF asks PF to provide link data but as PF doesn't support it, returns -EOPNOTSUPP what leads to early bail from link configuration sequence. Avoid such situation by using legacy VFLINKS approach whenever negotiated API version is less than 1.6. To reproduce the issue just create VF and set its link up - adapter must be any from the E610 family, ixgbevf must support API 1.6 or higher while ixgbevf must not. Fixes: 53f0eb62b4d2 ("ixgbevf: fix getting link speed data for E610 devices") Reviewed-by: Aleksandr Loktionov Reviewed-by: Piotr Kwapulinski Reviewed-by: Paul Menzel Cc: stable@vger.kernel.org Signed-off-by: Jedrzej Jagielski Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbevf/vf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 74d320879513..b67b580f7f1c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -852,7 +852,8 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, if (!mac->get_link_status) goto out; - if (hw->mac.type == ixgbe_mac_e610_vf) { + if (hw->mac.type == ixgbe_mac_e610_vf && + hw->api_version >= ixgbe_mbox_api_16) { ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up); if (ret_val) goto out; From 5b644464eeeac485685c6632bcc95347ae6c8677 Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Tue, 6 Jan 2026 16:14:19 +0200 Subject: [PATCH 380/828] e1000e: introduce new board type for Panther Lake PCH Add new board type for Panther Lake devices for separating device-specific features and flows. Additionally, remove the deprecated device IDs 0x57B5 and 0x57B6, which are not used by any existing devices. Signed-off-by: Vitaly Lifshits Tested-by: Avigail Dahan Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 4 +++- drivers/net/ethernet/intel/e1000e/hw.h | 2 -- drivers/net/ethernet/intel/e1000e/ich8lan.c | 22 ++++++++++++++++++++- drivers/net/ethernet/intel/e1000e/netdev.c | 15 +++++++------- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index aa08f397988e..63ebe00376f5 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -117,7 +117,8 @@ enum e1000_boards { board_pch_cnp, board_pch_tgp, board_pch_adp, - board_pch_mtp + board_pch_mtp, + board_pch_ptp }; struct e1000_ps_page { @@ -527,6 +528,7 @@ extern const struct e1000_info e1000_pch_cnp_info; extern const struct e1000_info e1000_pch_tgp_info; extern const struct e1000_info e1000_pch_adp_info; extern const struct e1000_info e1000_pch_mtp_info; +extern const struct e1000_info e1000_pch_ptp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index fc8ed38aa095..c7ac599e5a7a 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -118,8 +118,6 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_ARL_I219_V24 0x57A1 #define E1000_DEV_ID_PCH_PTP_I219_LM25 0x57B3 #define E1000_DEV_ID_PCH_PTP_I219_V25 0x57B4 -#define E1000_DEV_ID_PCH_PTP_I219_LM26 0x57B5 -#define E1000_DEV_ID_PCH_PTP_I219_V26 0x57B6 #define E1000_DEV_ID_PCH_PTP_I219_LM27 0x57B7 #define E1000_DEV_ID_PCH_PTP_I219_V27 0x57B8 #define E1000_DEV_ID_PCH_NVL_I219_LM29 0x57B9 diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 0ff8688ac3b8..23421a640097 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -528,7 +528,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) phy->id = e1000_phy_unknown; - if (hw->mac.type == e1000_pch_mtp) { + if (hw->mac.type == e1000_pch_mtp || hw->mac.type == e1000_pch_ptp) { phy->retry_count = 2; e1000e_enable_phy_retry(hw); } @@ -6208,3 +6208,23 @@ const struct e1000_info e1000_pch_mtp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_ptp_info = { + .mac = e1000_pch_ptp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ddbe2f7d8112..814698807f3d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -55,6 +55,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_tgp] = &e1000_pch_tgp_info, [board_pch_adp] = &e1000_pch_adp_info, [board_pch_mtp] = &e1000_pch_mtp_info, + [board_pch_ptp] = &e1000_pch_ptp_info, }; struct e1000_reg_info { @@ -7925,14 +7926,12 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_mtp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_LM24), board_pch_mtp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_V24), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM26), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V26), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_ptp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; From 0942fc6d324eb9c6b16187b2aa994c0823557f06 Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Tue, 6 Jan 2026 16:14:20 +0200 Subject: [PATCH 381/828] e1000e: clear DPG_EN after reset to avoid autonomous power-gating Panther Lake systems introduced an autonomous power gating feature for the integrated Gigabit Ethernet in shutdown state (S5) state. As part of it, the reset value of DPG_EN bit was changed to 1. Clear this bit after performing hardware reset to avoid errors such as Tx/Rx hangs, or packet loss/corruption. Fixes: 0c9183ce61bc ("e1000e: Add support for the next LOM generation") Signed-off-by: Vitaly Lifshits Reviewed-by: Aleksandr Loktionov Tested-by: Avigail Dahan Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/defines.h | 1 + drivers/net/ethernet/intel/e1000e/ich8lan.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index ba331899d186..d4a1041e456d 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -33,6 +33,7 @@ /* Extended Device Control */ #define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */ +#define E1000_CTRL_EXT_DPG_EN 0x00000008 /* Dynamic Power Gating Enable */ #define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */ #define E1000_CTRL_EXT_FORCE_SMBUS 0x00000800 /* Force SMBus mode */ #define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 23421a640097..dea208db1be5 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -4932,6 +4932,15 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) reg |= E1000_KABGTXD_BGSQLBIAS; ew32(KABGTXD, reg); + /* The hardware reset value of the DPG_EN bit is 1. + * Clear DPG_EN to prevent unexpected autonomous power gating. + */ + if (hw->mac.type >= e1000_pch_ptp) { + reg = er32(CTRL_EXT); + reg &= ~E1000_CTRL_EXT_DPG_EN; + ew32(CTRL_EXT, reg); + } + return 0; } From f6bf47ab32e0863df50f5501d207dcdddb7fc507 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 23 Feb 2026 22:10:10 +0000 Subject: [PATCH 382/828] arm64: io: Rename ioremap_prot() to __ioremap_prot() Rename our ioremap_prot() implementation to __ioremap_prot() and convert all arch-internal callers over to the new function. ioremap_prot() remains as a #define to __ioremap_prot() for generic_access_phys() and will be subsequently extended to handle user permissions in 'prot'. Cc: Zeng Heng Cc: Jinjiang Tu Cc: Catalin Marinas Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/io.h | 11 ++++++----- arch/arm64/kernel/acpi.c | 2 +- arch/arm64/mm/ioremap.c | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 83e03abbb2ca..cd2fddfe814a 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -264,19 +264,20 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count) typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size, pgprot_t *prot); int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); +void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot); -#define ioremap_prot ioremap_prot +#define ioremap_prot __ioremap_prot #define _PAGE_IOREMAP PROT_DEVICE_nGnRE #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) + __ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_np(addr, size) \ - ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) + __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) #define ioremap_encrypted(addr, size) \ - ioremap_prot((addr), (size), PAGE_KERNEL) + __ioremap_prot((addr), (size), PAGE_KERNEL) /* * io{read,write}{16,32,64}be() macros @@ -297,7 +298,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(addr))) return (void __iomem *)__phys_to_virt(addr); - return ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); + return __ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); } /* diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index af90128cfed5..a9d884fd1d00 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -377,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) prot = __acpi_get_writethrough_mem_attribute(); } } - return ioremap_prot(phys, size, prot); + return __ioremap_prot(phys, size, prot); } /* diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b12cbed9b5ad..6eef48ef6278 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -14,8 +14,8 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook) return 0; } -void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - pgprot_t pgprot) +void __iomem *__ioremap_prot(phys_addr_t phys_addr, size_t size, + pgprot_t pgprot) { unsigned long last_addr = phys_addr + size - 1; @@ -39,7 +39,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, return generic_ioremap_prot(phys_addr, size, pgprot); } -EXPORT_SYMBOL(ioremap_prot); +EXPORT_SYMBOL(__ioremap_prot); /* * Must be called after early_fixmap_init From 8f098037139b294050053123ab2bc0f819d08932 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 23 Feb 2026 22:10:11 +0000 Subject: [PATCH 383/828] arm64: io: Extract user memory type in ioremap_prot() The only caller of ioremap_prot() outside of the generic ioremap() implementation is generic_access_phys(), which passes a 'pgprot_t' value determined from the user mapping of the target 'pfn' being accessed by the kernel. On arm64, the 'pgprot_t' contains all of the non-address bits from the pte, including the permission controls, and so we end up returning a new user mapping from ioremap_prot() which faults when accessed from the kernel on systems with PAN: | Unable to handle kernel read from unreadable memory at virtual address ffff80008ea89000 | ... | Call trace: | __memcpy_fromio+0x80/0xf8 | generic_access_phys+0x20c/0x2b8 | __access_remote_vm+0x46c/0x5b8 | access_remote_vm+0x18/0x30 | environ_read+0x238/0x3e8 | vfs_read+0xe4/0x2b0 | ksys_read+0xcc/0x178 | __arm64_sys_read+0x4c/0x68 Extract only the memory type from the user 'pgprot_t' in ioremap_prot() and assert that we're being passed a user mapping, to protect us against any changes in future that may require additional handling. To avoid falsely flagging users of ioremap(), provide our own ioremap() macro which simply wraps __ioremap_prot(). Cc: Zeng Heng Cc: Jinjiang Tu Cc: Catalin Marinas Fixes: 893dea9ccd08 ("arm64: Add HAVE_IOREMAP_PROT support") Reported-by: Jinjiang Tu Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/io.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index cd2fddfe814a..8cbd1e96fd50 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -266,10 +266,23 @@ typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size, int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot); -#define ioremap_prot __ioremap_prot +static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size, + pgprot_t user_prot) +{ + pgprot_t prot; + ptdesc_t user_prot_val = pgprot_val(user_prot); -#define _PAGE_IOREMAP PROT_DEVICE_nGnRE + if (WARN_ON_ONCE(!(user_prot_val & PTE_USER))) + return NULL; + prot = __pgprot_modify(PAGE_KERNEL, PTE_ATTRINDX_MASK, + user_prot_val & PTE_ATTRINDX_MASK); + return __ioremap_prot(phys, size, prot); +} +#define ioremap_prot ioremap_prot + +#define ioremap(addr, size) \ + __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) \ __ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_np(addr, size) \ From 8a85b3131225a8c8143ba2ae29c0eef8c1f9117f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 23 Feb 2026 17:45:30 +0000 Subject: [PATCH 384/828] arm64: gcs: Do not set PTE_SHARED on GCS mappings if FEAT_LPA2 is enabled When FEAT_LPA2 is enabled, bits 8-9 of the PTE replace the shareability attribute with bits 50-51 of the output address. The _PAGE_GCS{,_RO} definitions include the PTE_SHARED bits as 0b11 (this matches the other _PAGE_* definitions) but using this macro directly leads to the following panic when enabling GCS on a system/model with LPA2: Unable to handle kernel paging request at virtual address fffff1ffc32d8008 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 swapper pgtable: 4k pages, 52-bit VAs, pgdp=0000000060f4d000 [fffff1ffc32d8008] pgd=100000006184b003, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP CPU: 0 UID: 0 PID: 513 Comm: gcs_write_fault Tainted: G M 7.0.0-rc1 #1 PREEMPT Tainted: [M]=MACHINE_CHECK Hardware name: QEMU QEMU Virtual Machine, BIOS 2025.02-8+deb13u1 11/08/2025 pstate: 03402005 (nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : zap_huge_pmd+0x168/0x468 lr : zap_huge_pmd+0x2c/0x468 sp : ffff800080beb660 x29: ffff800080beb660 x28: fff00000c2058180 x27: ffff800080beb898 x26: fff00000c2058180 x25: ffff800080beb820 x24: 00c800010b600f41 x23: ffffc1ffc30af1a8 x22: fff00000c2058180 x21: 0000ffff8dc00000 x20: fff00000c2bc6370 x19: ffff800080beb898 x18: ffff800080bebb60 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000007 x14: 000000000000000a x13: 0000aaaacbbbffff x12: 0000000000000000 x11: 0000ffff8ddfffff x10: 00000000000001fe x9 : 0000ffff8ddfffff x8 : 0000ffff8de00000 x7 : 0000ffff8da00000 x6 : fff00000c2bc6370 x5 : 0000ffff8da00000 x4 : 000000010b600000 x3 : ffffc1ffc0000000 x2 : fff00000c2058180 x1 : fffff1ffc32d8000 x0 : 000000c00010b600 Call trace: zap_huge_pmd+0x168/0x468 (P) unmap_page_range+0xd70/0x1560 unmap_single_vma+0x48/0x80 unmap_vmas+0x90/0x180 unmap_region+0x88/0xe4 vms_complete_munmap_vmas+0xf8/0x1e0 do_vmi_align_munmap+0x158/0x180 do_vmi_munmap+0xac/0x160 __vm_munmap+0xb0/0x138 vm_munmap+0x14/0x20 gcs_free+0x70/0x80 mm_release+0x1c/0xc8 exit_mm_release+0x28/0x38 do_exit+0x190/0x8ec do_group_exit+0x34/0x90 get_signal+0x794/0x858 arch_do_signal_or_restart+0x11c/0x3e0 exit_to_user_mode_loop+0x10c/0x17c el0_da+0x8c/0x9c el0t_64_sync_handler+0xd0/0xf0 el0t_64_sync+0x198/0x19c Code: aa1603e2 d34cfc00 cb813001 8b011861 (f9400420) Similarly to how the kernel handles protection_map[], use a gcs_page_prot variable to store the protection bits and clear PTE_SHARED if LPA2 is enabled. Also remove the unused PAGE_GCS{,_RO} macros. Signed-off-by: Catalin Marinas Fixes: 6497b66ba694 ("arm64/mm: Map pages for guarded control stack") Reported-by: Emanuele Rocca Cc: stable@vger.kernel.org Cc: Mark Brown Cc: Will Deacon Reviewed-by: David Hildenbrand (Arm) Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 3 --- arch/arm64/mm/mmap.c | 8 ++++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index d27e8872fe3c..2b32639160de 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -164,9 +164,6 @@ static inline bool __pure lpa2_is_enabled(void) #define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER) #define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER) -#define PAGE_GCS __pgprot(_PAGE_GCS) -#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) - #define PIE_E0 ( \ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \ diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 08ee177432c2..75f343009b4b 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -34,6 +34,8 @@ static pgprot_t protection_map[16] __ro_after_init = { [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC }; +static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO; + /* * You really shouldn't be using read() or write() on /dev/mem. This might go * away in the future. @@ -73,9 +75,11 @@ static int __init adjust_protection_map(void) protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY; } - if (lpa2_is_enabled()) + if (lpa2_is_enabled()) { for (int i = 0; i < ARRAY_SIZE(protection_map); i++) pgprot_val(protection_map[i]) &= ~PTE_SHARED; + gcs_page_prot &= ~PTE_SHARED; + } return 0; } @@ -87,7 +91,7 @@ pgprot_t vm_get_page_prot(vm_flags_t vm_flags) /* Short circuit GCS to avoid bloating the table. */ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { - prot = _PAGE_GCS_RO; + prot = gcs_page_prot; } else { prot = pgprot_val(protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]); From 47a8aad135ac1aed04b7b0c0a8157fd208075827 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 23 Feb 2026 17:45:31 +0000 Subject: [PATCH 385/828] arm64: gcs: Honour mprotect(PROT_NONE) on shadow stack mappings vm_get_page_prot() short-circuits the protection_map[] lookup for a VM_SHADOW_STACK mapping since it uses a different PIE index from the typical read/write/exec permissions. However, the side effect is that it also ignores mprotect(PROT_NONE) by creating an accessible PTE. Special-case the !(vm_flags & VM_ACCESS_FLAGS) flags to use the protection_map[VM_NONE] permissions instead. No GCS attributes are required for an inaccessible PTE. Signed-off-by: Catalin Marinas Fixes: 6497b66ba694 ("arm64/mm: Map pages for guarded control stack") Cc: stable@vger.kernel.org Cc: Mark Brown Cc: Will Deacon Cc: David Hildenbrand Reviewed-by: David Hildenbrand (Arm) Signed-off-by: Will Deacon --- arch/arm64/mm/mmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 75f343009b4b..92b2f5097a96 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -91,7 +91,11 @@ pgprot_t vm_get_page_prot(vm_flags_t vm_flags) /* Short circuit GCS to avoid bloating the table. */ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { - prot = gcs_page_prot; + /* Honour mprotect(PROT_NONE) on shadow stack mappings */ + if (vm_flags & VM_ACCESS_FLAGS) + prot = gcs_page_prot; + else + prot = pgprot_val(protection_map[VM_NONE]); } else { prot = pgprot_val(protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]); From 9d1a7c4a457eac8a7e07e1685f95e54fa551db5e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 23 Feb 2026 17:45:32 +0000 Subject: [PATCH 386/828] kselftest: arm64: Check access to GCS after mprotect(PROT_NONE) A GCS mapping should not be accessible after mprotect(PROT_NONE). Add a kselftest for this scenario. Signed-off-by: Catalin Marinas Cc: Shuah Khan Cc: Mark Brown Cc: Will Deacon Cc: David Hildenbrand Reviewed-by: Mark Brown Signed-off-by: Will Deacon --- .../signal/testcases/gcs_prot_none_fault.c | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c new file mode 100644 index 000000000000..2259f454a202 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2026 ARM Limited + */ + +#include +#include +#include + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +static uint64_t *gcs_page; +static bool post_mprotect; + +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 +#endif + +static bool alloc_gcs(struct tdescr *td) +{ + long page_size = sysconf(_SC_PAGE_SIZE); + + gcs_page = (void *)syscall(__NR_map_shadow_stack, 0, + page_size, 0); + if (gcs_page == MAP_FAILED) { + fprintf(stderr, "Failed to map %ld byte GCS: %d\n", + page_size, errno); + return false; + } + + return true; +} + +static int gcs_prot_none_fault_trigger(struct tdescr *td) +{ + /* Verify that the page is readable (ie, not completely unmapped) */ + fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]); + + if (mprotect(gcs_page, sysconf(_SC_PAGE_SIZE), PROT_NONE) != 0) { + fprintf(stderr, "mprotect(PROT_NONE) failed: %d\n", errno); + return 0; + } + post_mprotect = true; + + /* This should trigger a fault if PROT_NONE is honoured for the GCS page */ + fprintf(stderr, "Read value after mprotect(PROT_NONE) 0x%lx\n", gcs_page[0]); + return 0; +} + +static int gcs_prot_none_fault_signal(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* A fault before mprotect(PROT_NONE) is unexpected. */ + if (!post_mprotect) + return 0; + + return 1; +} + +struct tdescr tde = { + .name = "GCS PROT_NONE fault", + .descr = "Read from GCS after mprotect(PROT_NONE) segfaults", + .feats_required = FEAT_GCS, + .timeout = 3, + .sig_ok = SIGSEGV, + .sanity_disabled = true, + .init = alloc_gcs, + .trigger = gcs_prot_none_fault_trigger, + .run = gcs_prot_none_fault_signal, +}; From bfd9c931d19aa59fb8371d557774fa169b15db9a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 18 Feb 2026 16:43:47 +0000 Subject: [PATCH 387/828] arm64: tlb: Allow XZR argument to TLBI ops The TLBI instruction accepts XZR as a register argument, and for TLBI operations with a register argument, there is no functional difference between using XZR or another GPR which contains zeroes. Operations without a register argument are encoded as if XZR were used. Allow the __TLBI_1() macro to use XZR when a register argument is all zeroes. Today this only results in a trivial code saving in __do_compat_cache_op()'s workaround for Neoverse-N1 erratum #1542419. In subsequent patches this pattern will be used more generally. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Oliver Upton Cc: Ryan Roberts Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a2d65d7d6aae..bf1cc9949dc8 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -38,12 +38,12 @@ : : ) #define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \ - "tlbi " #op ", %0\n" \ + "tlbi " #op ", %x0\n" \ ALTERNATIVE("nop\n nop", \ - "dsb ish\n tlbi " #op ", %0", \ + "dsb ish\n tlbi " #op ", %x0", \ ARM64_WORKAROUND_REPEAT_TLBI, \ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ - : : "r" (arg)) + : : "rZ" (arg)) #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) From a8f78680ee6bf795086384e8aea159a52814f827 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 18 Feb 2026 16:43:48 +0000 Subject: [PATCH 388/828] arm64: tlb: Optimize ARM64_WORKAROUND_REPEAT_TLBI The ARM64_WORKAROUND_REPEAT_TLBI workaround is used to mitigate several errata where broadcast TLBI;DSB sequences don't provide all the architecturally required synchronization. The workaround performs more work than necessary, and can have significant overhead. This patch optimizes the workaround, as explained below. The workaround was originally added for Qualcomm Falkor erratum 1009 in commit: d9ff80f83ecb ("arm64: Work around Falkor erratum 1009") As noted in the message for that commit, the workaround is applied even in cases where it is not strictly necessary. The workaround was later reused without changes for: * Arm Cortex-A76 erratum #1286807 SDEN v33: https://developer.arm.com/documentation/SDEN-885749/33-0/ * Arm Cortex-A55 erratum #2441007 SDEN v16: https://developer.arm.com/documentation/SDEN-859338/1600/ * Arm Cortex-A510 erratum #2441009 SDEN v19: https://developer.arm.com/documentation/SDEN-1873351/1900/ The important details to note are as follows: 1. All relevant errata only affect the ordering and/or completion of memory accesses which have been translated by an invalidated TLB entry. The actual invalidation of TLB entries is unaffected. 2. The existing workaround is applied to both broadcast and local TLB invalidation, whereas for all relevant errata it is only necessary to apply a workaround for broadcast invalidation. 3. The existing workaround replaces every TLBI with a TLBI;DSB;TLBI sequence, whereas for all relevant errata it is only necessary to execute a single additional TLBI;DSB sequence after any number of TLBIs are completed by a DSB. For example, for a sequence of batched TLBIs: TLBI [, ] TLBI [, ] TLBI [, ] DSB ISH ... the existing workaround will expand this to: TLBI [, ] DSB ISH // additional TLBI [, ] // additional TLBI [, ] DSB ISH // additional TLBI [, ] // additional TLBI [, ] DSB ISH // additional TLBI [, ] // additional DSB ISH ... whereas it is sufficient to have: TLBI [, ] TLBI [, ] TLBI [, ] DSB ISH TLBI [, ] // additional DSB ISH // additional Using a single additional TBLI and DSB at the end of the sequence can have significantly lower overhead as each DSB which completes a TLBI must synchronize with other PEs in the system, with potential performance effects both locally and system-wide. 4. The existing workaround repeats each specific TLBI operation, whereas for all relevant errata it is sufficient for the additional TLBI to use *any* operation which will be broadcast, regardless of which translation regime or stage of translation the operation applies to. For example, for a single TLBI: TLBI ALLE2IS DSB ISH ... the existing workaround will expand this to: TLBI ALLE2IS DSB ISH TLBI ALLE2IS // additional DSB ISH // additional ... whereas it is sufficient to have: TLBI ALLE2IS DSB ISH TLBI VALE1IS, XZR // additional DSB ISH // additional As the additional TLBI doesn't have to match a specific earlier TLBI, the additional TLBI can be implemented in separate code, with no memory of the earlier TLBIs. The additional TLBI can also use a cheaper TLBI operation. 5. The existing workaround is applied to both Stage-1 and Stage-2 TLB invalidation, whereas for all relevant errata it is only necessary to apply a workaround for Stage-1 invalidation. Architecturally, TLBI operations which invalidate only Stage-2 information (e.g. IPAS2E1IS) are not required to invalidate TLB entries which combine information from Stage-1 and Stage-2 translation table entries, and consequently may not complete memory accesses translated by those combined entries. In these cases, completion of memory accesses is only guaranteed after subsequent invalidation of Stage-1 information (e.g. VMALLE1IS). Taking the above points into account, this patch reworks the workaround logic to reduce overhead: * New __tlbi_sync_s1ish() and __tlbi_sync_s1ish_hyp() functions are added and used in place of any dsb(ish) which is used to complete broadcast Stage-1 TLB maintenance. When the ARM64_WORKAROUND_REPEAT_TLBI workaround is enabled, these helpers will execute an additional TLBI;DSB sequence. For consistency, it might make sense to add __tlbi_sync_*() helpers for local and stage 2 maintenance. For now I've left those with open-coded dsb() to keep the diff small. * The duplication of TLBIs in __TLBI_0() and __TLBI_1() is removed. This is no longer needed as the necessary synchronization will happen in __tlbi_sync_s1ish() or __tlbi_sync_s1ish_hyp(). * The additional TLBI operation is chosen to have minimal impact: - __tlbi_sync_s1ish() uses "TLBI VALE1IS, XZR". This is only used at EL1 or at EL2 with {E2H,TGE}=={1,1}, where it will target an unused entry for the reserved ASID in the kernel's own translation regime, and have no adverse affect. - __tlbi_sync_s1ish_hyp() uses "TLBI VALE2IS, XZR". This is only used in hyp code, where it will target an unused entry in the hyp code's TTBR0 mapping, and should have no adverse effect. * As __TLBI_0() and __TLBI_1() no longer replace each TLBI with a TLBI;DSB;TLBI sequence, batching TLBIs is worthwhile, and there's no need for arch_tlbbatch_should_defer() to consider ARM64_WORKAROUND_REPEAT_TLBI. When building defconfig with GCC 15.1.0, compared to v6.19-rc1, this patch saves ~1KiB of text, makes the vmlinux ~42KiB smaller, and makes the resulting Image 64KiB smaller: | [mark@lakrids:~/src/linux]% size vmlinux-* | text data bss dec hex filename | 21179831 19660919 708216 41548966 279fca6 vmlinux-after | 21181075 19660903 708216 41550194 27a0172 vmlinux-before | [mark@lakrids:~/src/linux]% ls -l vmlinux-* | -rwxr-xr-x 1 mark mark 157771472 Feb 4 12:05 vmlinux-after | -rwxr-xr-x 1 mark mark 157815432 Feb 4 12:05 vmlinux-before | [mark@lakrids:~/src/linux]% ls -l Image-* | -rw-r--r-- 1 mark mark 41007616 Feb 4 12:05 Image-after | -rw-r--r-- 1 mark mark 41073152 Feb 4 12:05 Image-before Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Oliver Upton Cc: Ryan Roberts Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 59 ++++++++++++++++++------------- arch/arm64/kernel/sys_compat.c | 2 +- arch/arm64/kvm/hyp/nvhe/mm.c | 2 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 8 ++--- arch/arm64/kvm/hyp/pgtable.c | 2 +- arch/arm64/kvm/hyp/vhe/tlb.c | 10 +++--- 6 files changed, 47 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bf1cc9949dc8..1416e652612b 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -31,18 +31,10 @@ */ #define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \ "tlbi " #op "\n" \ - ALTERNATIVE("nop\n nop", \ - "dsb ish\n tlbi " #op, \ - ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : ) #define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \ "tlbi " #op ", %x0\n" \ - ALTERNATIVE("nop\n nop", \ - "dsb ish\n tlbi " #op ", %x0", \ - ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : "rZ" (arg)) #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) @@ -181,6 +173,34 @@ static inline unsigned long get_trans_granule(void) (__pages >> (5 * (scale) + 1)) - 1; \ }) +#define __repeat_tlbi_sync(op, arg...) \ +do { \ + if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) \ + break; \ + __tlbi(op, ##arg); \ + dsb(ish); \ +} while (0) + +/* + * Complete broadcast TLB maintenance issued by the host which invalidates + * stage 1 information in the host's own translation regime. + */ +static inline void __tlbi_sync_s1ish(void) +{ + dsb(ish); + __repeat_tlbi_sync(vale1is, 0); +} + +/* + * Complete broadcast TLB maintenance issued by hyp code which invalidates + * stage 1 translation information in any translation regime. + */ +static inline void __tlbi_sync_s1ish_hyp(void) +{ + dsb(ish); + __repeat_tlbi_sync(vale2is, 0); +} + /* * TLB Invalidation * ================ @@ -279,7 +299,7 @@ static inline void flush_tlb_all(void) { dsb(ishst); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish(); isb(); } @@ -291,7 +311,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) asid = __TLBI_VADDR(0, ASID(mm)); __tlbi(aside1is, asid); __tlbi_user(aside1is, asid); - dsb(ish); + __tlbi_sync_s1ish(); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } @@ -345,20 +365,11 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { flush_tlb_page_nosync(vma, uaddr); - dsb(ish); + __tlbi_sync_s1ish(); } static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) { - /* - * TLB flush deferral is not required on systems which are affected by - * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation - * will have two consecutive TLBI instructions with a dsb(ish) in between - * defeating the purpose (i.e save overall 'dsb ish' cost). - */ - if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) - return false; - return true; } @@ -374,7 +385,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) */ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { - dsb(ish); + __tlbi_sync_s1ish(); } /* @@ -509,7 +520,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, { __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, last_level, tlb_level); - dsb(ish); + __tlbi_sync_s1ish(); } static inline void local_flush_tlb_contpte(struct vm_area_struct *vma, @@ -557,7 +568,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); __flush_tlb_range_op(vaale1is, start, pages, stride, 0, TLBI_TTL_UNKNOWN, false, lpa2_is_enabled()); - dsb(ish); + __tlbi_sync_s1ish(); isb(); } @@ -571,7 +582,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) dsb(ishst); __tlbi(vaae1is, addr); - dsb(ish); + __tlbi_sync_s1ish(); isb(); } diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 4a609e9b65de..b9d4998c97ef 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) * We pick the reserved-ASID to minimise the impact. */ __tlbi(aside1is, __TLBI_VADDR(0, 0)); - dsb(ish); + __tlbi_sync_s1ish(); } ret = caches_clean_inval_user_pou(start, start + chunk); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index ae8391baebc3..218976287d3f 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -271,7 +271,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) */ dsb(ishst); __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 48da9ca9763f..3dc1ce0d27fe 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, */ dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -226,7 +226,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -240,7 +240,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) enter_vmid_context(mmu, &cxt, false); __tlbi(vmalls12e1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -266,5 +266,5 @@ void __kvm_flush_vm_context(void) /* Same remark as in enter_vmid_context() */ dsb(ish); __tlbi(alle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0e4ddd28ef5d..9b480f947da2 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -501,7 +501,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, *unmapped += granule; } - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); mm_ops->put_page(ctx->ptep); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index ec2569818629..35855dadfb1b 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -115,7 +115,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, */ dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -176,7 +176,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -192,7 +192,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) enter_vmid_context(mmu, &cxt); __tlbi(vmalls12e1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -217,7 +217,7 @@ void __kvm_flush_vm_context(void) { dsb(ishst); __tlbi(alle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); } /* @@ -358,7 +358,7 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) default: ret = -EINVAL; } - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); if (mmu) From 468711a40d5dfc01bf0a24c1981246a2c93ac405 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 10 Feb 2026 19:12:25 +0100 Subject: [PATCH 389/828] PCI: dwc: ep: Refresh MSI Message Address cache on change Endpoint drivers use dw_pcie_ep_raise_msi_irq() to raise MSI interrupts to the host. After 8719c64e76bf ("PCI: dwc: ep: Cache MSI outbound iATU mapping"), dw_pcie_ep_raise_msi_irq() caches the Message Address from the MSI Capability in ep->msi_msg_addr. But that Message Address is controlled by the host, and it may change. For example, if: - firmware on the host configures the Message Address and triggers an MSI, - a driver on the Endpoint raises the MSI via dw_pcie_ep_raise_msi_irq(), which caches the Message Address, - a kernel on the host reconfigures the Message Address and the host kernel driver triggers another MSI, dw_pcie_ep_raise_msi_irq() notices that the Message Address no longer matches the cached ep->msi_msg_addr, warns about it, and returns error instead of raising the MSI. The host kernel may hang because it never receives the MSI. This was seen with the nvmet_pci_epf_driver: the host UEFI performs NVMe commands, e.g. Identify Controller to get the name of the controller, nvmet-pci-epf posts the completion queue entry and raises an IRQ using dw_pcie_ep_raise_msi_irq(). When the host boots Linux, we see a WARN_ON_ONCE() from dw_pcie_ep_raise_msi_irq(), and the host kernel hangs because the nvme driver never gets an IRQ. Remove the warning when dw_pcie_ep_raise_msi_irq() notices that Message Address has changed, remap using the new address, and update the ep->msi_msg_addr cache. Fixes: 8719c64e76bf ("PCI: dwc: ep: Cache MSI outbound iATU mapping") Signed-off-by: Niklas Cassel [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Tested-by: Shin'ichiro Kawasaki Tested-by: Koichiro Den Acked-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260210181225.3926165-2-cassel@kernel.org --- .../pci/controller/dwc/pcie-designware-ep.c | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 295076cf70de..35d8fc5e1d20 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -905,6 +905,19 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, * supported, so we avoid reprogramming the region on every MSI, * specifically unmapping immediately after writel(). */ + if (ep->msi_iatu_mapped && (ep->msi_msg_addr != msg_addr || + ep->msi_map_size != map_size)) { + /* + * The host changed the MSI target address or the required + * mapping size changed. Reprogramming the iATU when there are + * operations in flight is unsafe on this controller. However, + * there is no unified way to check if we have operations in + * flight, thus we don't know if we should WARN() or not. + */ + dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); + ep->msi_iatu_mapped = false; + } + if (!ep->msi_iatu_mapped) { ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, @@ -915,15 +928,6 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, ep->msi_iatu_mapped = true; ep->msi_msg_addr = msg_addr; ep->msi_map_size = map_size; - } else if (WARN_ON_ONCE(ep->msi_msg_addr != msg_addr || - ep->msi_map_size != map_size)) { - /* - * The host changed the MSI target address or the required - * mapping size changed. Reprogramming the iATU at runtime is - * unsafe on this controller, so bail out instead of trying to - * update the existing region. - */ - return -EINVAL; } writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset); From c22533c66ccae10511ad6a7afc34bb26c47577e3 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 11 Feb 2026 18:55:41 +0100 Subject: [PATCH 390/828] PCI: dwc: ep: Flush MSI-X write before unmapping its ATU entry Endpoint drivers use dw_pcie_ep_raise_msix_irq() to raise an MSI-X interrupt to the host using a writel(), which generates a PCI posted write transaction. There's no completion for posted writes, so the writel() may return before the PCI write completes. dw_pcie_ep_raise_msix_irq() also unmaps the outbound ATU entry used for the PCI write, so the write races with the unmap. If the PCI write loses the race with the ATU unmap, the write may corrupt host memory or cause IOMMU errors, e.g., these when running fio with a larger queue depth against nvmet-pci-epf: arm-smmu-v3 fc900000.iommu: 0x0000010000000010 arm-smmu-v3 fc900000.iommu: 0x0000020000000000 arm-smmu-v3 fc900000.iommu: 0x000000090000f040 arm-smmu-v3 fc900000.iommu: 0x0000000000000000 arm-smmu-v3 fc900000.iommu: event: F_TRANSLATION client: 0000:01:00.0 sid: 0x100 ssid: 0x0 iova: 0x90000f040 ipa: 0x0 arm-smmu-v3 fc900000.iommu: unpriv data write s1 "Input address caused fault" stag: 0x0 Flush the write by performing a readl() of the same address to ensure that the write has reached the destination before the ATU entry is unmapped. The same problem was solved for dw_pcie_ep_raise_msi_irq() in commit 8719c64e76bf ("PCI: dwc: ep: Cache MSI outbound iATU mapping"), but there it was solved by dedicating an outbound iATU only for MSI. We can't do the same for MSI-X because each vector can have a different msg_addr and the msg_addr may be changed while the vector is masked. Fixes: beb4641a787d ("PCI: dwc: Add MSI-X callbacks handler") Signed-off-by: Niklas Cassel [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Frank Li Link: https://patch.msgid.link/20260211175540.105677-2-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-designware-ep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 35d8fc5e1d20..c57ae4d6c5c0 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -1014,6 +1014,9 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, writel(msg_data, ep->msi_mem + offset); + /* flush posted write before unmap */ + readl(ep->msi_mem + offset); + dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); return 0; From 75c151ceaacf5ca8f2f34ebf863d88002fb12587 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 25 Feb 2026 12:47:52 -0800 Subject: [PATCH 391/828] accel/amdxdna: Use a different name for latest firmware Using legacy driver with latest firmware causes a power off issue. Fix this by assigning a different filename (npu_7.sbin) to the latest firmware. The driver attempts to load the latest firmware first and falls back to the previous firmware version if loading fails. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5009 Fixes: f1eac46fe5f7 ("accel/amdxdna: Update firmware version check for latest firmware") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260225204752.2711734-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_pci.c | 20 +++++++++++++++++++- drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +++ drivers/accel/amdxdna/npu1_regs.c | 2 +- drivers/accel/amdxdna/npu4_regs.c | 2 +- drivers/accel/amdxdna/npu5_regs.c | 2 +- drivers/accel/amdxdna/npu6_regs.c | 2 +- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 4b3e6bb97bd2..85079b6fc5d9 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -32,6 +32,11 @@ static int aie2_max_col = XRS_MAX_COL; module_param(aie2_max_col, uint, 0600); MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used"); +static char *npu_fw[] = { + "npu_7.sbin", + "npu.sbin" +}; + /* * The management mailbox channel is allocated by firmware. * The related register and ring buffer information is on SRAM BAR. @@ -489,6 +494,7 @@ static int aie2_init(struct amdxdna_dev *xdna) struct psp_config psp_conf; const struct firmware *fw; unsigned long bars = 0; + char *fw_full_path; int i, nvec, ret; if (!hypervisor_is_type(X86_HYPER_NATIVE)) { @@ -503,7 +509,19 @@ static int aie2_init(struct amdxdna_dev *xdna) ndev->priv = xdna->dev_info->dev_priv; ndev->xdna = xdna; - ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev); + for (i = 0; i < ARRAY_SIZE(npu_fw); i++) { + fw_full_path = kasprintf(GFP_KERNEL, "%s%s", ndev->priv->fw_path, npu_fw[i]); + if (!fw_full_path) + return -ENOMEM; + + ret = firmware_request_nowarn(&fw, fw_full_path, &pdev->dev); + kfree(fw_full_path); + if (!ret) { + XDNA_INFO(xdna, "Load firmware %s%s", ndev->priv->fw_path, npu_fw[i]); + break; + } + } + if (ret) { XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", ndev->priv->fw_path, ret); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 4ada45d06fcf..a4384593bdcc 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -23,6 +23,9 @@ MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); +MODULE_FIRMWARE("amdnpu/1502_00/npu_7.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_10/npu_7.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin"); /* * 0.0: Initial version diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index 6f36a27b5a02..6e3d3ca69c04 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -72,7 +72,7 @@ static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { }; static const struct amdxdna_dev_priv npu1_dev_priv = { - .fw_path = "amdnpu/1502_00/npu.sbin", + .fw_path = "amdnpu/1502_00/", .rt_config = npu1_default_rt_cfg, .dpm_clk_tbl = npu1_dpm_clk_table, .fw_feature_tbl = npu1_fw_feature_table, diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index a8d6f76dde5f..ce25eef5fc34 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -98,7 +98,7 @@ const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { }; static const struct amdxdna_dev_priv npu4_dev_priv = { - .fw_path = "amdnpu/17f0_10/npu.sbin", + .fw_path = "amdnpu/17f0_10/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index c0a35cfd886c..c0ac5daf32ee 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -63,7 +63,7 @@ #define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE static const struct amdxdna_dev_priv npu5_dev_priv = { - .fw_path = "amdnpu/17f0_11/npu.sbin", + .fw_path = "amdnpu/17f0_11/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c index 1fb07df99186..ce591ed0d483 100644 --- a/drivers/accel/amdxdna/npu6_regs.c +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -63,7 +63,7 @@ #define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE static const struct amdxdna_dev_priv npu6_dev_priv = { - .fw_path = "amdnpu/17f0_10/npu.sbin", + .fw_path = "amdnpu/17f0_10/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, From 49abfa812617a7f2d0132c70d23ac98b389c6ec1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Feb 2026 12:41:30 +0000 Subject: [PATCH 392/828] drm/amdgpu/userq: Fix reference leak in amdgpu_userq_wait_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop reference to syncobj and timeline fence when aborting the ioctl due output array being too small. Reviewed-by: Alex Deucher Signed-off-by: Tvrtko Ursulin Fixes: a292fdecd728 ("drm/amdgpu: Implement userqueue signal/wait IOCTL") Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 68951e9c3e6bb22396bc42ef2359751c8315dd27) Cc: # v6.16+ --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 8013260e29dc..9b9947b94b89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -876,6 +876,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_fence_unwrap_for_each(f, &iter, fence) { if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } @@ -900,6 +901,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } From 7b7d7693a55d606d700beb9549c9f7f0e5d9c24f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Feb 2026 12:41:31 +0000 Subject: [PATCH 393/828] drm/amdgpu/userq: Do not allow userspace to trivially triger kernel warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace can either deliberately pass in the too small num_fences, or the required number can legitimately grow between the two calls to the userq wait ioctl. In both cases we do not want the emit the kernel warning backtrace since nothing is wrong with the kernel and userspace will simply get an errno reported back. So lets simply drop the WARN_ONs. Reviewed-by: Alex Deucher Signed-off-by: Tvrtko Ursulin Fixes: a292fdecd728 ("drm/amdgpu: Implement userqueue signal/wait IOCTL") Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 2c333ea579de6cc20ea7bc50e9595ef72863e65c) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 9b9947b94b89..d972dc46f5a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -833,7 +833,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, DMA_RESV_USAGE_READ, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; goto free_fences; } @@ -850,7 +850,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, DMA_RESV_USAGE_WRITE, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; goto free_fences; } @@ -874,7 +874,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, goto free_fences; dma_fence_unwrap_for_each(f, &iter, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; dma_fence_put(fence); goto free_fences; @@ -899,7 +899,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (r) goto free_fences; - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; dma_fence_put(fence); goto free_fences; From ea78f8c68f4f6211c557df49174c54d167821962 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 20 Feb 2026 13:47:58 +0530 Subject: [PATCH 394/828] drm/amdgpu: add upper bound check on user inputs in signal ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Huge input values in amdgpu_userq_signal_ioctl can lead to a OOM and could be exploited. So check these input value against AMDGPU_USERQ_MAX_HANDLES which is big enough value for genuine use cases and could potentially avoid OOM. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit be267e15f99bc97cbe202cd556717797cdcf79a5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index d972dc46f5a8..c5f5af20af75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -35,6 +35,8 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops; static struct kmem_cache *amdgpu_userq_fence_slab; +#define AMDGPU_USERQ_MAX_HANDLES (1U << 16) + int amdgpu_userq_fence_slab_init(void) { amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", @@ -478,6 +480,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_syncobj_handles = args->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), size_mul(sizeof(u32), num_syncobj_handles)); From 64ac7c09fc44985ec9bb6a9db740899fa40ca613 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 24 Feb 2026 12:13:09 +0530 Subject: [PATCH 395/828] drm/amdgpu: add upper bound check on user inputs in wait ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Huge input values in amdgpu_userq_wait_ioctl can lead to a OOM and could be exploited. So check these input value against AMDGPU_USERQ_MAX_HANDLES which is big enough value for genuine use cases and could potentially avoid OOM. v2: squash in Srini's fix Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit fcec012c664247531aed3e662f4280ff804d1476) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index c5f5af20af75..7e9cf1868cc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -671,6 +671,11 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_read_bo_handles = wait_info->num_bo_read_handles; bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), size_mul(sizeof(u32), num_read_bo_handles)); From 28dfe4317541e57fe52f9a290394cd29c348228b Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Mon, 23 Feb 2026 12:45:37 +0100 Subject: [PATCH 396/828] drm/amd/display: Use GFP_ATOMIC in dc_create_stream_for_sink This can be called while preemption is disabled, for example by dcn32_internal_validate_bw which is called with the FPU active. Fixes "BUG: scheduling while atomic" messages I encounter on my Navi31 machine. Signed-off-by: Natalie Vock Signed-off-by: Alex Deucher (cherry picked from commit b42dae2ebc5c84a68de63ec4ffdfec49362d53f1) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 246893d80f1f..baf820e6eae8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -170,11 +170,11 @@ struct dc_stream_state *dc_create_stream_for_sink( if (sink == NULL) goto fail; - stream = kzalloc_obj(struct dc_stream_state); + stream = kzalloc_obj(struct dc_stream_state, GFP_ATOMIC); if (stream == NULL) goto fail; - stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_KERNEL); + stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_ATOMIC); if (stream->update_scratch == NULL) goto fail; From 5e0bcc7b88bcd081aaae6f481b10d9ab294fcb69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Feb 2026 14:00:07 -0800 Subject: [PATCH 397/828] drm/amdgpu: Unlock a mutex before destroying it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mutexes must be unlocked before these are destroyed. This has been detected by the Clang thread-safety analyzer. Cc: Alex Deucher Cc: Christian König Cc: Yang Wang Cc: Hawking Zhang Cc: amd-gfx@lists.freedesktop.org Fixes: f5e4cc8461c4 ("drm/amdgpu: implement RAS ACA driver framework") Reviewed-by: Yang Wang Acked-by: Christian König Signed-off-by: Bart Van Assche Signed-off-by: Alex Deucher (cherry picked from commit 270258ba320beb99648dceffb67e86ac76786e55) --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index afe5ca81beec..db7858fe0c3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -641,6 +641,7 @@ static void aca_error_fini(struct aca_error *aerr) aca_bank_error_remove(aerr, bank_error); out_unlock: + mutex_unlock(&aerr->lock); mutex_destroy(&aerr->lock); } From 480ad5f6ead4a47b969aab6618573cd6822bb6a4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Feb 2026 13:50:23 -0800 Subject: [PATCH 398/828] drm/amdgpu: Fix locking bugs in error paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not unlock psp->ras_context.mutex if it has not been locked. This has been detected by the Clang thread-safety analyzer. Cc: Alex Deucher Cc: Christian König Cc: YiPeng Chai Cc: Hawking Zhang Cc: amd-gfx@lists.freedesktop.org Fixes: b3fb79cda568 ("drm/amdgpu: add mutex to protect ras shared memory") Acked-by: Christian König Signed-off-by: Bart Van Assche Signed-off-by: Alex Deucher (cherry picked from commit 6fa01b4335978051d2cd80841728fd63cc597970) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 6e8aad91bcd3..0d3c18f04ac3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -332,13 +332,13 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size if (!context || !context->initialized) { dev_err(adev->dev, "TA is not initialized\n"); ret = -EINVAL; - goto err_free_shared_buf; + goto free_shared_buf; } if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) { dev_err(adev->dev, "Unsupported function to invoke TA\n"); ret = -EOPNOTSUPP; - goto err_free_shared_buf; + goto free_shared_buf; } context->session_id = ta_id; @@ -346,7 +346,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size mutex_lock(&psp->ras_context.mutex); ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); if (ret) - goto err_free_shared_buf; + goto unlock; ret = psp_fn_ta_invoke(psp, cmd_id); if (ret || context->resp_status) { @@ -354,15 +354,17 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size ret, context->resp_status); if (!ret) { ret = -EINVAL; - goto err_free_shared_buf; + goto unlock; } } if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; -err_free_shared_buf: +unlock: mutex_unlock(&psp->ras_context.mutex); + +free_shared_buf: kfree(shared_buf); return ret; From a5fe1a54513196e4bc8f9170006057dc31e7155e Mon Sep 17 00:00:00 2001 From: sguttula Date: Sat, 21 Feb 2026 10:03:32 +0530 Subject: [PATCH 399/828] drm/amdgpu/vcn5: Add SMU dpm interface type This will set AMDGPU_VCN_SMU_DPM_INTERFACE_* smu_type based on soc type and fixing ring timeout issue seen for DPM enabled case. Signed-off-by: sguttula Reviewed-by: Pratik Vishwakarma Signed-off-by: Alex Deucher (cherry picked from commit f0f23c315b38c55e8ce9484cf59b65811f350630) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 0202df5db1e1..6109124f852e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -174,6 +174,10 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); From b57c4ec98c17789136a4db948aec6daadceb5024 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 24 Feb 2026 10:18:51 +0530 Subject: [PATCH 400/828] drm/amdgpu: Fix error handling in slot reset If the device has not recovered after slot reset is called, it goes to out label for error handling. There it could make decision based on uninitialized hive pointer and could result in accessing an uninitialized list. Initialize the list and hive properly so that it handles the error situation and also releases the reset domain lock which is acquired during error_detected callback. Fixes: 732c6cefc1ec ("drm/amdgpu: Replace tmp_adev with hive in amdgpu_pci_slot_reset") Signed-off-by: Lijo Lazar Reviewed-by: Ce Sun Signed-off-by: Alex Deucher (cherry picked from commit bb71362182e59caa227e4192da5a612b09349696) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d9789e0b5201..3e19b51a2763 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -7059,6 +7059,15 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, &device_list); + } else { + list_add_tail(&adev->reset_list, &device_list); + } if (adev->pcie_reset_ctx.swus) link_dev = adev->pcie_reset_ctx.swus; @@ -7099,19 +7108,13 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); - INIT_LIST_HEAD(&device_list); - hive = amdgpu_get_xgmi_hive(adev); if (hive) { - mutex_lock(&hive->hive_lock); reset_context.hive = hive; - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) tmp_adev->pcie_reset_ctx.in_link_reset = true; - list_add_tail(&tmp_adev->reset_list, &device_list); - } } else { set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - list_add_tail(&adev->reset_list, &device_list); } r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); From 6b0d812971370c64b837a2db4275410f478272fe Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 25 Feb 2026 10:51:16 -0600 Subject: [PATCH 401/828] drm/amd: Disable MES LR compute W/A A workaround was introduced in commit 1fb710793ce2 ("drm/amdgpu: Enable MES lr_compute_wa by default") to help with some hangs observed in gfx1151. This WA didn't fully fix the issue. It was actually fixed by adjusting the VGPR size to the correct value that matched the hardware in commit b42f3bf9536c ("drm/amdkfd: bump minimum vgpr size for gfx1151"). There are reports of instability on other products with newer GC microcode versions, and I believe they're caused by this workaround. As we don't need the workaround any more, remove it. Fixes: b42f3bf9536c ("drm/amdkfd: bump minimum vgpr size for gfx1151") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 9973e64bd6ee7642860a6f3b6958cbf14e89cabd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 5 ----- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 5 ----- 2 files changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 09ebb13ca5e8..a926a330700e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -720,11 +720,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(mes->adev->dev, - "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b1c864dc79a8..5bfa5d1d0b36 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -779,11 +779,6 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(adev->dev, - "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell From 2a064262eb378263792cf1fb044de631ac41bcc5 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 25 Feb 2026 21:15:23 +0000 Subject: [PATCH 402/828] sched_ext: Fix out-of-bounds access in scx_idle_init_masks() scx_idle_node_masks is allocated with num_possible_nodes() elements but indexed by NUMA node IDs via for_each_node(). On systems with non-contiguous NUMA node numbering (e.g. nodes 0 and 4), node IDs can exceed the array size, causing out-of-bounds memory corruption. Use nr_node_ids instead, which represents the maximum node ID range and is the correct size for arrays indexed by node ID. Fixes: 7c60329e3521 ("sched_ext: Add NUMA-awareness to the default idle selection policy") Signed-off-by: David Carlier Signed-off-by: Tejun Heo --- kernel/sched/ext_idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 3d9d404d5cd2..e2da6c3968a6 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -663,8 +663,8 @@ void scx_idle_init_masks(void) BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.cpu, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.smt, GFP_KERNEL)); - /* Allocate per-node idle cpumasks */ - scx_idle_node_masks = kcalloc(num_possible_nodes(), + /* Allocate per-node idle cpumasks (use nr_node_ids for non-contiguous NUMA nodes) */ + scx_idle_node_masks = kcalloc(nr_node_ids, sizeof(*scx_idle_node_masks), GFP_KERNEL); BUG_ON(!scx_idle_node_masks); From 12133a483dfa832241fbbf09321109a0ea8a520e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 12:28:30 +0100 Subject: [PATCH 403/828] nfc: pn533: properly drop the usb interface reference on disconnect When the device is disconnected from the driver, there is a "dangling" reference count on the usb interface that was grabbed in the probe callback. Fix this up by properly dropping the reference after we are done with it. Cc: stable Signed-off-by: Greg Kroah-Hartman Reviewed-by: Simon Horman Fixes: c46ee38620a2 ("NFC: pn533: add NXP pn533 nfc device driver") Link: https://patch.msgid.link/2026022329-flashing-ought-7573@gregkh Signed-off-by: Jakub Kicinski --- drivers/nfc/pn533/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 018a80674f06..0f12f86ebb02 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -628,6 +628,7 @@ static void pn533_usb_disconnect(struct usb_interface *interface) usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); kfree(phy->ack_buffer); + usb_put_dev(phy->udev); nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n"); } From 11de1d3ae5565ed22ef1f89d73d8f2d00322c699 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 13:58:48 +0100 Subject: [PATCH 404/828] net: usb: pegasus: validate USB endpoints The pegasus driver should validate that the device it is probing has the proper number and types of USB endpoints it is expecting before it binds to it. If a malicious device were to not have the same urbs the driver will crash later on when it blindly accesses these endpoints. Cc: Petko Manolov Cc: stable Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026022347-legibly-attest-cc5c@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/usb/pegasus.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index b84968c5f074..c497202b78e8 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -812,8 +812,19 @@ static void unlink_all_urbs(pegasus_t *pegasus) static int alloc_urbs(pegasus_t *pegasus) { + static const u8 bulk_ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + 3 | USB_DIR_IN, + 0}; int res = -ENOMEM; + if (!usb_check_bulk_endpoints(pegasus->intf, bulk_ep_addr) || + !usb_check_int_endpoints(pegasus->intf, int_ep_addr)) + return -ENODEV; + pegasus->rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!pegasus->rx_urb) { return res; @@ -1168,6 +1179,7 @@ static int pegasus_probe(struct usb_interface *intf, pegasus = netdev_priv(net); pegasus->dev_index = dev_index; + pegasus->intf = intf; res = alloc_urbs(pegasus); if (res < 0) { @@ -1179,7 +1191,6 @@ static int pegasus_probe(struct usb_interface *intf, INIT_DELAYED_WORK(&pegasus->carrier_check, check_carrier); - pegasus->intf = intf; pegasus->usb = dev; pegasus->net = net; From c58b6c29a4c9b8125e8ad3bca0637e00b71e2693 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 13:59:26 +0100 Subject: [PATCH 405/828] net: usb: kalmia: validate USB endpoints The kalmia driver should validate that the device it is probing has the proper number and types of USB endpoints it is expecting before it binds to it. If a malicious device were to not have the same urbs the driver will crash later on when it blindly accesses these endpoints. Cc: stable Signed-off-by: Greg Kroah-Hartman Reviewed-by: Simon Horman Fixes: d40261236e8e ("net/usb: Add Samsung Kalmia driver for Samsung GT-B3730") Link: https://patch.msgid.link/2026022326-shack-headstone-ef6f@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/usb/kalmia.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index 613fc6910f14..ee9c48f7f68f 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -132,11 +132,18 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf) { int status; u8 ethernet_addr[ETH_ALEN]; + static const u8 ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; /* Don't bind to AT command interface */ if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) return -EINVAL; + if (!usb_check_bulk_endpoints(intf, ep_addr)) + return -ENODEV; + dev->in = usb_rcvbulkpipe(dev->udev, 0x81 & USB_ENDPOINT_NUMBER_MASK); dev->out = usb_sndbulkpipe(dev->udev, 0x02 & USB_ENDPOINT_NUMBER_MASK); dev->status = NULL; From 4b063c002ca759d1b299988ee23f564c9609c875 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 14:00:06 +0100 Subject: [PATCH 406/828] net: usb: kaweth: validate USB endpoints The kaweth driver should validate that the device it is probing has the proper number and types of USB endpoints it is expecting before it binds to it. If a malicious device were to not have the same urbs the driver will crash later on when it blindly accesses these endpoints. Cc: stable Signed-off-by: Greg Kroah-Hartman Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://patch.msgid.link/2026022305-substance-virtual-c728@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/usb/kaweth.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index e01d14f6c366..cb2472b59e10 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -883,6 +883,13 @@ static int kaweth_probe( const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; int rv = -EIO; + static const u8 bulk_ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + 3 | USB_DIR_IN, + 0}; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", @@ -896,6 +903,12 @@ static int kaweth_probe( (int)udev->descriptor.bLength, (int)udev->descriptor.bDescriptorType); + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(dev, "couldn't find required endpoints\n"); + return -ENODEV; + } + netdev = alloc_etherdev(sizeof(*kaweth)); if (!netdev) return -ENOMEM; From 5cc619583c7e735c4fb801bede671fb6f9c79425 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 18:32:18 +0100 Subject: [PATCH 407/828] vsock: Use container_of() to get net namespace in sysctl handlers current->nsproxy is should not be accessed directly as syzbot has found that it could be NULL at times, causing crashes. Fix up the af_vsock sysctl handlers to use container_of() to deal with the current net namespace instead of attempting to rely on current. This is the same type of change done in commit 7f5611cbc487 ("rds: sysctl: rds_tcp_{rcv,snd}buf: avoid using current->nsproxy") Signed-off-by: Greg Kroah-Hartman Reviewed-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Fixes: eafb64f40ca4 ("vsock: add netns to vsock core") Link: https://patch.msgid.link/2026022318-rearview-gallery-ae13@gregkh Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9880756d9eff..f4062c6a1944 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2825,7 +2825,7 @@ static int vsock_net_mode_string(const struct ctl_table *table, int write, if (write) return -EPERM; - net = current->nsproxy->net_ns; + net = container_of(table->data, struct net, vsock.mode); return __vsock_net_mode_string(table, write, buffer, lenp, ppos, vsock_net_mode(net), NULL); @@ -2838,7 +2838,7 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, struct net *net; int ret; - net = current->nsproxy->net_ns; + net = container_of(table->data, struct net, vsock.child_ns_mode); ret = __vsock_net_mode_string(table, write, buffer, lenp, ppos, vsock_net_child_mode(net), &new_mode); From 1e3bb184e94125bae7c1703472109a646d0f79d9 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Tue, 24 Feb 2026 09:20:12 +0100 Subject: [PATCH 408/828] tcp: re-enable acceptance of FIN packets when RWIN is 0 Commit 2bd99aef1b19 ("tcp: accept bare FIN packets under memory pressure") allowed accepting FIN packets in tcp_data_queue() even when the receive window was closed, to prevent ACK/FIN loops with broken clients. Such a FIN packet is in sequence, but because the FIN consumes a sequence number, it extends beyond the window. Before commit 9ca48d616ed7 ("tcp: do not accept packets beyond window"), tcp_sequence() only required the seq to be within the window. After that change, the entire packet (including the FIN) must fit within the window. As a result, such FIN packets are now dropped and the handling path is no longer reached. Be more lenient by not counting the sequence number consumed by the FIN when calling tcp_sequence(), restoring the previous behavior for cases where only the FIN extends beyond the window. Fixes: 9ca48d616ed7 ("tcp: do not accept packets beyond window") Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260224-fix_zero_wnd_fin-v2-1-a16677ea7cea@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e7b41abb82aa..1c6b8ca67918 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4858,15 +4858,24 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk, */ static enum skb_drop_reason tcp_sequence(const struct sock *sk, - u32 seq, u32 end_seq) + u32 seq, u32 end_seq, + const struct tcphdr *th) { const struct tcp_sock *tp = tcp_sk(sk); + u32 seq_limit; if (before(end_seq, tp->rcv_wup)) return SKB_DROP_REASON_TCP_OLD_SEQUENCE; - if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) { - if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) + seq_limit = tp->rcv_nxt + tcp_receive_window(tp); + if (unlikely(after(end_seq, seq_limit))) { + /* Some stacks are known to handle FIN incorrectly; allow the + * FIN to extend beyond the window and check it in detail later. + */ + if (!after(end_seq - th->fin, seq_limit)) + return SKB_NOT_DROPPED_YET; + + if (after(seq, seq_limit)) return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; /* Only accept this packet if receive queue is empty. */ @@ -6379,7 +6388,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, step1: /* Step 1: check sequence number */ - reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, th); if (reason) { /* RFC793, page 37: "In all states except SYN-SENT, all reset * (RST) segments are validated by checking their SEQ-fields." From 74511332309844c3de64970841b3c250d85f34ce Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Tue, 24 Feb 2026 09:20:13 +0100 Subject: [PATCH 409/828] selftests/net: packetdrill: Verify acceptance of FIN packets when RWIN is 0 Add a packetdrill test that verifies we accept bare FIN packets when the advertised receive window is zero. Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260224-fix_zero_wnd_fin-v2-2-a16677ea7cea@gmail.com Signed-off-by: Jakub Kicinski --- .../net/packetdrill/tcp_rcv_zero_wnd_fin.pkt | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt new file mode 100644 index 000000000000..e245359a1a91 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Some TCP stacks send FINs even though the window is closed. We break +// a possible FIN/ACK loop by accepting the FIN. + +--mss=1000 + +`./defaults.sh` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:60001(60000) ack 1 win 257 + * > . 1:1(0) ack 60001 win 0 + + +0 < F. 60001:60001(0) ack 1 win 257 + +0 > . 1:1(0) ack 60002 win 0 From 676c7af91fcd740d34e7cb788cbc58e3bcafde39 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 19:04:04 +0800 Subject: [PATCH 410/828] dpll: zl3073x: Remove redundant cleanup in devm_dpll_init() The devm_add_action_or_reset() function already executes the cleanup action on failure before returning an error, so the explicit goto error and subsequent zl3073x_dev_dpll_fini() call causes double cleanup. Fixes: ebb1031c5137 ("dpll: zl3073x: Refactor DPLL initialization") Reviewed-by: Ivan Vecera Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260224-dpll-v2-1-d7786414a830@gmail.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 8c5ee28e02f4..37f3c33570ee 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -981,11 +981,7 @@ zl3073x_devm_dpll_init(struct zl3073x_dev *zldev, u8 num_dplls) } /* Add devres action to release DPLL related resources */ - rc = devm_add_action_or_reset(zldev->dev, zl3073x_dev_dpll_fini, zldev); - if (rc) - goto error; - - return 0; + return devm_add_action_or_reset(zldev->dev, zl3073x_dev_dpll_fini, zldev); error: zl3073x_dev_dpll_fini(zldev); From 916864e5eda0c52572b11b4cacce41b89622cc35 Mon Sep 17 00:00:00 2001 From: Mohd Ayaan Anwar Date: Tue, 24 Feb 2026 17:58:47 +0530 Subject: [PATCH 411/828] MAINTAINERS: Update maintainer entry for QUALCOMM ETHQOS ETHERNET DRIVER Replace Vinod Koul with Mohd Ayaan Anwar as the maintainer of the QUALCOMM ETHQOS ETHERNET DRIVER. Vinod confirmed he is no longer active in this area and agreed to be removed. Acked-by: Vinod Koul Suggested-by: Russell King (Oracle) Signed-off-by: Mohd Ayaan Anwar Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260224-qcom_ethqos_maintainer-v1-1-24e02701ea52@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0313ca6039..a461baa96483 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21692,7 +21692,7 @@ S: Maintained F: drivers/net/ethernet/qualcomm/emac/ QUALCOMM ETHQOS ETHERNET DRIVER -M: Vinod Koul +M: Mohd Ayaan Anwar L: netdev@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained From f975a0955276579e2176a134366ed586071c7c6a Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Tue, 24 Feb 2026 04:38:36 -0800 Subject: [PATCH 412/828] net: mana: Fix double destroy_workqueue on service rescan PCI path While testing corner cases in the driver, a use-after-free crash was found on the service rescan PCI path. When mana_serv_reset() calls mana_gd_suspend(), mana_gd_cleanup() destroys gc->service_wq. If the subsequent mana_gd_resume() fails with -ETIMEDOUT or -EPROTO, the code falls through to mana_serv_rescan() which triggers pci_stop_and_remove_bus_device(). This invokes the PCI .remove callback (mana_gd_remove), which calls mana_gd_cleanup() a second time, attempting to destroy the already- freed workqueue. Fix this by NULL-checking gc->service_wq in mana_gd_cleanup() and setting it to NULL after destruction. Call stack of issue for reference: [Sat Feb 21 18:53:48 2026] Call Trace: [Sat Feb 21 18:53:48 2026] [Sat Feb 21 18:53:48 2026] mana_gd_cleanup+0x33/0x70 [mana] [Sat Feb 21 18:53:48 2026] mana_gd_remove+0x3a/0xc0 [mana] [Sat Feb 21 18:53:48 2026] pci_device_remove+0x41/0xb0 [Sat Feb 21 18:53:48 2026] device_remove+0x46/0x70 [Sat Feb 21 18:53:48 2026] device_release_driver_internal+0x1e3/0x250 [Sat Feb 21 18:53:48 2026] device_release_driver+0x12/0x20 [Sat Feb 21 18:53:48 2026] pci_stop_bus_device+0x6a/0x90 [Sat Feb 21 18:53:48 2026] pci_stop_and_remove_bus_device+0x13/0x30 [Sat Feb 21 18:53:48 2026] mana_do_service+0x180/0x290 [mana] [Sat Feb 21 18:53:48 2026] mana_serv_func+0x24/0x50 [mana] [Sat Feb 21 18:53:48 2026] process_one_work+0x190/0x3d0 [Sat Feb 21 18:53:48 2026] worker_thread+0x16e/0x2e0 [Sat Feb 21 18:53:48 2026] kthread+0xf7/0x130 [Sat Feb 21 18:53:48 2026] ? __pfx_worker_thread+0x10/0x10 [Sat Feb 21 18:53:48 2026] ? __pfx_kthread+0x10/0x10 [Sat Feb 21 18:53:48 2026] ret_from_fork+0x269/0x350 [Sat Feb 21 18:53:48 2026] ? __pfx_kthread+0x10/0x10 [Sat Feb 21 18:53:48 2026] ret_from_fork_asm+0x1a/0x30 [Sat Feb 21 18:53:48 2026] Fixes: 505cc26bcae0 ("net: mana: Add support for auxiliary device servicing events") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Reviewed-by: Simon Horman Link: https://patch.msgid.link/aZ2bzL64NagfyHpg@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 5 ++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 0055c231acf6..3926d18f1840 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1946,7 +1946,10 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_gd_remove_irqs(pdev); - destroy_workqueue(gc->service_wq); + if (gc->service_wq) { + destroy_workqueue(gc->service_wq); + gc->service_wq = NULL; + } dev_dbg(&pdev->dev, "mana gdma cleanup successful\n"); } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9b5a72ada5c4..f69e42651359 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3762,7 +3762,9 @@ void mana_rdma_remove(struct gdma_dev *gd) } WRITE_ONCE(gd->rdma_teardown, true); - flush_workqueue(gc->service_wq); + + if (gc->service_wq) + flush_workqueue(gc->service_wq); if (gd->adev) remove_adev(gd); From bb4c698633c0e19717586a6524a33196cff01a32 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 24 Feb 2026 14:57:08 +0200 Subject: [PATCH 413/828] team: avoid NETDEV_CHANGEMTU event when unregistering slave syzbot is reporting unregister_netdevice: waiting for netdevsim0 to become free. Usage count = 3 ref_tracker: netdev@ffff88807dcf8618 has 1/2 users at __netdev_tracker_alloc include/linux/netdevice.h:4400 [inline] netdev_hold include/linux/netdevice.h:4429 [inline] inetdev_init+0x201/0x4e0 net/ipv4/devinet.c:286 inetdev_event+0x251/0x1610 net/ipv4/devinet.c:1600 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85 call_netdevice_notifiers_mtu net/core/dev.c:2318 [inline] netif_set_mtu_ext+0x5aa/0x800 net/core/dev.c:9886 netif_set_mtu+0xd7/0x1b0 net/core/dev.c:9907 dev_set_mtu+0x126/0x260 net/core/dev_api.c:248 team_port_del+0xb07/0xcb0 drivers/net/team/team_core.c:1333 team_del_slave drivers/net/team/team_core.c:1936 [inline] team_device_event+0x207/0x5b0 drivers/net/team/team_core.c:2929 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2281 [inline] call_netdevice_notifiers net/core/dev.c:2295 [inline] __dev_change_net_namespace+0xcb7/0x2050 net/core/dev.c:12592 do_setlink+0x2ce/0x4590 net/core/rtnetlink.c:3060 rtnl_changelink net/core/rtnetlink.c:3776 [inline] __rtnl_newlink net/core/rtnetlink.c:3935 [inline] rtnl_newlink+0x15a9/0x1be0 net/core/rtnetlink.c:4072 rtnetlink_rcv_msg+0x7d5/0xbe0 net/core/rtnetlink.c:6958 netlink_rcv_skb+0x232/0x4b0 net/netlink/af_netlink.c:2550 netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] netlink_unicast+0x80f/0x9b0 net/netlink/af_netlink.c:1344 netlink_sendmsg+0x813/0xb40 net/netlink/af_netlink.c:1894 problem. Ido Schimmel found steps to reproduce ip link add name team1 type team ip link add name dummy1 mtu 1499 master team1 type dummy ip netns add ns1 ip link set dev dummy1 netns ns1 ip -n ns1 link del dev dummy1 and also found that the same issue was fixed in the bond driver in commit f51048c3e07b ("bonding: avoid NETDEV_CHANGEMTU event when unregistering slave"). Let's do similar thing for the team driver, with commit ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") and commit 303a8487a657 ("net: s/__dev_set_mtu/__netif_set_mtu/") also applied. Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 Suggested-by: Ido Schimmel Reviewed-by: Jiri Pirko Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Tetsuo Handa Signed-off-by: Ido Schimmel Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260224125709.317574-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index c08a5c1bd6e4..a0fe998cc055 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1292,7 +1292,7 @@ err_set_mtu: static void __team_port_change_port_removed(struct team_port *port); -static int team_port_del(struct team *team, struct net_device *port_dev) +static int team_port_del(struct team *team, struct net_device *port_dev, bool unregister) { struct net_device *dev = team->dev; struct team_port *port; @@ -1330,7 +1330,13 @@ static int team_port_del(struct team *team, struct net_device *port_dev) __team_port_change_port_removed(port); team_port_set_orig_dev_addr(port); - dev_set_mtu(port_dev, port->orig.mtu); + if (unregister) { + netdev_lock_ops(port_dev); + __netif_set_mtu(port_dev, port->orig.mtu); + netdev_unlock_ops(port_dev); + } else { + dev_set_mtu(port_dev, port->orig.mtu); + } kfree_rcu(port, rcu); netdev_info(dev, "Port device %s removed\n", portname); netdev_compute_master_upper_features(team->dev, true); @@ -1634,7 +1640,7 @@ static void team_uninit(struct net_device *dev) ASSERT_RTNL(); list_for_each_entry_safe(port, tmp, &team->port_list, list) - team_port_del(team, port->dev); + team_port_del(team, port->dev, false); __team_change_mode(team, NULL); /* cleanup */ __team_options_unregister(team, team_options, ARRAY_SIZE(team_options)); @@ -1933,7 +1939,16 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) ASSERT_RTNL(); - return team_port_del(team, port_dev); + return team_port_del(team, port_dev, false); +} + +static int team_del_slave_on_unregister(struct net_device *dev, struct net_device *port_dev) +{ + struct team *team = netdev_priv(dev); + + ASSERT_RTNL(); + + return team_port_del(team, port_dev, true); } static netdev_features_t team_fix_features(struct net_device *dev, @@ -2926,7 +2941,7 @@ static int team_device_event(struct notifier_block *unused, !!netif_oper_up(port->dev)); break; case NETDEV_UNREGISTER: - team_del_slave(port->team->dev, dev); + team_del_slave_on_unregister(port->team->dev, dev); break; case NETDEV_FEAT_CHANGE: if (!port->team->notifier_ctx) { @@ -2999,3 +3014,4 @@ MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jiri Pirko "); MODULE_DESCRIPTION("Ethernet team device driver"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); From 58f8ef625e23a607f4a89758d6b328a2701f7354 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 24 Feb 2026 14:57:09 +0200 Subject: [PATCH 414/828] selftests: team: Add a reference count leak test Add a test for the issue that was fixed in "team: avoid NETDEV_CHANGEMTU event when unregistering slave". The test hangs due to a reference count leak without the fix: # make -C tools/testing/selftests TARGETS="drivers/net/team" TEST_PROGS=refleak.sh TEST_GEN_PROGS="" run_tests [...] TAP version 13 1..1 # timeout set to 45 # selftests: drivers/net/team: refleak.sh [ 50.681299][ T496] unregister_netdevice: waiting for dummy1 to become free. Usage count = 3 [ 71.185325][ T496] unregister_netdevice: waiting for dummy1 to become free. Usage count = 3 And passes with the fix: # make -C tools/testing/selftests TARGETS="drivers/net/team" TEST_PROGS=refleak.sh TEST_GEN_PROGS="" run_tests [...] TAP version 13 1..1 # timeout set to 45 # selftests: drivers/net/team: refleak.sh ok 1 selftests: drivers/net/team: refleak.sh Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260224125709.317574-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/team/Makefile | 1 + .../selftests/drivers/net/team/refleak.sh | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/team/refleak.sh diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 1340b3df9c31..45a3e7ad3dcb 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -5,6 +5,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ options.sh \ propagation.sh \ + refleak.sh \ # end of TEST_PROGS TEST_INCLUDES := \ diff --git a/tools/testing/selftests/drivers/net/team/refleak.sh b/tools/testing/selftests/drivers/net/team/refleak.sh new file mode 100755 index 000000000000..ef08213ab964 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/refleak.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +# Test that there is no reference count leak and that dummy1 can be deleted. +# https://lore.kernel.org/netdev/4d69abe1-ca8d-4f0b-bcf8-13899b211e57@I-love.SAKURA.ne.jp/ +setup_ns ns1 ns2 +ip -n "$ns1" link add name team1 type team +ip -n "$ns1" link add name dummy1 mtu 1499 type dummy +ip -n "$ns1" link set dev dummy1 master team1 +ip -n "$ns1" link set dev dummy1 netns "$ns2" +ip -n "$ns2" link del dev dummy1 From 2700b7e603af39ca55fe9fc876ca123efd44680f Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:48 +0200 Subject: [PATCH 415/828] net/mlx5: DR, Fix circular locking dependency in dump Fix a circular locking dependency between dbg_mutex and the domain rx/tx mutexes that could lead to a deadlock. The dump path in dr_dump_domain_all() was acquiring locks in the order: dbg_mutex -> rx.mutex -> tx.mutex While the table/matcher creation paths acquire locks in the order: rx.mutex -> tx.mutex -> dbg_mutex This inverted lock ordering creates a circular dependency. Fix this by changing dr_dump_domain_all() to acquire the domain lock before dbg_mutex, matching the order used in mlx5dr_table_create() and mlx5dr_matcher_create(). Lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 6.19.0-rc6net_next_e817c4e #1 Not tainted ------------------------------------------------------ sos/30721 is trying to acquire lock: ffff888102df5900 (&dmn->info.rx.mutex){+.+.}-{4:4}, at: dr_dump_start+0x131/0x450 [mlx5_core] but task is already holding lock: ffff888102df5bc0 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}, at: dr_dump_start+0x10b/0x450 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}: __mutex_lock+0x91/0x1060 mlx5dr_matcher_create+0x377/0x5e0 [mlx5_core] mlx5_cmd_dr_create_flow_group+0x62/0xd0 [mlx5_core] mlx5_create_flow_group+0x113/0x1c0 [mlx5_core] mlx5_chains_create_prio+0x453/0x2290 [mlx5_core] mlx5_chains_get_table+0x2e2/0x980 [mlx5_core] esw_chains_create+0x1e6/0x3b0 [mlx5_core] esw_create_offloads_fdb_tables.cold+0x62/0x63f [mlx5_core] esw_offloads_enable+0x76f/0xd20 [mlx5_core] mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core] mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core] devlink_nl_eswitch_set_doit+0x67/0xe0 genl_family_rcv_msg_doit+0xe0/0x130 genl_rcv_msg+0x188/0x290 netlink_rcv_skb+0x4b/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1ed/0x2c0 netlink_sendmsg+0x210/0x450 __sock_sendmsg+0x38/0x60 __sys_sendto+0x119/0x180 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x70/0xd00 entry_SYSCALL_64_after_hwframe+0x4b/0x53 -> #1 (&dmn->info.tx.mutex){+.+.}-{4:4}: __mutex_lock+0x91/0x1060 mlx5dr_table_create+0x11d/0x530 [mlx5_core] mlx5_cmd_dr_create_flow_table+0x62/0x140 [mlx5_core] __mlx5_create_flow_table+0x46f/0x960 [mlx5_core] mlx5_create_flow_table+0x16/0x20 [mlx5_core] esw_create_offloads_fdb_tables+0x136/0x240 [mlx5_core] esw_offloads_enable+0x76f/0xd20 [mlx5_core] mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core] mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core] devlink_nl_eswitch_set_doit+0x67/0xe0 genl_family_rcv_msg_doit+0xe0/0x130 genl_rcv_msg+0x188/0x290 netlink_rcv_skb+0x4b/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1ed/0x2c0 netlink_sendmsg+0x210/0x450 __sock_sendmsg+0x38/0x60 __sys_sendto+0x119/0x180 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x70/0xd00 entry_SYSCALL_64_after_hwframe+0x4b/0x53 -> #0 (&dmn->info.rx.mutex){+.+.}-{4:4}: __lock_acquire+0x18b6/0x2eb0 lock_acquire+0xd3/0x2c0 __mutex_lock+0x91/0x1060 dr_dump_start+0x131/0x450 [mlx5_core] seq_read_iter+0xe3/0x410 seq_read+0xfb/0x130 full_proxy_read+0x53/0x80 vfs_read+0xba/0x330 ksys_read+0x65/0xe0 do_syscall_64+0x70/0xd00 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dmn->dump_info.dbg_mutex); lock(&dmn->info.tx.mutex); lock(&dmn->dump_info.dbg_mutex); lock(&dmn->info.rx.mutex); *** DEADLOCK *** Fixes: 9222f0b27da2 ("net/mlx5: DR, Add support for dumping steering info") Signed-off-by: Shay Drory Reviewed-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c index 8803fa071c50..18362e9c3314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c @@ -1051,8 +1051,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) struct mlx5dr_table *tbl; int ret; - mutex_lock(&dmn->dump_info.dbg_mutex); mlx5dr_domain_lock(dmn); + mutex_lock(&dmn->dump_info.dbg_mutex); ret = dr_dump_domain(file, dmn); if (ret < 0) @@ -1065,8 +1065,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) } unlock_mutex: - mlx5dr_domain_unlock(dmn); mutex_unlock(&dmn->dump_info.dbg_mutex); + mlx5dr_domain_unlock(dmn); return ret; } From bd7b9f83fb9f85228c3ac9748d9cba9fab7fb5a2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:49 +0200 Subject: [PATCH 416/828] net/mlx5: LAG, disable MPESW in lag_disable_change() mlx5_lag_disable_change() unconditionally called mlx5_disable_lag() when LAG was active, which is incorrect for MLX5_LAG_MODE_MPESW. Hnece, call mlx5_disable_mpesw() when running in MPESW mode. Fixes: a32327a3a02c ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 8 ++++++-- drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h | 5 +++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 9fe47c836ebd..859f042caf79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1869,8 +1869,12 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev) mutex_lock(&ldev->lock); ldev->mode_changes_in_progress++; - if (__mlx5_lag_is_active(ldev)) - mlx5_disable_lag(ldev); + if (__mlx5_lag_is_active(ldev)) { + if (ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_lag_disable_mpesw(ldev); + else + mlx5_disable_lag(ldev); + } mutex_unlock(&ldev->lock); mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 04762562d7d9..a63d48d18878 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -65,7 +65,7 @@ err_metadata: return err; } -static int enable_mpesw(struct mlx5_lag *ldev) +static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0; int err; @@ -126,7 +126,7 @@ err_add_devices: return err; } -static void disable_mpesw(struct mlx5_lag *ldev) +void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev) { if (ldev->mode == MLX5_LAG_MODE_MPESW) { mlx5_mpesw_metadata_cleanup(ldev); @@ -152,9 +152,9 @@ static void mlx5_mpesw_work(struct work_struct *work) } if (mpesww->op == MLX5_MPESW_OP_ENABLE) - mpesww->result = enable_mpesw(ldev); + mpesww->result = mlx5_lag_enable_mpesw(ldev); else if (mpesww->op == MLX5_MPESW_OP_DISABLE) - disable_mpesw(ldev); + mlx5_lag_disable_mpesw(ldev); unlock: mutex_unlock(&ldev->lock); mlx5_devcom_comp_unlock(devcom); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index f5d9b5c97b0d..b767dbb4f457 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -31,6 +31,11 @@ int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev); int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev); +#ifdef CONFIG_MLX5_ESWITCH +void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev); +#else +static inline void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev) {} +#endif /* CONFIG_MLX5_ESWITCH */ #ifdef CONFIG_MLX5_ESWITCH void mlx5_mpesw_speed_update_work(struct work_struct *work); From d7073e8b978ae925f1f0f08754f33f84d8547ea7 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:50 +0200 Subject: [PATCH 417/828] net/mlx5: E-switch, Clear legacy flag when moving to switchdev The cited commit introduced MLX5_PRIV_FLAGS_SWITCH_LEGACY to identify when a transition to legacy mode is requested via devlink. However, the logic failed to clear this flag if the mode was subsequently changed back to MLX5_ESWITCH_OFFLOADS (switchdev). Consequently, if a user toggled from legacy to switchdev, the flag remained set, leaving the driver with wrong state indicating Fix this by explicitly clearing the MLX5_PRIV_FLAGS_SWITCH_LEGACY bit when the requested mode is MLX5_ESWITCH_OFFLOADS. Fixes: 2a4f56fbcc47 ("net/mlx5e: Keep netdev when leave switchdev for devlink set legacy only") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1b439cef3719..9d51d030596c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4068,6 +4068,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (mlx5_mode == MLX5_ESWITCH_LEGACY) esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) + esw->dev->priv.flags &= ~MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_eswitch_disable_locked(esw); if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { From 60253042c0b87b61596368489c44d12ba720d11c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:51 +0200 Subject: [PATCH 418/828] net/mlx5: Fix missing devlink lock in SRIOV enable error path The cited commit miss to add locking in the error path of mlx5_sriov_enable(). When pci_enable_sriov() fails, mlx5_device_disable_sriov() is called to clean up. This cleanup function now expects to be called with the devlink instance lock held. Add the missing devl_lock(devlink) and devl_unlock(devlink) Fixes: 84a433a40d0e ("net/mlx5: Lock mlx5 devlink reload callbacks") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a2fc937d5461..172862a70c70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -193,7 +193,9 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); + devl_lock(devlink); mlx5_device_disable_sriov(dev, num_vfs, true, true); + devl_unlock(devlink); } return err; } From 859380694f434597407632c29f30fdb5e763e6cc Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 24 Feb 2026 13:46:52 +0200 Subject: [PATCH 419/828] net/mlx5e: Fix "scheduling while atomic" in IPsec MAC address query Fix a "scheduling while atomic" bug in mlx5e_ipsec_init_macs() by replacing mlx5_query_mac_address() with ether_addr_copy() to get the local MAC address directly from netdev->dev_addr. The issue occurs because mlx5_query_mac_address() queries the hardware which involves mlx5_cmd_exec() that can sleep, but it is called from the mlx5e_ipsec_handle_event workqueue which runs in atomic context. The MAC address is already available in netdev->dev_addr, so no need to query hardware. This avoids the sleeping call and resolves the bug. Call trace: BUG: scheduling while atomic: kworker/u112:2/69344/0x00000200 __schedule+0x7ab/0xa20 schedule+0x1c/0xb0 schedule_timeout+0x6e/0xf0 __wait_for_common+0x91/0x1b0 cmd_exec+0xa85/0xff0 [mlx5_core] mlx5_cmd_exec+0x1f/0x50 [mlx5_core] mlx5_query_nic_vport_mac_address+0x7b/0xd0 [mlx5_core] mlx5_query_mac_address+0x19/0x30 [mlx5_core] mlx5e_ipsec_init_macs+0xc1/0x720 [mlx5_core] mlx5e_ipsec_build_accel_xfrm_attrs+0x422/0x670 [mlx5_core] mlx5e_ipsec_handle_event+0x2b9/0x460 [mlx5_core] process_one_work+0x178/0x2e0 worker_thread+0x2ea/0x430 Fixes: cee137a63431 ("net/mlx5e: Handle ESN update events") Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 9c7064187ed0..f03507a522b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -259,7 +259,6 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { - struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); struct mlx5e_ipsec_addr *addrs = &attrs->addrs; struct net_device *netdev = sa_entry->dev; struct xfrm_state *x = sa_entry->x; @@ -276,7 +275,7 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->type != XFRM_DEV_OFFLOAD_PACKET) return; - mlx5_query_mac_address(mdev, addr); + ether_addr_copy(addr, netdev->dev_addr); switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: src = attrs->dmac; From aa4876fe2d9fcbcaa0592b25f34ec6f6ea7876c1 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 9 Feb 2026 21:41:49 +0800 Subject: [PATCH 420/828] ALSA: hda/realtek: add quirk for Acer Nitro ANV15-51 fix mute/micmute LEDs and headset microphone for Acer Nitro ANV15-51. [ The headset microphone issue is solved by Kailang] Link: https://bugzilla.kernel.org/show_bug.cgi?id=220279 Cc: stable@vger.kernel.org Signed-off-by: Zhang Heng Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260209134149.3076957-1-zhangheng@kylinos.cn --- sound/hda/codecs/realtek/alc269.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 43ecfc63ef87..86bb22d19629 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -4075,6 +4075,7 @@ enum { ALC236_FIXUP_HP_MUTE_LED_MICMUTE_GPIO, ALC233_FIXUP_LENOVO_GPIO2_MIC_HOTKEY, ALC245_FIXUP_BASS_HP_DAC, + ALC245_FIXUP_ACER_MICMUTE_LED, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -6599,6 +6600,12 @@ static const struct hda_fixup alc269_fixups[] = { /* Borrow the DAC routing selected for those Thinkpads */ .v.func = alc285_fixup_thinkpad_x1_gen7, }, + [ALC245_FIXUP_ACER_MICMUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_coef_micmute_led, + .chained = true, + .chain_id = ALC2XX_FIXUP_HEADSET_MIC, + } }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -6651,6 +6658,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x159c, "Acer Nitro 5 AN515-58", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1597, "Acer Nitro 5 AN517-55", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED), + SND_PCI_QUIRK(0x1025, 0x171e, "Acer Nitro ANV15-51", ALC245_FIXUP_ACER_MICMUTE_LED), SND_PCI_QUIRK(0x1025, 0x1826, "Acer Helios ZPC", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), SND_PCI_QUIRK(0x1025, 0x182c, "Acer Helios ZPD", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), SND_PCI_QUIRK(0x1025, 0x1844, "Acer Helios ZPS", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), From 54f9d645a5453d0bfece0c465d34aaf072ea99fa Mon Sep 17 00:00:00 2001 From: Jun Seo Date: Thu, 26 Feb 2026 10:08:20 +0900 Subject: [PATCH 421/828] ALSA: usb-audio: Use correct version for UAC3 header validation The entry of the validators table for UAC3 AC header descriptor is defined with the wrong protocol version UAC_VERSION_2, while it should have been UAC_VERSION_3. This results in the validator never matching for actual UAC3 devices (protocol == UAC_VERSION_3), causing their header descriptors to bypass validation entirely. A malicious USB device presenting a truncated UAC3 header could exploit this to cause out-of-bounds reads when the driver later accesses unvalidated descriptor fields. The bug was introduced in the same commit as the recently fixed UAC3 feature unit sub-type typo, and appears to be from the same copy-paste error when the UAC3 section was created from the UAC2 section. Fixes: 57f8770620e9 ("ALSA: usb-audio: More validations of descriptor units") Cc: Signed-off-by: Jun Seo Link: https://patch.msgid.link/20260226010820.36529-1-jun.seo.93@proton.me Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 4bb4893f6e74..f62b7cc041dc 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -281,7 +281,7 @@ static const struct usb_desc_validator audio_validators[] = { /* UAC_VERSION_2, UAC2_SAMPLE_RATE_CONVERTER: not implemented yet */ /* UAC3 */ - FIXED(UAC_VERSION_2, UAC_HEADER, struct uac3_ac_header_descriptor), + FIXED(UAC_VERSION_3, UAC_HEADER, struct uac3_ac_header_descriptor), FIXED(UAC_VERSION_3, UAC_INPUT_TERMINAL, struct uac3_input_terminal_descriptor), FIXED(UAC_VERSION_3, UAC_OUTPUT_TERMINAL, From cd3c877d04683b44a4d50dcdfad54b356e65d158 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 24 Feb 2026 07:46:37 -0800 Subject: [PATCH 422/828] iomap: don't report direct-io retries to fserror iomap's directio implementation has two magic errno codes that it uses to signal callers -- ENOTBLK tells the filesystem that it should retry a write with the pagecache; and EAGAIN tells the caller that pagecache flushing or invalidation failed and that it should try again. Neither of these indicate data loss, so let's not report them. Fixes: a9d573ee88af98 ("iomap: report file I/O errors to the VFS") Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/20260224154637.GD2390381@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/direct-io.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 95254aa1b654..e911daedff65 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -87,6 +87,19 @@ static inline enum fserror_type iomap_dio_err_type(const struct iomap_dio *dio) return FSERR_DIRECTIO_READ; } +static inline bool should_report_dio_fserror(const struct iomap_dio *dio) +{ + switch (dio->error) { + case 0: + case -EAGAIN: + case -ENOTBLK: + /* don't send fsnotify for success or magic retry codes */ + return false; + default: + return true; + } +} + ssize_t iomap_dio_complete(struct iomap_dio *dio) { const struct iomap_dio_ops *dops = dio->dops; @@ -96,7 +109,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (dops && dops->end_io) ret = dops->end_io(iocb, dio->size, ret, dio->flags); - if (dio->error) + if (should_report_dio_fserror(dio)) fserror_report_io(file_inode(iocb->ki_filp), iomap_dio_err_type(dio), offset, dio->size, dio->error, GFP_NOFS); From 28aaa9c39945b7925a1cc1d513c8f21ed38f5e4f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 26 Feb 2026 10:43:55 +0100 Subject: [PATCH 423/828] kthread: consolidate kthread exit paths to prevent use-after-free Guillaume reported crashes via corrupted RCU callback function pointers during KUnit testing. The crash was traced back to the pidfs rhashtable conversion which replaced the 24-byte rb_node with an 8-byte rhash_head in struct pid, shrinking it from 160 to 144 bytes. struct kthread (without CONFIG_BLK_CGROUP) is also 144 bytes. With CONFIG_SLAB_MERGE_DEFAULT and SLAB_HWCACHE_ALIGN both round up to 192 bytes and share the same slab cache. struct pid.rcu.func and struct kthread.affinity_node both sit at offset 0x78. When a kthread exits via make_task_dead() it bypasses kthread_exit() and misses the affinity_node cleanup. free_kthread_struct() frees the memory while the node is still linked into the global kthread_affinity_list. A subsequent list_del() by another kthread writes through dangling list pointers into the freed and reused memory, corrupting the pid's rcu.func pointer. Instead of patching free_kthread_struct() to handle the missed cleanup, consolidate all kthread exit paths. Turn kthread_exit() into a macro that calls do_exit() and add kthread_do_exit() which is called from do_exit() for any task with PF_KTHREAD set. This guarantees that kthread-specific cleanup always happens regardless of the exit path - make_task_dead(), direct do_exit(), or kthread_exit(). Replace __to_kthread() with a new tsk_is_kthread() accessor in the public header. Export do_exit() since module code using the kthread_exit() macro now needs it directly. Reported-by: Guillaume Tucker Tested-by: Guillaume Tucker Tested-by: Mark Brown Tested-by: David Gow Cc: Link: https://lore.kernel.org/all/20260224-mittlerweile-besessen-2738831ae7f6@brauner Co-developed-by: Linus Torvalds Fixes: 4d13f4304fa4 ("kthread: Implement preferred affinity") Signed-off-by: Linus Torvalds Signed-off-by: Christian Brauner --- include/linux/kthread.h | 21 ++++++++++++++++++++- kernel/exit.c | 6 ++++++ kernel/kthread.c | 41 +++++------------------------------------ 3 files changed, 31 insertions(+), 37 deletions(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c92c1149ee6e..a01a474719a7 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -7,6 +7,24 @@ struct mm_struct; +/* opaque kthread data */ +struct kthread; + +/* + * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will + * always remain a kthread. For kthreads p->worker_private always + * points to a struct kthread. For tasks that are not kthreads + * p->worker_private is used to point to other things. + * + * Return NULL for any task that is not a kthread. + */ +static inline struct kthread *tsk_is_kthread(struct task_struct *p) +{ + if (p->flags & PF_KTHREAD) + return p->worker_private; + return NULL; +} + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_exit(long result) __noreturn; +#define kthread_exit(result) do_exit(result) void kthread_complete_and_exit(struct completion *, long) __noreturn; int kthreads_update_housekeeping(void); +void kthread_do_exit(struct kthread *, long); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/kernel/exit.c b/kernel/exit.c index 8a87021211ae..ede3117fa7d4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -896,11 +896,16 @@ static void synchronize_group_exit(struct task_struct *tsk, long code) void __noreturn do_exit(long code) { struct task_struct *tsk = current; + struct kthread *kthread; int group_dead; WARN_ON(irqs_disabled()); WARN_ON(tsk->plug); + kthread = tsk_is_kthread(tsk); + if (unlikely(kthread)) + kthread_do_exit(kthread, code); + kcov_task_exit(tsk); kmsan_task_exit(tsk); @@ -1013,6 +1018,7 @@ void __noreturn do_exit(long code) lockdep_free_task(tsk); do_task_dead(); } +EXPORT_SYMBOL(do_exit); void __noreturn make_task_dead(int signr) { diff --git a/kernel/kthread.c b/kernel/kthread.c index 20451b624b67..791210daf8b4 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -85,24 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k) return k->worker_private; } -/* - * Variant of to_kthread() that doesn't assume @p is a kthread. - * - * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will - * always remain a kthread. For kthreads p->worker_private always - * points to a struct kthread. For tasks that are not kthreads - * p->worker_private is used to point to other things. - * - * Return NULL for any task that is not a kthread. - */ -static inline struct kthread *__to_kthread(struct task_struct *p) -{ - void *kthread = p->worker_private; - if (kthread && !(p->flags & PF_KTHREAD)) - kthread = NULL; - return kthread; -} - void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk) { struct kthread *kthread = to_kthread(tsk); @@ -193,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park); bool kthread_should_stop_or_park(void) { - struct kthread *kthread = __to_kthread(current); + struct kthread *kthread = tsk_is_kthread(current); if (!kthread) return false; @@ -234,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); */ void *kthread_func(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); if (kthread) return kthread->threadfn; return NULL; @@ -266,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data); */ void *kthread_probe_data(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); void *data = NULL; if (kthread) @@ -309,19 +291,8 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -/** - * kthread_exit - Cause the current kthread return @result to kthread_stop(). - * @result: The integer value to return to kthread_stop(). - * - * While kthread_exit can be called directly, it exists so that - * functions which do some additional work in non-modular code such as - * module_put_and_kthread_exit can be implemented. - * - * Does not return. - */ -void __noreturn kthread_exit(long result) +void kthread_do_exit(struct kthread *kthread, long result) { - struct kthread *kthread = to_kthread(current); kthread->result = result; if (!list_empty(&kthread->affinity_node)) { mutex_lock(&kthread_affinity_lock); @@ -333,9 +304,7 @@ void __noreturn kthread_exit(long result) kthread->preferred_affinity = NULL; } } - do_exit(0); } -EXPORT_SYMBOL(kthread_exit); /** * kthread_complete_and_exit - Exit the current kthread. @@ -683,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu) bool kthread_is_per_cpu(struct task_struct *p) { - struct kthread *kthread = __to_kthread(p); + struct kthread *kthread = tsk_is_kthread(p); if (!kthread) return false; From 7c2889af823340d1d410939b9d547bf184d5fa54 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Feb 2026 15:48:59 +0200 Subject: [PATCH 424/828] RDMA/uverbs: Import DMA-BUF module in uverbs_std_types_dmabuf file Fix the following compilation error: ERROR: modpost: module ib_uverbs uses symbol dma_buf_move_notify from namespace DMA_BUF, but does not import it. Fixes: 0ac6f4056c4a ("RDMA/uverbs: Add DMABUF object type and operations") Link: https://patch.msgid.link/20260225-fix-uverbs-compilation-v1-1-acf7b3d0f9fa@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_dmabuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/uverbs_std_types_dmabuf.c b/drivers/infiniband/core/uverbs_std_types_dmabuf.c index dfdfcd1d1a44..4a7f8b6f9dc8 100644 --- a/drivers/infiniband/core/uverbs_std_types_dmabuf.c +++ b/drivers/infiniband/core/uverbs_std_types_dmabuf.c @@ -10,6 +10,8 @@ #include "rdma_core.h" #include "uverbs.h" +MODULE_IMPORT_NS("DMA_BUF"); + static int uverbs_dmabuf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment) { From a382a34276cb94d1cdc620b622dd85c55589a166 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 23 Feb 2026 14:38:32 -0800 Subject: [PATCH 425/828] selftests/vsock: change tests to respect write-once child ns mode The child_ns_mode sysctl parameter becomes write-once in a future patch in this series, which breaks existing tests. This patch updates the tests to respect this new policy. No additional tests are added. Add "global-parent" and "local-parent" namespaces as intermediaries to spawn namespaces in the given modes. This avoids the need to change "child_ns_mode" in the init_ns. nsenter must be used because ip netns unshares the mount namespace so nested "ip netns add" breaks exec calls from the init ns. Adds nsenter to the deps check. Reviewed-by: Stefano Garzarella Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-1-c0cde6959923@meta.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/vsock/vmtest.sh | 39 +++++++++++++------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index dc8dbe74a6d0..86e338886b33 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -210,16 +210,21 @@ check_result() { } add_namespaces() { - local orig_mode - orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode) + ip netns add "global-parent" 2>/dev/null + echo "global" | ip netns exec "global-parent" \ + tee /proc/sys/net/vsock/child_ns_mode &>/dev/null + ip netns add "local-parent" 2>/dev/null + echo "local" | ip netns exec "local-parent" \ + tee /proc/sys/net/vsock/child_ns_mode &>/dev/null - for mode in "${NS_MODES[@]}"; do - echo "${mode}" > /proc/sys/net/vsock/child_ns_mode - ip netns add "${mode}0" 2>/dev/null - ip netns add "${mode}1" 2>/dev/null - done - - echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode + nsenter --net=/var/run/netns/global-parent \ + ip netns add "global0" 2>/dev/null + nsenter --net=/var/run/netns/global-parent \ + ip netns add "global1" 2>/dev/null + nsenter --net=/var/run/netns/local-parent \ + ip netns add "local0" 2>/dev/null + nsenter --net=/var/run/netns/local-parent \ + ip netns add "local1" 2>/dev/null } init_namespaces() { @@ -237,6 +242,8 @@ del_namespaces() { log_host "removed ns ${mode}0" log_host "removed ns ${mode}1" done + ip netns del "global-parent" &>/dev/null + ip netns del "local-parent" &>/dev/null } vm_ssh() { @@ -287,7 +294,7 @@ check_args() { } check_deps() { - for dep in vng ${QEMU} busybox pkill ssh ss socat; do + for dep in vng ${QEMU} busybox pkill ssh ss socat nsenter; do if [[ ! -x $(command -v "${dep}") ]]; then echo -e "skip: dependency ${dep} not found!\n" exit "${KSFT_SKIP}" @@ -1231,12 +1238,8 @@ test_ns_local_same_cid_ok() { } test_ns_host_vsock_child_ns_mode_ok() { - local orig_mode - local rc + local rc="${KSFT_PASS}" - orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode) - - rc="${KSFT_PASS}" for mode in "${NS_MODES[@]}"; do local ns="${mode}0" @@ -1246,15 +1249,13 @@ test_ns_host_vsock_child_ns_mode_ok() { continue fi - if ! echo "${mode}" > /proc/sys/net/vsock/child_ns_mode; then - log_host "child_ns_mode should be writable to ${mode}" + if ! echo "${mode}" | ip netns exec "${ns}" \ + tee /proc/sys/net/vsock/child_ns_mode &>/dev/null; then rc="${KSFT_FAIL}" continue fi done - echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode - return "${rc}" } From 102eab95f025b4d3f3a6c0a858400aca2af2fe52 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 23 Feb 2026 14:38:33 -0800 Subject: [PATCH 426/828] vsock: lock down child_ns_mode as write-once Two administrator processes may race when setting child_ns_mode as one process sets child_ns_mode to "local" and then creates a namespace, but another process changes child_ns_mode to "global" between the write and the namespace creation. The first process ends up with a namespace in "global" mode instead of "local". While this can be detected after the fact by reading ns_mode and retrying, it is fragile and error-prone. Make child_ns_mode write-once so that a namespace manager can set it once and be sure it won't change. Writing a different value after the first write returns -EBUSY. This applies to all namespaces, including init_net, where an init process can write "local" to lock all future namespaces into local mode. Fixes: eafb64f40ca4 ("vsock: add netns to vsock core") Suggested-by: Daan De Meyer Suggested-by: Stefano Garzarella Co-developed-by: Stefano Garzarella Signed-off-by: Stefano Garzarella Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-2-c0cde6959923@meta.com Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 13 +++++++++++-- include/net/netns/vsock.h | 3 +++ net/vmw_vsock/af_vsock.c | 15 ++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d3ff48a2fbe0..533d8e75f7bb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -276,10 +276,19 @@ static inline bool vsock_net_mode_global(struct vsock_sock *vsk) return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL; } -static inline void vsock_net_set_child_mode(struct net *net, +static inline bool vsock_net_set_child_mode(struct net *net, enum vsock_net_mode mode) { - WRITE_ONCE(net->vsock.child_ns_mode, mode); + int new_locked = mode + 1; + int old_locked = 0; /* unlocked */ + + if (try_cmpxchg(&net->vsock.child_ns_mode_locked, + &old_locked, new_locked)) { + WRITE_ONCE(net->vsock.child_ns_mode, mode); + return true; + } + + return old_locked == new_locked; } static inline enum vsock_net_mode vsock_net_child_mode(struct net *net) diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h index b34d69a22fa8..dc8cbe45f406 100644 --- a/include/net/netns/vsock.h +++ b/include/net/netns/vsock.h @@ -17,5 +17,8 @@ struct netns_vsock { enum vsock_net_mode mode; enum vsock_net_mode child_ns_mode; + + /* 0 = unlocked, 1 = locked to global, 2 = locked to local */ + int child_ns_mode_locked; }; #endif /* __NET_NET_NAMESPACE_VSOCK_H */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index f4062c6a1944..2f7d94d682cb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -90,16 +90,20 @@ * * - /proc/sys/net/vsock/ns_mode (read-only) reports the current namespace's * mode, which is set at namespace creation and immutable thereafter. - * - /proc/sys/net/vsock/child_ns_mode (writable) controls what mode future + * - /proc/sys/net/vsock/child_ns_mode (write-once) controls what mode future * child namespaces will inherit when created. The initial value matches * the namespace's own ns_mode. * * Changing child_ns_mode only affects newly created namespaces, not the * current namespace or existing children. A "local" namespace cannot set - * child_ns_mode to "global". At namespace creation, ns_mode is inherited - * from the parent's child_ns_mode. + * child_ns_mode to "global". child_ns_mode is write-once, so that it may be + * configured and locked down by a namespace manager. Writing a different + * value after the first write returns -EBUSY. At namespace creation, ns_mode + * is inherited from the parent's child_ns_mode. * - * The init_net mode is "global" and cannot be modified. + * The init_net mode is "global" and cannot be modified. The init_net + * child_ns_mode is also write-once, so an init process (e.g. systemd) can + * set it to "local" to ensure all new namespaces inherit local mode. * * The modes affect the allocation and accessibility of CIDs as follows: * @@ -2853,7 +2857,8 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, new_mode == VSOCK_NET_MODE_GLOBAL) return -EPERM; - vsock_net_set_child_mode(net, new_mode); + if (!vsock_net_set_child_mode(net, new_mode)) + return -EBUSY; } return 0; From b6302e057fdc8f199ddae736ecdf45029f892e5c Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 23 Feb 2026 14:38:34 -0800 Subject: [PATCH 427/828] vsock: document write-once behavior of the child_ns_mode sysctl Update the vsock child_ns_mode documentation to include the new write-once semantics of setting child_ns_mode. The semantics are implemented in a preceding patch in this series. Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-3-c0cde6959923@meta.com Signed-off-by: Paolo Abeni --- Documentation/admin-guide/sysctl/net.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index c10530624f1e..3b2ad61995d4 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -594,6 +594,9 @@ Values: their sockets will only be able to connect within their own namespace. +The first write to ``child_ns_mode`` locks its value. Subsequent writes of the +same value succeed, but writing a different value returns ``-EBUSY``. + Changing ``child_ns_mode`` only affects namespaces created after the change; it does not modify the current namespace or any existing children. From 7aa767d0d3d04e50ae94e770db7db8197f666970 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Feb 2026 15:51:00 -0800 Subject: [PATCH 428/828] net: consume xmit errors of GSO frames udpgro_frglist.sh and udpgro_bench.sh are the flakiest tests currently in NIPA. They fail in the same exact way, TCP GRO test stalls occasionally and the test gets killed after 10min. These tests use veth to simulate GRO. They attach a trivial ("return XDP_PASS;") XDP program to the veth to force TSO off and NAPI on. Digging into the failure mode we can see that the connection is completely stuck after a burst of drops. The sender's snd_nxt is at sequence number N [1], but the receiver claims to have received (rcv_nxt) up to N + 3 * MSS [2]. Last piece of the puzzle is that senders rtx queue is not empty (let's say the block in the rtx queue is at sequence number N - 4 * MSS [3]). In this state, sender sends a retransmission from the rtx queue with a single segment, and sequence numbers N-4*MSS:N-3*MSS [3]. Receiver sees it and responds with an ACK all the way up to N + 3 * MSS [2]. But sender will reject this ack as TCP_ACK_UNSENT_DATA because it has no recollection of ever sending data that far out [1]. And we are stuck. The root cause is the mess of the xmit return codes. veth returns an error when it can't xmit a frame. We end up with a loss event like this: ------------------------------------------------- | GSO super frame 1 | GSO super frame 2 | |-----------------------------------------------| | seg | seg | seg | seg | seg | seg | seg | seg | | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ------------------------------------------------- x ok ok | ok ok ok \\ snd_nxt "x" means packet lost by veth, and "ok" means it went thru. Since veth has TSO disabled in this test it sees individual segments. Segment 1 is on the retransmit queue and will be resent. So why did the sender not advance snd_nxt even tho it clearly did send up to seg 8? tcp_write_xmit() interprets the return code from the core to mean that data has not been sent at all. Since TCP deals with GSO super frames, not individual segment the crux of the problem is that loss of a single segment can be interpreted as loss of all. TCP only sees the last return code for the last segment of the GSO frame (in <> brackets in the diagram above). Of course for the problem to occur we need a setup or a device without a Qdisc. Otherwise Qdisc layer disconnects the protocol layer from the device errors completely. We have multiple ways to fix this. 1) make veth not return an error when it lost a packet. While this is what I think we did in the past, the issue keeps reappearing and it's annoying to debug. The game of whack a mole is not great. 2) fix the damn return codes We only talk about NETDEV_TX_OK and NETDEV_TX_BUSY in the documentation, so maybe we should make the return code from ndo_start_xmit() a boolean. I like that the most, but perhaps some ancient, not-really-networking protocol would suffer. 3) make TCP ignore the errors It is not entirely clear to me what benefit TCP gets from interpreting the result of ip_queue_xmit()? Specifically once the connection is established and we're pushing data - packet loss is just packet loss? 4) this fix Ignore the rc in the Qdisc-less+GSO case, since it's unreliable. We already always return OK in the TCQ_F_CAN_BYPASS case. In the Qdisc-less case let's be a bit more conservative and only mask the GSO errors. This path is taken by non-IP-"networks" like CAN, MCTP etc, so we could regress some ancient thing. This is the simplest, but also maybe the hackiest fix? Similar fix has been proposed by Eric in the past but never committed because original reporter was working with an OOT driver and wasn't providing feedback (see Link). Link: https://lore.kernel.org/CANn89iJcLepEin7EtBETrZ36bjoD9LrR=k4cfwWh046GB+4f9A@mail.gmail.com Fixes: 1f59533f9ca5 ("qdisc: validate frames going through the direct_xmit path") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260223235100.108939-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/core/dev.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f3426385f1ba..3d2d3b18915c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4822,6 +4822,8 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) * to -1 or to their cpu id, but not to our id. */ if (READ_ONCE(txq->xmit_lock_owner) != cpu) { + bool is_list = false; + if (dev_xmit_recursion()) goto recursion_alert; @@ -4832,17 +4834,28 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { + is_list = !!skb->next; + dev_xmit_recursion_inc(); skb = dev_hard_start_xmit(skb, dev, txq, &rc); dev_xmit_recursion_dec(); - if (dev_xmit_complete(rc)) { - HARD_TX_UNLOCK(dev, txq); - goto out; - } + + /* GSO segments a single SKB into + * a list of frames. TCP expects error + * to mean none of the data was sent. + */ + if (is_list) + rc = NETDEV_TX_OK; } HARD_TX_UNLOCK(dev, txq); + if (!skb) /* xmit completed */ + goto out; + net_crit_ratelimited("Virtual device %s asks to queue packet!\n", dev->name); + /* NETDEV_TX_BUSY or queue was stopped */ + if (!is_list) + rc = -ENETDOWN; } else { /* Recursion is detected! It is possible, * unfortunately @@ -4850,10 +4863,10 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) recursion_alert: net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", dev->name); + rc = -ENETDOWN; } } - rc = -ENETDOWN; rcu_read_unlock_bh(); dev_core_stats_tx_dropped_inc(dev); From 8a5752c6dcc085a3bfc78589925182e4e98468c5 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Tue, 24 Feb 2026 19:05:56 +0800 Subject: [PATCH 429/828] dpaa2-switch: validate num_ifs to prevent out-of-bounds write The driver obtains sw_attr.num_ifs from firmware via dpsw_get_attributes() but never validates it against DPSW_MAX_IF (64). This value controls iteration in dpaa2_switch_fdb_get_flood_cfg(), which writes port indices into the fixed-size cfg->if_id[DPSW_MAX_IF] array. When firmware reports num_ifs >= 64, the loop can write past the array bounds. Add a bound check for num_ifs in dpaa2_switch_init(). dpaa2_switch_fdb_get_flood_cfg() appends the control interface (port num_ifs) after all matched ports. When num_ifs == DPSW_MAX_IF and all ports match the flood filter, the loop fills all 64 slots and the control interface write overflows by one entry. The check uses >= because num_ifs == DPSW_MAX_IF is also functionally broken. build_if_id_bitmap() silently drops any ID >= 64: if (id[i] < DPSW_MAX_IF) bmap[id[i] / 64] |= ... Fixes: 539dda3c5d19 ("staging: dpaa2-switch: properly setup switching domains") Signed-off-by: Junrui Luo Reviewed-by: Ioana Ciornei Link: https://patch.msgid.link/SYBPR01MB78812B47B7F0470B617C408AAF74A@SYBPR01MB7881.ausprd01.prod.outlook.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 66240c340492..78e21b46a5ba 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -3034,6 +3034,13 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev) goto err_close; } + if (ethsw->sw_attr.num_ifs >= DPSW_MAX_IF) { + dev_err(dev, "DPSW num_ifs %u exceeds max %u\n", + ethsw->sw_attr.num_ifs, DPSW_MAX_IF); + err = -EINVAL; + goto err_close; + } + err = dpsw_get_api_version(ethsw->mc_io, 0, ðsw->major, ðsw->minor); From 003ce8c9b2ca28fbb4860651e76fb1c9a91f2ea1 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 26 Feb 2026 11:17:28 +0000 Subject: [PATCH 430/828] ALSA: hda: cs35l56: Fix signedness error in cs35l56_hda_posture_put() In cs35l56_hda_posture_put() assign ucontrol->value.integer.value[0] to a long instead of an unsigned long. ucontrol->value.integer.value[0] is a long. This fixes the sparse warning: sound/hda/codecs/side-codecs/cs35l56_hda.c:256:20: warning: unsigned value that used to be signed checked against zero? sound/hda/codecs/side-codecs/cs35l56_hda.c:252:29: signed value source Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea8 ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://patch.msgid.link/20260226111728.1700431-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/cs35l56_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index cfc8de2ae499..eb66827eabf8 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -249,7 +249,7 @@ static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol); - unsigned long pos = ucontrol->value.integer.value[0]; + long pos = ucontrol->value.integer.value[0]; bool changed; int ret; From baed0d9ba91d4f390da12d5039128ee897253d60 Mon Sep 17 00:00:00 2001 From: Vahagn Vardanian Date: Wed, 25 Feb 2026 14:06:18 +0100 Subject: [PATCH 431/828] netfilter: nf_conntrack_h323: fix OOB read in decode_choice() In decode_choice(), the boundary check before get_len() uses the variable `len`, which is still 0 from its initialization at the top of the function: unsigned int type, ext, len = 0; ... if (ext || (son->attr & OPEN)) { BYTE_ALIGN(bs); if (nf_h323_error_boundary(bs, len, 0)) /* len is 0 here */ return H323_ERROR_BOUND; len = get_len(bs); /* OOB read */ When the bitstream is exactly consumed (bs->cur == bs->end), the check nf_h323_error_boundary(bs, 0, 0) evaluates to (bs->cur + 0 > bs->end), which is false. The subsequent get_len() call then dereferences *bs->cur++, reading 1 byte past the end of the buffer. If that byte has bit 7 set, get_len() reads a second byte as well. This can be triggered remotely by sending a crafted Q.931 SETUP message with a User-User Information Element containing exactly 2 bytes of PER-encoded data ({0x08, 0x00}) to port 1720 through a firewall with the nf_conntrack_h323 helper active. The decoder fully consumes the PER buffer before reaching this code path, resulting in a 1-2 byte heap-buffer-overflow read confirmed by AddressSanitizer. Fix this by checking for 2 bytes (the maximum that get_len() may read) instead of the uninitialized `len`. This matches the pattern used at every other get_len() call site in the same file, where the caller checks for 2 bytes of available data before calling get_len(). Fixes: ec8a8f3c31dd ("netfilter: nf_ct_h323: Extend nf_h323_error_boundary to work on bits as well") Signed-off-by: Vahagn Vardanian Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260225130619.1248-2-fw@strlen.de Signed-off-by: Paolo Abeni --- net/netfilter/nf_conntrack_h323_asn1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 540d97715bd2..62aa22a07876 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -796,7 +796,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (ext || (son->attr & OPEN)) { BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, len, 0)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); if (nf_h323_error_boundary(bs, len, 0)) From 36d9579fed6c9429aa172f77bd28c58696ce8e2b Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Tue, 10 Feb 2026 19:09:32 +0100 Subject: [PATCH 432/828] drm/solomon: Fix page start when updating rectangle in page addressing mode In page addressing mode, the pixel values of a dirty rectangle must be sent to the display controller one page at a time. The range of pages corresponding to a given rectangle is being incorrectly calculated as if the Y value of the top left coordinate of the rectangle was 0. This can result in rectangle updates being displayed on wrong parts of the screen. Fix the above issue by consolidating the start page calculation in a single place at the beginning of the update_rect function, and using the calculated value for all addressing modes. Fixes: b0daaa5cfaa5 ("drm/ssd130x: Support page addressing mode") Signed-off-by: Francesco Lavra Reviewed-by: Javier Martinez Canillas Link: https://patch.msgid.link/20260210180932.736502-1-flavra@baylibre.com Signed-off-by: Javier Martinez Canillas --- drivers/gpu/drm/solomon/ssd130x.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 6ecf9e2ff61b..c77455b1834d 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -737,6 +737,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x, unsigned int height = drm_rect_height(rect); unsigned int line_length = DIV_ROUND_UP(width, 8); unsigned int page_height = SSD130X_PAGE_HEIGHT; + u8 page_start = ssd130x->page_offset + y / page_height; unsigned int pages = DIV_ROUND_UP(height, page_height); struct drm_device *drm = &ssd130x->drm; u32 array_idx = 0; @@ -774,14 +775,11 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x, */ if (!ssd130x->page_address_mode) { - u8 page_start; - /* Set address range for horizontal addressing mode */ ret = ssd130x_set_col_range(ssd130x, ssd130x->col_offset + x, width); if (ret < 0) return ret; - page_start = ssd130x->page_offset + y / page_height; ret = ssd130x_set_page_range(ssd130x, page_start, pages); if (ret < 0) return ret; @@ -813,7 +811,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x, */ if (ssd130x->page_address_mode) { ret = ssd130x_set_page_pos(ssd130x, - ssd130x->page_offset + i, + page_start + i, ssd130x->col_offset + x); if (ret < 0) return ret; From 786ea2b694f48e1b34f1dcf104e09357fc99ef34 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 26 Feb 2026 12:41:15 +0000 Subject: [PATCH 433/828] ALSA: hda: cs35l56: Remove unnecessary struct cs_dsp_client_ops Since commit af37511305c0 ("firmware: cs_dsp: Don't require client to provide a struct cs_dsp_client_ops") the client doesn't have to provide a struct cs_dsp_client_ops. So remove the dummy cs_dsp_client_ops. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260226124115.1811187-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/cs35l56_hda.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index eb66827eabf8..1ace4beef508 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -403,10 +403,6 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56) snd_ctl_remove(cs35l56->codec->card, cs35l56->volume_ctl); } -static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { - /* cs_dsp requires the client to provide this even if it is empty */ -}; - static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, const struct firmware **firmware, char **filename, const char *base_name, const char *system_name, @@ -1149,7 +1145,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) cs35l56->base.cal_index = cs35l56->index; cs35l56_init_cs_dsp(&cs35l56->base, &cs35l56->cs_dsp); - cs35l56->cs_dsp.client_ops = &cs35l56_hda_client_ops; if (cs35l56->base.reset_gpio) { dev_dbg(cs35l56->base.dev, "Hard reset\n"); From a0b4c7a49137ed21279f354eb59f49ddae8dffc2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Feb 2026 13:32:33 +0000 Subject: [PATCH 434/828] netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence Fix netfslib such that when it's making an unbuffered or DIO write, to make sure that it sends each subrequest strictly sequentially, waiting till the previous one is 'committed' before sending the next so that we don't have pieces landing out of order and potentially leaving a hole if an error occurs (ENOSPC for example). This is done by copying in just those bits of issuing, collecting and retrying subrequests that are necessary to do one subrequest at a time. Retrying, in particular, is simpler because if the current subrequest needs retrying, the source iterator can just be copied again and the subrequest prepped and issued again without needing to be concerned about whether it needs merging with the previous or next in the sequence. Note that the issuing loop waits for a subrequest to complete right after issuing it, but this wait could be moved elsewhere allowing preparatory steps to be performed whilst the subrequest is in progress. In particular, once content encryption is available in netfslib, that could be done whilst waiting, as could cleanup of buffers that have been completed. Fixes: 153a9961b551 ("netfs: Implement unbuffered/DIO write support") Signed-off-by: David Howells Link: https://patch.msgid.link/58526.1772112753@warthog.procyon.org.uk Tested-by: Steve French Reviewed-by: Paulo Alcantara (Red Hat) cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/direct_write.c | 228 ++++++++++++++++++++++++++++++++--- fs/netfs/internal.h | 4 +- fs/netfs/write_collect.c | 21 ---- fs/netfs/write_issue.c | 41 +------ include/trace/events/netfs.h | 4 +- 5 files changed, 221 insertions(+), 77 deletions(-) diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index a9d1c3b2c084..dd1451bf7543 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -9,6 +9,202 @@ #include #include "internal.h" +/* + * Perform the cleanup rituals after an unbuffered write is complete. + */ +static void netfs_unbuffered_write_done(struct netfs_io_request *wreq) +{ + struct netfs_inode *ictx = netfs_inode(wreq->inode); + + _enter("R=%x", wreq->debug_id); + + /* Okay, declare that all I/O is complete. */ + trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); + + if (!wreq->error) + netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); + + if (wreq->origin == NETFS_DIO_WRITE && + wreq->mapping->nrpages) { + /* mmap may have got underfoot and we may now have folios + * locally covering the region we just wrote. Attempt to + * discard the folios, but leave in place any modified locally. + * ->write_iter() is prevented from interfering by the DIO + * counter. + */ + pgoff_t first = wreq->start >> PAGE_SHIFT; + pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; + + invalidate_inode_pages2_range(wreq->mapping, first, last); + } + + if (wreq->origin == NETFS_DIO_WRITE) + inode_dio_end(wreq->inode); + + _debug("finished"); + netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ + + if (wreq->iocb) { + size_t written = umin(wreq->transferred, wreq->len); + + wreq->iocb->ki_pos += written; + if (wreq->iocb->ki_complete) { + trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); + wreq->iocb->ki_complete(wreq->iocb, wreq->error ?: written); + } + wreq->iocb = VFS_PTR_POISON; + } + + netfs_clear_subrequests(wreq); +} + +/* + * Collect the subrequest results of unbuffered write subrequests. + */ +static void netfs_unbuffered_write_collect(struct netfs_io_request *wreq, + struct netfs_io_stream *stream, + struct netfs_io_subrequest *subreq) +{ + trace_netfs_collect_sreq(wreq, subreq); + + spin_lock(&wreq->lock); + list_del_init(&subreq->rreq_link); + spin_unlock(&wreq->lock); + + wreq->transferred += subreq->transferred; + iov_iter_advance(&wreq->buffer.iter, subreq->transferred); + + stream->collected_to = subreq->start + subreq->transferred; + wreq->collected_to = stream->collected_to; + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); + + trace_netfs_collect_stream(wreq, stream); + trace_netfs_collect_state(wreq, wreq->collected_to, 0); +} + +/* + * Write data to the server without going through the pagecache and without + * writing it to the local cache. We dispatch the subrequests serially and + * wait for each to complete before dispatching the next, lest we leave a gap + * in the data written due to a failure such as ENOSPC. We could, however + * attempt to do preparation such as content encryption for the next subreq + * whilst the current is in progress. + */ +static int netfs_unbuffered_write(struct netfs_io_request *wreq) +{ + struct netfs_io_subrequest *subreq = NULL; + struct netfs_io_stream *stream = &wreq->io_streams[0]; + int ret; + + _enter("%llx", wreq->len); + + if (wreq->origin == NETFS_DIO_WRITE) + inode_dio_begin(wreq->inode); + + stream->collected_to = wreq->start; + + for (;;) { + bool retry = false; + + if (!subreq) { + netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred); + subreq = stream->construct; + stream->construct = NULL; + stream->front = NULL; + } + + /* Check if (re-)preparation failed. */ + if (unlikely(test_bit(NETFS_SREQ_FAILED, &subreq->flags))) { + netfs_write_subrequest_terminated(subreq, subreq->error); + wreq->error = subreq->error; + break; + } + + iov_iter_truncate(&subreq->io_iter, wreq->len - wreq->transferred); + if (!iov_iter_count(&subreq->io_iter)) + break; + + subreq->len = netfs_limit_iter(&subreq->io_iter, 0, + stream->sreq_max_len, + stream->sreq_max_segs); + iov_iter_truncate(&subreq->io_iter, subreq->len); + stream->submit_extendable_to = subreq->len; + + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); + stream->issue_write(subreq); + + /* Async, need to wait. */ + netfs_wait_for_in_progress_stream(wreq, stream); + + if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { + retry = true; + } else if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) { + ret = subreq->error; + wreq->error = ret; + netfs_see_subrequest(subreq, netfs_sreq_trace_see_failed); + subreq = NULL; + break; + } + ret = 0; + + if (!retry) { + netfs_unbuffered_write_collect(wreq, stream, subreq); + subreq = NULL; + if (wreq->transferred >= wreq->len) + break; + if (!wreq->iocb && signal_pending(current)) { + ret = wreq->transferred ? -EINTR : -ERESTARTSYS; + trace_netfs_rreq(wreq, netfs_rreq_trace_intr); + break; + } + continue; + } + + /* We need to retry the last subrequest, so first reset the + * iterator, taking into account what, if anything, we managed + * to transfer. + */ + subreq->error = -EAGAIN; + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); + if (subreq->transferred > 0) + iov_iter_advance(&wreq->buffer.iter, subreq->transferred); + + if (stream->source == NETFS_UPLOAD_TO_SERVER && + wreq->netfs_ops->retry_request) + wreq->netfs_ops->retry_request(wreq, stream); + + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); + __clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); + __clear_bit(NETFS_SREQ_FAILED, &subreq->flags); + subreq->io_iter = wreq->buffer.iter; + subreq->start = wreq->start + wreq->transferred; + subreq->len = wreq->len - wreq->transferred; + subreq->transferred = 0; + subreq->retry_count += 1; + stream->sreq_max_len = UINT_MAX; + stream->sreq_max_segs = INT_MAX; + + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + stream->prepare_write(subreq); + + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + netfs_stat(&netfs_n_wh_retry_write_subreq); + } + + netfs_unbuffered_write_done(wreq); + _leave(" = %d", ret); + return ret; +} + +static void netfs_unbuffered_write_async(struct work_struct *work) +{ + struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); + + netfs_unbuffered_write(wreq); + netfs_put_request(wreq, netfs_rreq_trace_put_complete); +} + /* * Perform an unbuffered write where we may have to do an RMW operation on an * encrypted file. This can also be used for direct I/O writes. @@ -70,35 +266,35 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * */ wreq->buffer.iter = *iter; } + + wreq->len = iov_iter_count(&wreq->buffer.iter); } __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags); - if (async) - __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); /* Copy the data into the bounce buffer and encrypt it. */ // TODO /* Dispatch the write. */ __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); - if (async) + + if (async) { + INIT_WORK(&wreq->work, netfs_unbuffered_write_async); wreq->iocb = iocb; - wreq->len = iov_iter_count(&wreq->buffer.iter); - ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); - if (ret < 0) { - _debug("begin = %zd", ret); - goto out; - } - - if (!async) { - ret = netfs_wait_for_write(wreq); - if (ret > 0) - iocb->ki_pos += ret; - } else { + queue_work(system_dfl_wq, &wreq->work); ret = -EIOCBQUEUED; + } else { + ret = netfs_unbuffered_write(wreq); + if (ret < 0) { + _debug("begin = %zd", ret); + } else { + iocb->ki_pos += wreq->transferred; + ret = wreq->transferred ?: wreq->error; + } + + netfs_put_request(wreq, netfs_rreq_trace_put_complete); } -out: netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 4319611f5354..d436e20d3418 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -198,6 +198,9 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, struct file *file, loff_t start, enum netfs_io_origin origin); +void netfs_prepare_write(struct netfs_io_request *wreq, + struct netfs_io_stream *stream, + loff_t start); void netfs_reissue_write(struct netfs_io_stream *stream, struct netfs_io_subrequest *subreq, struct iov_iter *source); @@ -212,7 +215,6 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio **writethrough_cache); ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, struct folio *writethrough_cache); -int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len); /* * write_retry.c diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 61eab34ea67e..83eb3dc1adf8 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -399,27 +399,6 @@ bool netfs_write_collection(struct netfs_io_request *wreq) ictx->ops->invalidate_cache(wreq); } - if ((wreq->origin == NETFS_UNBUFFERED_WRITE || - wreq->origin == NETFS_DIO_WRITE) && - !wreq->error) - netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); - - if (wreq->origin == NETFS_DIO_WRITE && - wreq->mapping->nrpages) { - /* mmap may have got underfoot and we may now have folios - * locally covering the region we just wrote. Attempt to - * discard the folios, but leave in place any modified locally. - * ->write_iter() is prevented from interfering by the DIO - * counter. - */ - pgoff_t first = wreq->start >> PAGE_SHIFT; - pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; - invalidate_inode_pages2_range(wreq->mapping, first, last); - } - - if (wreq->origin == NETFS_DIO_WRITE) - inode_dio_end(wreq->inode); - _debug("finished"); netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 34894da5a23e..437268f65640 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -154,9 +154,9 @@ EXPORT_SYMBOL(netfs_prepare_write_failed); * Prepare a write subrequest. We need to allocate a new subrequest * if we don't have one. */ -static void netfs_prepare_write(struct netfs_io_request *wreq, - struct netfs_io_stream *stream, - loff_t start) +void netfs_prepare_write(struct netfs_io_request *wreq, + struct netfs_io_stream *stream, + loff_t start) { struct netfs_io_subrequest *subreq; struct iov_iter *wreq_iter = &wreq->buffer.iter; @@ -698,41 +698,6 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c return ret; } -/* - * Write data to the server without going through the pagecache and without - * writing it to the local cache. - */ -int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len) -{ - struct netfs_io_stream *upload = &wreq->io_streams[0]; - ssize_t part; - loff_t start = wreq->start; - int error = 0; - - _enter("%zx", len); - - if (wreq->origin == NETFS_DIO_WRITE) - inode_dio_begin(wreq->inode); - - while (len) { - // TODO: Prepare content encryption - - _debug("unbuffered %zx", len); - part = netfs_advance_write(wreq, upload, start, len, false); - start += part; - len -= part; - rolling_buffer_advance(&wreq->buffer, part); - if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) - netfs_wait_for_paused_write(wreq); - if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) - break; - } - - netfs_end_issue_write(wreq); - _leave(" = %d", error); - return error; -} - /* * Write some of a pending folio data back to the server and/or the cache. */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 64a382fbc31a..2d366be46a1c 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -57,6 +57,7 @@ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_intr, "INTR ") \ EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ @@ -169,7 +170,8 @@ EM(netfs_sreq_trace_put_oom, "PUT OOM ") \ EM(netfs_sreq_trace_put_wip, "PUT WIP ") \ EM(netfs_sreq_trace_put_work, "PUT WORK ") \ - E_(netfs_sreq_trace_put_terminated, "PUT TERM ") + EM(netfs_sreq_trace_put_terminated, "PUT TERM ") \ + E_(netfs_sreq_trace_see_failed, "SEE FAILED ") #define netfs_folio_traces \ EM(netfs_folio_is_uptodate, "mod-uptodate") \ From 2970525f789c080e7e82ecb09cd85a8bb1d284e4 Mon Sep 17 00:00:00 2001 From: Jingkai Tan Date: Thu, 22 Jan 2026 21:14:10 +0000 Subject: [PATCH 435/828] btrfs: handle discard errors in in btrfs_finish_extent_commit() Coverity (ID: 1226842) reported that the return value of btrfs_discard_extent() is assigned to ret but is immediately overwritten by unpin_extent_range() without being checked. Use the same error handling that is done later in the same function. Signed-off-by: Jingkai Tan Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 03cf9f242c70..b0d9baf5b412 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2933,9 +2933,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) while (!TRANS_ABORTED(trans) && cached_state) { struct extent_state *next_state; - if (btrfs_test_opt(fs_info, DISCARD_SYNC)) + if (btrfs_test_opt(fs_info, DISCARD_SYNC)) { ret = btrfs_discard_extent(fs_info, start, end + 1 - start, NULL, true); + if (ret) { + btrfs_warn(fs_info, + "discard failed for extent [%llu, %llu]: errno=%d %s", + start, end, ret, btrfs_decode_error(ret)); + } + } next_state = btrfs_next_extent_state(unpin, cached_state); btrfs_clear_extent_dirty(unpin, start, end, &cached_state); From a4fe134fc1d8eb7dcd07e0961c5711b443decd89 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 9 Feb 2026 14:01:45 +1030 Subject: [PATCH 436/828] btrfs: fix a double release on reserved extents in cow_one_range() [BUG] Commit c28214bde6da ("btrfs: refactor the main loop of cow_file_range()") refactored the handling of COWing one range. However it changed the error handling of the reserved extent. The old cleanup looks like this: out_drop_extent_cache: btrfs_drop_extent_map_range(inode, start, start + cur_alloc_size - 1, false); out_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, true); [...] clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV; page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK; /* * For the range (2). If we reserved an extent for our delalloc range * (or a subrange) and failed to create the respective ordered extent, * then it means that when we reserved the extent we decremented the * extent's size from the data space_info's bytes_may_use counter and * incremented the space_info's bytes_reserved counter by the same * amount. We must make sure extent_clear_unlock_delalloc() does not try * to decrement again the data space_info's bytes_may_use counter, * therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV. */ if (cur_alloc_size) { extent_clear_unlock_delalloc(inode, start, start + cur_alloc_size - 1, locked_folio, &cached, clear_bits, page_ops); btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); } Which only calls EXTENT_CLEAR_META_RESV. As the reserved extent is properly handled by btrfs_free_reserved_extent(). However the new cleanup is: extent_clear_unlock_delalloc(inode, file_offset, cur_end, locked_folio, cached, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); btrfs_qgroup_free_data(inode, NULL, file_offset, cur_len, NULL); btrfs_dec_block_group_reservations(fs_info, ins->objectid); btrfs_free_reserved_extent(fs_info, ins->objectid, ins->offset, true); The flag EXTENT_DO_ACCOUNTING implies both EXTENT_CLEAR_META_RESV and EXTENT_CLEAR_DATA_RESV, which will release the bytes_may_use, which later btrfs_free_reserved_extent() will do again, causing incorrect double release (and may underflow bytes_may_use). [FIX] Use EXTENT_CLEAR_META_RESV to replace EXTENT_DO_ACCOUNTING, and add back the comments on why we only use EXTENT_CLEAR_META_RESV. Fixes: c28214bde6da ("btrfs: refactor the main loop of cow_file_range()") Reported-by: Chris Mason Link: https://lore.kernel.org/linux-btrfs/20260208184920.1102719-1-clm@meta.com/ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b6c763a17406..ade590707793 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1392,10 +1392,25 @@ static int cow_one_range(struct btrfs_inode *inode, struct folio *locked_folio, return ret; free_reserved: + /* + * If we have reserved an extent for the current range and failed to + * create the respective extent map or ordered extent, it means that + * when we reserved the extent we decremented the extent's size from + * the data space_info's bytes_may_use counter and + * incremented the space_info's bytes_reserved counter by the same + * amount. + * + * We must make sure extent_clear_unlock_delalloc() does not try + * to decrement again the data space_info's bytes_may_use counter, which + * will be handled by btrfs_free_reserved_extent(). + * + * Therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV, but only + * EXTENT_CLEAR_META_RESV. + */ extent_clear_unlock_delalloc(inode, file_offset, cur_end, locked_folio, cached, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | - EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, + EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); btrfs_qgroup_free_data(inode, NULL, file_offset, cur_len, NULL); From 0649355303995d6539df1fa8feaf50497f94db9b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Feb 2026 15:27:14 +0000 Subject: [PATCH 437/828] btrfs: change warning messages to error level in open_ctree() Failure to read the fs root results in a mount error, but we log a warning message. Same goes for checking the UUID tree, an error results in a mount failure but we log a warning message. Change the level of the logged messages from warning to error. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 13e400046c87..de8ce9c588fe 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3642,7 +3642,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { ret = PTR_ERR(fs_info->fs_root); - btrfs_warn(fs_info, "failed to read fs tree: %d", ret); + btrfs_err(fs_info, "failed to read fs tree: %d", ret); fs_info->fs_root = NULL; goto fail_qgroup; } @@ -3663,8 +3663,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device btrfs_info(fs_info, "checking UUID tree"); ret = btrfs_check_uuid_tree(fs_info); if (ret) { - btrfs_warn(fs_info, - "failed to check the UUID tree: %d", ret); + btrfs_err(fs_info, "failed to check the UUID tree: %d", ret); close_ctree(fs_info); return ret; } From 64def7d7d62b61044fe619e3cc9cbe60454decd9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Feb 2026 15:37:41 +0000 Subject: [PATCH 438/828] btrfs: remove redundant warning message in btrfs_check_uuid_tree() If we fail to start the UUID rescan kthread, btrfs_check_uuid_tree() logs an error message and returns the error to the single caller, open_ctree(). This however is redundant since the caller already logs an error message, which is also more informative since it logs the error code. Some remove the warning message from btrfs_check_uuid_tree() as it doesn't add any value. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de8ce9c588fe..b855e2cec2ec 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2972,7 +2972,6 @@ static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { /* fs_info->update_uuid_tree_gen remains 0 in all error case */ - btrfs_warn(fs_info, "failed to start uuid_rescan task"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); } From 4db8d56c6f4cbea7293d4236efac4a507dbfa6b1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Feb 2026 15:47:57 +0000 Subject: [PATCH 439/828] btrfs: remove btrfs_handle_fs_error() after failure to recover log trees There is no need to call btrfs_handle_fs_error() (which we are trying to deprecate) if we fail to recover log trees: 1) Such a failure results in failing the mount immediately; 2) If the recovery started a transaction before failing, it has already aborted the transaction down in the call chain. So remove the btrfs_handle_fs_error() call, replace it with an error message and assert that the FS is in error state (so that no partial updates are committed due to a transaction that was not aborted). Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b855e2cec2ec..e6574456a306 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2023,9 +2023,9 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); btrfs_put_root(log_tree_root); - if (ret) { - btrfs_handle_fs_error(fs_info, ret, - "Failed to recover log tree"); + if (unlikely(ret)) { + ASSERT(BTRFS_FS_ERROR(fs_info) != 0); + btrfs_err(fs_info, "failed to recover log trees with error: %d", ret); return ret; } From 912db40655684a0a427c8b118dd0c36eb5a61fe4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Feb 2026 09:49:49 +0000 Subject: [PATCH 440/828] btrfs: convert log messages to error level in btrfs_replay_log() We are logging messages as warnings but they should really have an error level instead, as if the respective conditions are met the mount will fail. So convert them to error level and also log the error code returned by read_tree_block(). Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e6574456a306..028ba86bcf73 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1994,7 +1994,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, int level = btrfs_super_log_root_level(disk_super); if (unlikely(fs_devices->rw_devices == 0)) { - btrfs_warn(fs_info, "log replay required on RO media"); + btrfs_err(fs_info, "log replay required on RO media"); return -EIO; } @@ -2008,9 +2008,9 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, check.owner_root = BTRFS_TREE_LOG_OBJECTID; log_tree_root->node = read_tree_block(fs_info, bytenr, &check); if (IS_ERR(log_tree_root->node)) { - btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); log_tree_root->node = NULL; + btrfs_err(fs_info, "failed to read log tree with error: %d", ret); btrfs_put_root(log_tree_root); return ret; } From 8ac7fad32b93044f4350ab35f11fee1a61286723 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Feb 2026 14:23:29 +0000 Subject: [PATCH 441/828] btrfs: remove pointless WARN_ON() in cache_save_setup() This WARN_ON(ret) is never executed since the previous if statement makes us jump into the 'out_put' label when ret is not zero. The existing transaction abort inside the if statement also gives us a stack trace, so we don't need to move the WARN_ON(ret) into the if statement either. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5f76683b3f21..77285ade3a0e 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3341,7 +3341,6 @@ again: btrfs_abort_transaction(trans, ret); goto out_put; } - WARN_ON(ret); /* We've already setup this transaction, go ahead and exit */ if (block_group->cache_generation == trans->transid && From 2ab22446425c2c72b44926bc964c263e06e7e137 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 11 Feb 2026 11:01:25 -0800 Subject: [PATCH 442/828] btrfs: fix referenced/exclusive check in squota_check_parent_usage() We compared rfer_cmpr against excl_cmpr_sum instead of rfer_cmpr_sum which is confusing. I expect that rfer_cmpr == excl_cmpr in squota, but it is much better to be consistent in case of any surprises or bugs. Reported-by: Chris Mason Link: https://lore.kernel.org/linux-btrfs/cover.1764796022.git.boris@bur.io/T/#mccb231643ffd290b44a010d4419474d280be5537 Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 38adadb936dc..19edd25ff5d1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -370,7 +370,7 @@ static bool squota_check_parent_usage(struct btrfs_fs_info *fs_info, struct btrf nr_members++; } mismatch = (parent->excl != excl_sum || parent->rfer != rfer_sum || - parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != excl_cmpr_sum); + parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != rfer_cmpr_sum); WARN(mismatch, "parent squota qgroup %hu/%llu has mismatched usage from its %d members. " From 3f501412f2079ca14bf68a18d80a2b7a823f1f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Mon, 16 Feb 2026 22:12:15 +0100 Subject: [PATCH 443/828] btrfs: free pages on error in btrfs_uring_read_extent() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this function the 'pages' object is never freed in the hopes that it is picked up by btrfs_uring_read_finished() whenever that executes in the future. But that's just the happy path. Along the way previous allocations might have gone wrong, or we might not get -EIOCBQUEUED from btrfs_encoded_read_regular_fill_pages(). In all these cases, we go to a cleanup section that frees all memory allocated by this function without assuming any deferred execution, and this also needs to happen for the 'pages' allocation. Fixes: 34310c442e17 ("btrfs: add io_uring command for encoded reads (ENCODED_READ ioctl)") Signed-off-by: Miquel Sabaté Solà Reviewed-by: Filipe Manana Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f1b56be6f8f4..dadf9bf30f08 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4651,7 +4651,7 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter, { struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp)); struct extent_io_tree *io_tree = &inode->io_tree; - struct page **pages; + struct page **pages = NULL; struct btrfs_uring_priv *priv = NULL; unsigned long nr_pages; int ret; @@ -4709,6 +4709,11 @@ out_fail: btrfs_unlock_extent(io_tree, start, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); kfree(priv); + for (int i = 0; i < nr_pages; i++) { + if (pages[i]) + __free_page(pages[i]); + } + kfree(pages); return ret; } From a7526533128fd3210e15cee1e14c7a0fe97087c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Mon, 16 Feb 2026 01:22:52 +0100 Subject: [PATCH 444/828] btrfs: don't commit the super block when unmounting a shutdown filesystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When unmounting a filesystem we will try, among many other things, to commit the super block. On a filesystem that was shutdown, though, this will always fail with -EROFS as writes are forbidden on this context; and an error will be reported. Don't commit the super block on this situation, which should be fine as the filesystem is frozen before shutdown and, therefore, it should be at a consistent state. Signed-off-by: Miquel Sabaté Solà Reviewed-by: Filipe Manana Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 028ba86bcf73..6a38489a4e18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4397,9 +4397,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) */ btrfs_flush_workqueue(fs_info->delayed_workers); - ret = btrfs_commit_super(fs_info); - if (ret) - btrfs_err(fs_info, "commit super ret %d", ret); + /* + * If the filesystem is shutdown, then an attempt to commit the + * super block (or any write) will just fail. Since we freeze + * the filesystem before shutting it down, the filesystem is in + * a consistent state and we don't need to commit super blocks. + */ + if (!btrfs_is_shutdown(fs_info)) { + ret = btrfs_commit_super(fs_info); + if (ret) + btrfs_err(fs_info, "commit super block returned %d", ret); + } } kthread_stop(fs_info->transaction_kthread); From 3cf0f35779d364cf2003c617bb7f3f3e41023372 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 17 Feb 2026 18:25:42 +0000 Subject: [PATCH 445/828] btrfs: fix error message order of parameters in btrfs_delete_delayed_dir_index() Fix the error message in btrfs_delete_delayed_dir_index() if __btrfs_add_delayed_item() fails: the message says root, inode, index, error, but we're actually passing index, root, inode, error. Fixes: adc1ef55dc04 ("btrfs: add details to error messages at btrfs_delete_delayed_dir_index()") Signed-off-by: Mark Harmstone Reviewed-by: Filipe Manana Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 1739a0b29c49..2746841c167d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1657,7 +1657,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, if (unlikely(ret)) { btrfs_err(trans->fs_info, "failed to add delayed dir index item, root: %llu, inode: %llu, index: %llu, error: %d", - index, btrfs_root_id(node->root), node->inode_id, ret); + btrfs_root_id(node->root), node->inode_id, index, ret); btrfs_delayed_item_release_metadata(dir->root, item); btrfs_release_delayed_item(item); } From 511dc8912ae3e929c1a182f5e6b2326516fd42a0 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 17 Feb 2026 10:21:44 +0000 Subject: [PATCH 446/828] btrfs: fix incorrect key offset in error message in check_dev_extent_item() Fix the error message in check_dev_extent_item(), when an overlapping stripe is encountered. For dev extents, objectid is the disk number and offset the physical address, so prev_key->objectid should actually be prev_key->offset. (I can't take any credit for this one - this was discovered by Chris and his friend Claude.) Reported-by: Chris Mason Fixes: 008e2512dc56 ("btrfs: tree-checker: add dev extent item checks") Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 452394b34d01..9774779f060b 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1921,7 +1921,7 @@ static int check_dev_extent_item(const struct extent_buffer *leaf, if (unlikely(prev_key->offset + prev_len > key->offset)) { generic_err(leaf, slot, "dev extent overlap, prev offset %llu len %llu current offset %llu", - prev_key->objectid, prev_len, key->offset); + prev_key->offset, prev_len, key->offset); return -EUCLEAN; } } From a10172780526c2002e062102ad4f2aabac495889 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 17 Feb 2026 14:39:46 +0000 Subject: [PATCH 447/828] btrfs: fix objectid value in error message in check_extent_data_ref() Fix a copy-paste error in check_extent_data_ref(): we're printing root as in the message above, we should be printing objectid. Fixes: f333a3c7e832 ("btrfs: tree-checker: validate dref root and objectid") Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 9774779f060b..ac4c4573ee39 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1740,7 +1740,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf, objectid > BTRFS_LAST_FREE_OBJECTID)) { extent_err(leaf, slot, "invalid extent data backref objectid value %llu", - root); + objectid); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { From 44e2fda66427a0442d8d2c0e6443256fb458ab6b Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 17 Feb 2026 17:46:13 +0000 Subject: [PATCH 448/828] btrfs: fix warning in scrub_verify_one_metadata() Commit b471965fdb2d ("btrfs: fix replace/scrub failure with metadata_uuid") fixed the comparison in scrub_verify_one_metadata() to use metadata_uuid rather than fsid, but left the warning as it was. Fix it so it matches what we're doing. Fixes: b471965fdb2d ("btrfs: fix replace/scrub failure with metadata_uuid") Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2a64e2d50ced..052d83feea9a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -744,7 +744,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr btrfs_warn_rl(fs_info, "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU", logical, stripe->mirror_num, - header->fsid, fs_info->fs_devices->fsid); + header->fsid, fs_info->fs_devices->metadata_uuid); return; } if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid, From 1c7e9111f4e6d6d42bc47759c9af1ef91f03ac2c Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 17 Feb 2026 17:32:39 +0000 Subject: [PATCH 449/828] btrfs: print correct subvol num if active swapfile prevents deletion Fix the error message in btrfs_delete_subvolume() if we can't delete a subvolume because it has an active swapfile: we were printing the number of the parent rather than the target. Fixes: 60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted") Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ade590707793..d28d55beaacd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4779,7 +4779,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) spin_unlock(&dest->root_item_lock); btrfs_warn(fs_info, "attempt to delete subvolume %llu with active swapfile", - btrfs_root_id(root)); + btrfs_root_id(dest)); ret = -EPERM; goto out_up_write; } From 587bb33b10bda645a1028c1737ad3992b3d7cf61 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 17 Feb 2026 17:46:41 +0000 Subject: [PATCH 450/828] btrfs: fix compat mask in error messages in btrfs_check_features() Commit d7f67ac9a928 ("btrfs: relax block-group-tree feature dependency checks") introduced a regression when it comes to handling unsupported incompat or compat_ro flags. Beforehand we only printed the flags that we didn't recognize, afterwards we printed them all, which is less useful. Fix the error handling so it behaves like it used to. Fixes: d7f67ac9a928 ("btrfs: relax block-group-tree feature dependency checks") Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6a38489a4e18..49987334dd15 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3187,7 +3187,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) { btrfs_err(fs_info, "cannot mount because of unknown incompat features (0x%llx)", - incompat); + incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP); return -EINVAL; } @@ -3219,7 +3219,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) if (compat_ro_unsupp && is_rw_mount) { btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", - compat_ro); + compat_ro_unsupp); return -EINVAL; } @@ -3232,7 +3232,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_err(fs_info, "cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay", - compat_ro); + compat_ro_unsupp); return -EINVAL; } From f15fb3d41543244d1179f423da4a4832a55bc050 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Fri, 20 Feb 2026 12:53:17 +0000 Subject: [PATCH 451/828] btrfs: fix chunk map leak in btrfs_map_block() after btrfs_chunk_map_num_copies() Fix a chunk map leak in btrfs_map_block(): if we return early with -EINVAL, we're not freeing the chunk map that we've just looked up. Fixes: 0ae653fbec2b ("btrfs: reduce chunk_map lookups in btrfs_map_block()") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Filipe Manana Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 50f7aae70418..b8cbd3ecb94d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6921,8 +6921,10 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, } num_copies = btrfs_chunk_map_num_copies(map); - if (io_geom.mirror_num > num_copies) - return -EINVAL; + if (io_geom.mirror_num > num_copies) { + ret = -EINVAL; + goto out; + } map_offset = logical - map->start; io_geom.raid56_full_stripe_start = (u64)-1; From 54b9395b186a60d1655186c561a505c590654395 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Fri, 20 Feb 2026 12:52:56 +0000 Subject: [PATCH 452/828] btrfs: fix chunk map leak in btrfs_map_block() after btrfs_translate_remap() If the call to btrfs_translate_remap() in btrfs_map_block() returns an error code, we were leaking the chunk map. Fix it by jumping to out rather than returning directly. Reported-by: Chris Mason Link: https://lore.kernel.org/linux-btrfs/20260125125830.2352988-1-clm@meta.com/ Fixes: 18ba64992871 ("btrfs: redirect I/O for remapped block groups") Reviewed-by: Filipe Manana Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b8cbd3ecb94d..3c37c5d2267b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6907,7 +6907,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ret = btrfs_translate_remap(fs_info, &new_logical, length); if (ret) - return ret; + goto out; if (new_logical != logical) { btrfs_free_chunk_map(map); From 7885ca40c305c64ffa444e1fc55edd6acb7a6e5b Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Thu, 19 Feb 2026 14:16:02 +0000 Subject: [PATCH 453/828] btrfs: fix transaction handle leaks in btrfs_last_identity_remap_gone() btrfs_abort_transaction(), unlike btrfs_commit_transaction(), doesn't also free the transaction handle. Fix the instances in btrfs_last_identity_remap_gone() where we're also leaking the transaction on abort. Reported-by: Chris Mason Link: https://lore.kernel.org/linux-btrfs/20260125125129.2245240-1-clm@meta.com/ Fixes: 979e1dc3d69e ("btrfs: handle deletions from remapped block group") Reviewed-by: Filipe Manana Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index fcd0a2ba3554..2119dddd6f8e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4723,6 +4723,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map, ret = btrfs_remove_dev_extents(trans, chunk_map); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); return ret; } @@ -4732,6 +4733,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map, if (unlikely(ret)) { mutex_unlock(&trans->fs_info->chunk_mutex); btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); return ret; } } @@ -4750,6 +4752,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map, ret = remove_chunk_stripes(trans, chunk_map, path); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); return ret; } From f8db8009ea65297dba7786668d4561f6dbd99678 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Thu, 19 Feb 2026 14:30:59 +0000 Subject: [PATCH 454/828] btrfs: check block group lookup in remove_range_from_remap_tree() Add a check in remove_range_from_remap_tree() after we call btrfs_lookup_block_group(), to check if it is NULL. This shouldn't happen, but if it does we at least get an error rather than a segfault. Reported-by: Chris Mason Link: https://lore.kernel.org/linux-btrfs/20260125125129.2245240-1-clm@meta.com/ Fixes: 979e1dc3d69e ("btrfs: handle deletions from remapped block group") Reviewed-by: Filipe Manana Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2119dddd6f8e..cdb53c0b26ec 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -5985,6 +5985,9 @@ static int remove_range_from_remap_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group *dest_bg; dest_bg = btrfs_lookup_block_group(fs_info, new_addr); + if (unlikely(!dest_bg)) + return -EUCLEAN; + adjust_block_group_remap_bytes(trans, dest_bg, -overlap_length); btrfs_put_block_group(dest_bg); ret = btrfs_add_to_free_space_tree(trans, From 22f8bcec5aeb05104b3eaa950cb5a345e95f0aa8 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Tue, 3 Feb 2026 08:55:07 +0800 Subject: [PATCH 455/828] HID: intel-ish-hid: ipc: Add Nova Lake-H/S PCI device IDs Add device IDs of Nova Lake-H and Nova Lake-S into ishtp support list. Signed-off-by: Zhang Lixu Reviewed-by: Andy Shevchenko Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/hw-ish.h | 2 ++ drivers/hid/intel-ish-hid/ipc/pci-ish.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index fa5d68c36313..27389971b96c 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -39,6 +39,8 @@ #define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345 #define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445 #define PCI_DEVICE_ID_INTEL_ISH_WCL 0x4D45 +#define PCI_DEVICE_ID_INTEL_ISH_NVL_H 0xD354 +#define PCI_DEVICE_ID_INTEL_ISH_NVL_S 0x6E78 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 1612e8cb23f0..ed3405c05e73 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -28,11 +28,15 @@ enum ishtp_driver_data_index { ISHTP_DRIVER_DATA_LNL_M, ISHTP_DRIVER_DATA_PTL, ISHTP_DRIVER_DATA_WCL, + ISHTP_DRIVER_DATA_NVL_H, + ISHTP_DRIVER_DATA_NVL_S, }; #define ISH_FW_GEN_LNL_M "lnlm" #define ISH_FW_GEN_PTL "ptl" #define ISH_FW_GEN_WCL "wcl" +#define ISH_FW_GEN_NVL_H "nvlh" +#define ISH_FW_GEN_NVL_S "nvls" #define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin" #define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin" @@ -47,6 +51,12 @@ static struct ishtp_driver_data ishtp_driver_data[] = { [ISHTP_DRIVER_DATA_WCL] = { .fw_generation = ISH_FW_GEN_WCL, }, + [ISHTP_DRIVER_DATA_NVL_H] = { + .fw_generation = ISH_FW_GEN_NVL_H, + }, + [ISHTP_DRIVER_DATA_NVL_S] = { + .fw_generation = ISH_FW_GEN_NVL_S, + }, }; static const struct pci_device_id ish_pci_tbl[] = { @@ -76,6 +86,8 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE_DATA(INTEL, ISH_PTL_H, ISHTP_DRIVER_DATA_PTL)}, {PCI_DEVICE_DATA(INTEL, ISH_PTL_P, ISHTP_DRIVER_DATA_PTL)}, {PCI_DEVICE_DATA(INTEL, ISH_WCL, ISHTP_DRIVER_DATA_WCL)}, + {PCI_DEVICE_DATA(INTEL, ISH_NVL_H, ISHTP_DRIVER_DATA_NVL_H)}, + {PCI_DEVICE_DATA(INTEL, ISH_NVL_S, ISHTP_DRIVER_DATA_NVL_S)}, {} }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); From 7c698de0dc5daa1e1a5fd1f0c6aa1b6bb2f5d867 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Feb 2026 10:00:02 +0100 Subject: [PATCH 456/828] HID: apple: Add EPOMAKER TH87 to the non-apple keyboards list EPOMAKER TH87 has the very same ID as Apple Aluminum keyboard (05ac:024f) although it doesn't work as expected in compatible way. Put three entries to the non-apple keyboards list to exclude this device: one for BT ("TH87"), one for USB ("HFD Epomaker TH87") and one for dongle ("2.4G Wireless Receiver"). Link: https://bugzilla.suse.com/show_bug.cgi?id=1258455 Signed-off-by: Takashi Iwai Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 894adc23367b..9dcb252c5d6c 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -365,6 +365,9 @@ static const struct apple_non_apple_keyboard non_apple_keyboards[] = { { "A3R" }, { "hfd.cn" }, { "WKB603" }, + { "TH87" }, /* EPOMAKER TH87 BT mode */ + { "HFD Epomaker TH87" }, /* EPOMAKER TH87 USB mode */ + { "2.4G Wireless Receiver" }, /* EPOMAKER TH87 dongle */ }; static bool apple_is_non_apple_keyboard(struct hid_device *hdev) From 8f153eb745463b0715f1aad41e765cd83e9da8c0 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 29 Jan 2026 10:24:05 +0800 Subject: [PATCH 457/828] wifi: ath12k: use correct pdev id when requesting firmware stats To get firmware statistics, currently ar->pdev->pdev_id is passed as an argument to ath12k_mac_get_fw_stats() in ath12k_mac_op_sta_statistics(). For single pdev device like WCN7850, its value is 0 which represents the SoC pdev id. As a result, WCN7850 firmware sends the same reply to host twice, which further results in memory leak: unreferenced object 0xffff88812e286000 (size 192): comm "softirq", pid 0, jiffies 4294981997 hex dump (first 32 bytes): 10 a5 40 11 81 88 ff ff 10 a5 40 11 81 88 ff ff ..@.......@..... 00 00 00 00 00 00 00 00 80 ff ff ff 33 05 00 00 ............3... backtrace (crc cecc8c82): __kmalloc_cache_noprof ath12k_wmi_tlv_fw_stats_parse ath12k_wmi_tlv_iter ath12k_wmi_op_rx ath12k_htc_rx_completion_handler ath12k_ce_per_engine_service ath12k_pci_ce_workqueue process_one_work bh_worker tasklet_action handle_softirqs Detailed explanation is: 1. ath12k_mac_get_fw_stats() called in ath12k_mac_op_sta_statistics() to get vdev statistics, making the caller thread wait. 2. firmware sends the first reply, ath12k_wmi_tlv_fw_stats_data_parse() allocates buffers to cache necessary information. Following that, in ath12k_wmi_fw_stats_process() if events of all started vdev haved been received, is_end flag is set hence the waiting thread gets waken up by the ar->fw_stats_done/->fw_stats_complete signals. 3. ath12k_mac_get_fw_stats() wakes up and returns successfully. ath12k_mac_op_sta_statistics() saves required parameters and calls ath12k_fw_stats_reset() to free buffers allocated earlier. 4. firmware sends the second reply. As usual, buffers are allocated and attached to the ar->fw_stats.vdevs list. Note this time there is no thread waiting, therefore no chance to free those buffers. 5. ath12k module gets unloaded. If there has been no more firmware statistics request made since step 4, or if the request fails (see the example in the following patch), there is no chance to call ath12k_fw_stats_reset(). Consequently those buffers leak. Actually for single pdev device, using SoC pdev id in ath12k_mac_op_sta_statistics() is wrong, because the purpose is to get statistics of a specific station, which is mapped to a specific pdev. That said, the id of actual individual pdev should be fetched and used instead. The helper ath12k_mac_get_target_pdev_id() serves for this purpose, hence use it to fix this issue. Note it also works for other devices as well due to the single_pdev_only check inside. The same applies to ath12k_mac_op_get_txpower() and ath12k_mac_op_link_sta_statistics() as well. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 79e7b04b5388 ("wifi: ath12k: report station mode signal strength") Fixes: e92c658b056b ("wifi: ath12k: add get_txpower mac ops") Fixes: ebebe66ec208 ("wifi: ath12k: fill link station statistics for MLO") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260129-ath12k-fw-stats-fixes-v1-1-55d66064f4d5@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 68431a0e128e..5287fa2f66e5 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5430,7 +5430,7 @@ int ath12k_mac_op_get_txpower(struct ieee80211_hw *hw, ar->last_tx_power_update)) goto send_tx_power; - params.pdev_id = ar->pdev->pdev_id; + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); params.vdev_id = arvif->vdev_id; params.stats_id = WMI_REQUEST_PDEV_STAT; ret = ath12k_mac_get_fw_stats(ar, ¶ms); @@ -13452,7 +13452,7 @@ void ath12k_mac_op_sta_statistics(struct ieee80211_hw *hw, /* TODO: Use real NF instead of default one. */ signal = rate_info.rssi_comb; - params.pdev_id = ar->pdev->pdev_id; + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); params.vdev_id = 0; params.stats_id = WMI_REQUEST_VDEV_STAT; @@ -13580,7 +13580,7 @@ void ath12k_mac_op_link_sta_statistics(struct ieee80211_hw *hw, spin_unlock_bh(&ar->ab->dp->dp_lock); if (!signal && ahsta->ahvif->vdev_type == WMI_VDEV_TYPE_STA) { - params.pdev_id = ar->pdev->pdev_id; + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); params.vdev_id = 0; params.stats_id = WMI_REQUEST_VDEV_STAT; From 7259b1a0e54c2d3051ac8f1eb01de121b11118ea Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 29 Jan 2026 10:24:06 +0800 Subject: [PATCH 458/828] wifi: ath12k: fix station lookup failure when disconnecting from AP In ath12k_wmi_tlv_fw_stats_data_parse() and ath12k_wmi_tlv_rssi_chain_parse(), the driver uses ieee80211_find_sta_by_ifaddr() to look up the station associated with the incoming firmware statistics. This works under normal conditions but fails during AP disconnection, resulting in log messages like: wlan0: deauthenticating from xxxxxx by local choice (Reason: 3=DEAUTH_LEAVING) wlan0: moving STA xxxxxx to state 3 wlan0: moving STA xxxxxx to state 2 wlan0: moving STA xxxxxx to state 1 ath12k_pci 0000:02:00.0: not found station bssid xxxxxx for vdev stat ath12k_pci 0000:02:00.0: not found station of bssid xxxxxx for rssi chain ath12k_pci 0000:02:00.0: failed to pull fw stats: -71 ath12k_pci 0000:02:00.0: time out while waiting for get fw stats wlan0: Removed STA xxxxxx wlan0: Destroyed STA xxxxxx The failure happens because the station has already been removed from ieee80211_local::sta_hash by the time firmware statistics are requested through drv_sta_statistics(). Switch the lookup to ath12k_link_sta_find_by_addr(), which searches the driver's link station hash table that still has the station recorded at that time. This also implicitly fixes another issue: the current code always uses deflink regardless of which link the statistics belong to, which is incorrect in MLO scenarios. The new helper returns the correct link station. Additionally, raise the log level on lookup failures. With the updated helper, such failures should no longer occur under normal conditions. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 79e7b04b5388 ("wifi: ath12k: report station mode signal strength") Fixes: 6af5bc381b36 ("wifi: ath12k: report station mode per-chain signal strength") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260129-ath12k-fw-stats-fixes-v1-2-55d66064f4d5@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 36 ++++++++++----------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 7617fc3a2479..404f031a3c87 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -8241,8 +8241,6 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, struct ath12k_fw_stats *stats = parse->stats; struct ath12k *ar; struct ath12k_link_vif *arvif; - struct ieee80211_sta *sta; - struct ath12k_sta *ahsta; struct ath12k_link_sta *arsta; int i, ret = 0; const void *data = ptr; @@ -8278,21 +8276,19 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, arvif = ath12k_mac_get_arvif(ar, le32_to_cpu(src->vdev_id)); if (arvif) { - sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar), - arvif->bssid, - NULL); - if (sta) { - ahsta = ath12k_sta_to_ahsta(sta); - arsta = &ahsta->deflink; + spin_lock_bh(&ab->base_lock); + arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid); + if (arsta) { arsta->rssi_beacon = le32_to_cpu(src->beacon_snr); ath12k_dbg(ab, ATH12K_DBG_WMI, "wmi stats vdev id %d snr %d\n", src->vdev_id, src->beacon_snr); } else { - ath12k_dbg(ab, ATH12K_DBG_WMI, - "not found station bssid %pM for vdev stat\n", - arvif->bssid); + ath12k_warn(ab, + "not found link sta with bssid %pM for vdev stat\n", + arvif->bssid); } + spin_unlock_bh(&ab->base_lock); } data += sizeof(*src); @@ -8363,8 +8359,6 @@ static int ath12k_wmi_tlv_rssi_chain_parse(struct ath12k_base *ab, struct ath12k_fw_stats *stats = parse->stats; struct ath12k_link_vif *arvif; struct ath12k_link_sta *arsta; - struct ieee80211_sta *sta; - struct ath12k_sta *ahsta; struct ath12k *ar; int vdev_id; int j; @@ -8400,19 +8394,15 @@ static int ath12k_wmi_tlv_rssi_chain_parse(struct ath12k_base *ab, "stats bssid %pM vif %p\n", arvif->bssid, arvif->ahvif->vif); - sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar), - arvif->bssid, - NULL); - if (!sta) { - ath12k_dbg(ab, ATH12K_DBG_WMI, - "not found station of bssid %pM for rssi chain\n", - arvif->bssid); + guard(spinlock_bh)(&ab->base_lock); + arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid); + if (!arsta) { + ath12k_warn(ab, + "not found link sta with bssid %pM for rssi chain\n", + arvif->bssid); return -EPROTO; } - ahsta = ath12k_sta_to_ahsta(sta); - arsta = &ahsta->deflink; - BUILD_BUG_ON(ARRAY_SIZE(arsta->chain_signal) > ARRAY_SIZE(stats_rssi->rssi_avg_beacon)); From 3540cc453f5679d8c4d5ccc9834a1f5f8184af3a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Feb 2026 16:43:48 +0100 Subject: [PATCH 459/828] ALSA: usb-audio: Drop superfluous kernel-doc markers We don't process USB-audio driver code for kernel-doc, and the "/**" marker leads to warnings with W=1 builds. Drop the superfluous markers. Link: https://patch.msgid.link/20260226154414.1081568-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_s1810c.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/usb/mixer_s1810c.c b/sound/usb/mixer_s1810c.c index 473cb29efa7f..7eac7d1bce64 100644 --- a/sound/usb/mixer_s1810c.c +++ b/sound/usb/mixer_s1810c.c @@ -71,7 +71,7 @@ * * e I guess the same as with mixer * */ -/** struct s1810c_ctl_packet - basic vendor request +/* struct s1810c_ctl_packet - basic vendor request * @selector: device/mixer/output * @b: request-dependant field b * @tag: fixed value identifying type of request @@ -94,14 +94,14 @@ struct s1810c_ctl_packet { __le32 e; }; -/** selectors for CMD request +/* selectors for CMD request */ #define SC1810C_SEL_DEVICE 0 #define SC1810C_SEL_MIXER 0x64 #define SC1810C_SEL_OUTPUT 0x65 -/** control ids */ +/* control ids */ #define SC1810C_CTL_LINE_SW 0 #define SC1810C_CTL_MUTE_SW 1 #define SC1824C_CTL_MONO_SW 2 @@ -127,7 +127,7 @@ struct s1810c_ctl_packet { #define SC1810C_GET_STATE_TAG SC1810C_SET_STATE_TAG #define SC1810C_GET_STATE_LEN SC1810C_SET_STATE_LEN -/** Mixer levels normally range from 0 (off) to 0x0100 0000 (0 dB). +/* Mixer levels normally range from 0 (off) to 0x0100 0000 (0 dB). * raw_level = 2^24 * 10^(db_level / 20), thus * -3dB = 0xb53bf0 (technically, half-power -3.01...dB would be 0xb504f3) * -96dB = 0x109 @@ -145,7 +145,7 @@ struct s1810c_ctl_packet { #define MIXER_LEVEL_N3DB 0xb53bf0 #define MIXER_LEVEL_0DB 0x1000000 -/** +/* * This packet includes mixer volumes and * various other fields, it's an extended * version of ctl_packet, with a and b @@ -155,7 +155,7 @@ struct s1810c_state_packet { __le32 fields[63]; }; -/** indices into s1810c_state_packet.fields[] +/* indices into s1810c_state_packet.fields[] */ #define SC1810C_STATE_TAG_IDX 2 #define SC1810C_STATE_LEN_IDX 3 From 1d6452a0ce78cd3f4e48943b5ba21d273a658298 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Feb 2026 16:43:49 +0100 Subject: [PATCH 460/828] ALSA: usb: qcom: Correct parameter comment for uaudio_transfer_buffer_setup() At fixing the memory leak of xfer buffer, we forgot to update the corresponding comment, too. This resulted in a kernel-doc warning with W=1. Let's correct it. Fixes: 5c7ef5001292 ("ALSA: qc_audio_offload: avoid leaking xfer_buf allocation") Link: https://patch.msgid.link/20260226154414.1081568-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/qcom/qc_audio_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 01e6063c2207..510b68cced33 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -1007,7 +1007,7 @@ put_suspend: /** * uaudio_transfer_buffer_setup() - fetch and populate xfer buffer params * @subs: usb substream - * @xfer_buf: xfer buf to be allocated + * @xfer_buf_cpu: xfer buf to be allocated * @xfer_buf_len: size of allocation * @mem_info: QMI response info * From dc9786a06d53291a5af824e854dd0769b1a97dbe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Feb 2026 16:54:45 +0100 Subject: [PATCH 461/828] ALSA: us144mkii: Drop kernel-doc markers We don't process this driver code for kernel-doc, and the "/**" marker leads to warnings with W=1 builds. Drop the superfluous markers, and also fix the invalid mark up, too. Link: https://patch.msgid.link/20260226155456.1092186-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/usx2y/us144mkii.c | 14 +++++----- sound/usb/usx2y/us144mkii_capture.c | 12 ++++---- sound/usb/usx2y/us144mkii_controls.c | 42 ++++++++++++++-------------- sound/usb/usx2y/us144mkii_midi.c | 22 +++++++-------- sound/usb/usx2y/us144mkii_playback.c | 10 +++---- 5 files changed, 50 insertions(+), 50 deletions(-) diff --git a/sound/usb/usx2y/us144mkii.c b/sound/usb/usx2y/us144mkii.c index bc71968df8e2..0cf4fa74e210 100644 --- a/sound/usb/usx2y/us144mkii.c +++ b/sound/usb/usx2y/us144mkii.c @@ -10,8 +10,8 @@ MODULE_AUTHOR("Šerif Rami "); MODULE_DESCRIPTION("ALSA Driver for TASCAM US-144MKII"); MODULE_LICENSE("GPL"); -/** - * @brief Module parameters for ALSA card instantiation. +/* + * Module parameters for ALSA card instantiation. * * These parameters allow users to configure how the ALSA sound card * for the TASCAM US-144MKII is instantiated. @@ -269,7 +269,7 @@ void tascam_stop_work_handler(struct work_struct *work) atomic_set(&tascam->active_urbs, 0); } -/** +/* * tascam_card_private_free() - Frees private data associated with the sound * card. * @card: Pointer to the ALSA sound card instance. @@ -291,7 +291,7 @@ static void tascam_card_private_free(struct snd_card *card) } } -/** +/* * tascam_suspend() - Handles device suspension. * @intf: The USB interface being suspended. * @message: Power management message. @@ -332,7 +332,7 @@ static int tascam_suspend(struct usb_interface *intf, pm_message_t message) return 0; } -/** +/* * tascam_resume() - Handles device resumption from suspend. * @intf: The USB interface being resumed. * @@ -390,7 +390,7 @@ static void tascam_error_timer(struct timer_list *t) schedule_work(&tascam->midi_out_work); } -/** +/* * tascam_probe() - Probes for the TASCAM US-144MKII device. * @intf: The USB interface being probed. * @usb_id: The USB device ID. @@ -565,7 +565,7 @@ free_card: return err; } -/** +/* * tascam_disconnect() - Disconnects the TASCAM US-144MKII device. * @intf: The USB interface being disconnected. * diff --git a/sound/usb/usx2y/us144mkii_capture.c b/sound/usb/usx2y/us144mkii_capture.c index 00188ff6cd51..af120bf62173 100644 --- a/sound/usb/usx2y/us144mkii_capture.c +++ b/sound/usb/usx2y/us144mkii_capture.c @@ -3,7 +3,7 @@ #include "us144mkii.h" -/** +/* * tascam_capture_open() - Opens the PCM capture substream. * @substream: The ALSA PCM substream to open. * @@ -23,7 +23,7 @@ static int tascam_capture_open(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_capture_close() - Closes the PCM capture substream. * @substream: The ALSA PCM substream to close. * @@ -41,7 +41,7 @@ static int tascam_capture_close(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_capture_prepare() - Prepares the PCM capture substream for use. * @substream: The ALSA PCM substream to prepare. * @@ -62,7 +62,7 @@ static int tascam_capture_prepare(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_capture_pointer() - Returns the current capture pointer position. * @substream: The ALSA PCM substream. * @@ -91,7 +91,7 @@ tascam_capture_pointer(struct snd_pcm_substream *substream) return do_div(pos, runtime->buffer_size); } -/** +/* * tascam_capture_ops - ALSA PCM operations for capture. * * This structure defines the callback functions for capture stream operations, @@ -109,7 +109,7 @@ const struct snd_pcm_ops tascam_capture_ops = { .pointer = tascam_capture_pointer, }; -/** +/* * decode_tascam_capture_block() - Decodes a raw 512-byte block from the device. * @src_block: Pointer to the 512-byte raw source block. * @dst_block: Pointer to the destination buffer for decoded audio frames. diff --git a/sound/usb/usx2y/us144mkii_controls.c b/sound/usb/usx2y/us144mkii_controls.c index 62055fb8e7ba..81ded11e3709 100644 --- a/sound/usb/usx2y/us144mkii_controls.c +++ b/sound/usb/usx2y/us144mkii_controls.c @@ -3,8 +3,8 @@ #include "us144mkii.h" -/** - * @brief Text descriptions for playback output source options. +/* + * Text descriptions for playback output source options. * * Used by ALSA kcontrol elements to provide user-friendly names for * the playback routing options (e.g., "Playback 1-2", "Playback 3-4"). @@ -12,15 +12,15 @@ static const char *const playback_source_texts[] = { "Playback 1-2", "Playback 3-4" }; -/** - * @brief Text descriptions for capture input source options. +/* + * Text descriptions for capture input source options. * * Used by ALSA kcontrol elements to provide user-friendly names for * the capture routing options (e.g., "Analog In", "Digital In"). */ static const char *const capture_source_texts[] = { "Analog In", "Digital In" }; -/** +/* * tascam_playback_source_info() - ALSA control info callback for playback * source. * @kcontrol: The ALSA kcontrol instance. @@ -38,7 +38,7 @@ static int tascam_playback_source_info(struct snd_kcontrol *kcontrol, return snd_ctl_enum_info(uinfo, 1, 2, playback_source_texts); } -/** +/* * tascam_line_out_get() - ALSA control get callback for Line Outputs Source. * @kcontrol: The ALSA kcontrol instance. * @ucontrol: The ALSA control element value structure to fill. @@ -60,7 +60,7 @@ static int tascam_line_out_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_line_out_put() - ALSA control put callback for Line Outputs Source. * @kcontrol: The ALSA kcontrol instance. * @ucontrol: The ALSA control element value structure containing the new value. @@ -89,7 +89,7 @@ static int tascam_line_out_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_line_out_control - ALSA kcontrol definition for Line Outputs Source. * * This defines a new ALSA mixer control named "Line OUTPUTS Source" that allows @@ -106,7 +106,7 @@ static const struct snd_kcontrol_new tascam_line_out_control = { .put = tascam_line_out_put, }; -/** +/* * tascam_digital_out_get() - ALSA control get callback for Digital Outputs * Source. * @kcontrol: The ALSA kcontrol instance. @@ -129,7 +129,7 @@ static int tascam_digital_out_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_digital_out_put() - ALSA control put callback for Digital Outputs * Source. * @kcontrol: The ALSA kcontrol instance. @@ -159,7 +159,7 @@ static int tascam_digital_out_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_digital_out_control - ALSA kcontrol definition for Digital Outputs * Source. * @@ -177,7 +177,7 @@ static const struct snd_kcontrol_new tascam_digital_out_control = { .put = tascam_digital_out_put, }; -/** +/* * tascam_capture_source_info() - ALSA control info callback for capture source. * @kcontrol: The ALSA kcontrol instance. * @uinfo: The ALSA control element info structure to fill. @@ -194,7 +194,7 @@ static int tascam_capture_source_info(struct snd_kcontrol *kcontrol, return snd_ctl_enum_info(uinfo, 1, 2, capture_source_texts); } -/** +/* * tascam_capture_12_get() - ALSA control get callback for Capture channels 1 * and 2 Source. * @kcontrol: The ALSA kcontrol instance. @@ -217,7 +217,7 @@ static int tascam_capture_12_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_capture_12_put() - ALSA control put callback for Capture channels 1 * and 2 Source. * @kcontrol: The ALSA kcontrol instance. @@ -247,7 +247,7 @@ static int tascam_capture_12_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_capture_12_control - ALSA kcontrol definition for Capture channels 1 * and 2 Source. * @@ -265,7 +265,7 @@ static const struct snd_kcontrol_new tascam_capture_12_control = { .put = tascam_capture_12_put, }; -/** +/* * tascam_capture_34_get() - ALSA control get callback for Capture channels 3 * and 4 Source. * @kcontrol: The ALSA kcontrol instance. @@ -288,7 +288,7 @@ static int tascam_capture_34_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_capture_34_put() - ALSA control put callback for Capture channels 3 * and 4 Source. * @kcontrol: The ALSA kcontrol instance. @@ -318,7 +318,7 @@ static int tascam_capture_34_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_capture_34_control - ALSA kcontrol definition for Capture channels 3 * and 4 Source. * @@ -336,7 +336,7 @@ static const struct snd_kcontrol_new tascam_capture_34_control = { .put = tascam_capture_34_put, }; -/** +/* * tascam_samplerate_info() - ALSA control info callback for Sample Rate. * @kcontrol: The ALSA kcontrol instance. * @uinfo: The ALSA control element info structure to fill. @@ -356,7 +356,7 @@ static int tascam_samplerate_info(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_samplerate_get() - ALSA control get callback for Sample Rate. * @kcontrol: The ALSA kcontrol instance. * @ucontrol: The ALSA control element value structure to fill. @@ -400,7 +400,7 @@ static int tascam_samplerate_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_samplerate_control - ALSA kcontrol definition for Sample Rate. * * This defines a new ALSA mixer control named "Sample Rate" that displays diff --git a/sound/usb/usx2y/us144mkii_midi.c b/sound/usb/usx2y/us144mkii_midi.c index ed2afec2a89a..4871797b1670 100644 --- a/sound/usb/usx2y/us144mkii_midi.c +++ b/sound/usb/usx2y/us144mkii_midi.c @@ -3,7 +3,7 @@ #include "us144mkii.h" -/** +/* * tascam_midi_in_work_handler() - Deferred work for processing MIDI input. * @work: The work_struct instance. * @@ -75,7 +75,7 @@ out: usb_put_urb(urb); } -/** +/* * tascam_midi_in_open() - Opens the MIDI input substream. * @substream: The ALSA rawmidi substream to open. * @@ -92,7 +92,7 @@ static int tascam_midi_in_open(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_in_close() - Closes the MIDI input substream. * @substream: The ALSA rawmidi substream to close. * @@ -103,7 +103,7 @@ static int tascam_midi_in_close(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_in_trigger() - Triggers MIDI input stream activity. * @substream: The ALSA rawmidi substream. * @up: Boolean indicating whether to start (1) or stop (0) the stream. @@ -150,7 +150,7 @@ static void tascam_midi_in_trigger(struct snd_rawmidi_substream *substream, } } -/** +/* * tascam_midi_in_ops - ALSA rawmidi operations for MIDI input. * * This structure defines the callback functions for MIDI input stream @@ -205,7 +205,7 @@ out: usb_put_urb(urb); } -/** +/* * tascam_midi_out_work_handler() - Deferred work for sending MIDI data * @work: The work_struct instance. * @@ -282,7 +282,7 @@ static void tascam_midi_out_work_handler(struct work_struct *work) } } -/** +/* * tascam_midi_out_open() - Opens the MIDI output substream. * @substream: The ALSA rawmidi substream to open. * @@ -301,7 +301,7 @@ static int tascam_midi_out_open(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_out_close() - Closes the MIDI output substream. * @substream: The ALSA rawmidi substream to close. * @@ -312,7 +312,7 @@ static int tascam_midi_out_close(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_out_drain() - Drains the MIDI output stream. * @substream: The ALSA rawmidi substream. * @@ -340,7 +340,7 @@ static void tascam_midi_out_drain(struct snd_rawmidi_substream *substream) usb_kill_anchored_urbs(&tascam->midi_out_anchor); } -/** +/* * tascam_midi_out_trigger() - Triggers MIDI output stream activity. * @substream: The ALSA rawmidi substream. * @up: Boolean indicating whether to start (1) or stop (0) the stream. @@ -361,7 +361,7 @@ static void tascam_midi_out_trigger(struct snd_rawmidi_substream *substream, } } -/** +/* * tascam_midi_out_ops - ALSA rawmidi operations for MIDI output. * * This structure defines the callback functions for MIDI output stream diff --git a/sound/usb/usx2y/us144mkii_playback.c b/sound/usb/usx2y/us144mkii_playback.c index 0cb9699ec211..7efaca0a6489 100644 --- a/sound/usb/usx2y/us144mkii_playback.c +++ b/sound/usb/usx2y/us144mkii_playback.c @@ -3,7 +3,7 @@ #include "us144mkii.h" -/** +/* * tascam_playback_open() - Opens the PCM playback substream. * @substream: The ALSA PCM substream to open. * @@ -23,7 +23,7 @@ static int tascam_playback_open(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_playback_close() - Closes the PCM playback substream. * @substream: The ALSA PCM substream to close. * @@ -41,7 +41,7 @@ static int tascam_playback_close(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_playback_prepare() - Prepares the PCM playback substream for use. * @substream: The ALSA PCM substream to prepare. * @@ -108,7 +108,7 @@ static int tascam_playback_prepare(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_playback_pointer() - Returns the current playback pointer position. * @substream: The ALSA PCM substream. * @@ -137,7 +137,7 @@ tascam_playback_pointer(struct snd_pcm_substream *substream) return do_div(pos, runtime->buffer_size); } -/** +/* * tascam_playback_ops - ALSA PCM operations for playback. * * This structure defines the callback functions for playback stream operations, From ec3070f01fa30f2c5547d645dbb76174304bf0e4 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 8 Jan 2026 17:09:54 +0100 Subject: [PATCH 462/828] HID: multitouch: Keep latency normal on deactivate for reactivation gesture Uniwill devices have a built in gesture in the touchpad to de- and reactivate it by double taping the upper left corner. This gesture stops working when latency is set to high, so this patch keeps the latency on normal. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach [jkosina@suse.com: change bit from 24 to 25] [jkosina@suse.com: update shortlog] Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 8052b35bfd7d..b8a748bbf0fd 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -77,6 +77,7 @@ MODULE_LICENSE("GPL"); #define MT_QUIRK_ORIENTATION_INVERT BIT(22) #define MT_QUIRK_APPLE_TOUCHBAR BIT(23) #define MT_QUIRK_YOGABOOK9I BIT(24) +#define MT_QUIRK_KEEP_LATENCY_ON_CLOSE BIT(25) #define MT_INPUTMODE_TOUCHSCREEN 0x02 #define MT_INPUTMODE_TOUCHPAD 0x03 @@ -214,6 +215,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); #define MT_CLS_WIN_8_DISABLE_WAKEUP 0x0016 #define MT_CLS_WIN_8_NO_STICKY_FINGERS 0x0017 #define MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU 0x0018 +#define MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE 0x0019 /* vendor specific classes */ #define MT_CLS_3M 0x0101 @@ -335,6 +337,15 @@ static const struct mt_class mt_classes[] = { MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_WIN8_PTP_BUTTONS, .export_all_inputs = true }, + { .name = MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE, + .quirks = MT_QUIRK_ALWAYS_VALID | + MT_QUIRK_IGNORE_DUPLICATES | + MT_QUIRK_HOVERING | + MT_QUIRK_CONTACT_CNT_ACCURATE | + MT_QUIRK_STICKY_FINGERS | + MT_QUIRK_WIN8_PTP_BUTTONS | + MT_QUIRK_KEEP_LATENCY_ON_CLOSE, + .export_all_inputs = true }, /* * vendor specific classes @@ -855,7 +866,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, if ((cls->name == MT_CLS_WIN_8 || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU || - cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP) && + cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP || + cls->name == MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE) && (field->application == HID_DG_TOUCHPAD || field->application == HID_DG_TOUCHSCREEN)) app->quirks |= MT_QUIRK_CONFIDENCE; @@ -1768,7 +1780,8 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) int ret; if (td->is_haptic_touchpad && (td->mtclass.name == MT_CLS_WIN_8 || - td->mtclass.name == MT_CLS_WIN_8_FORCE_MULTI_INPUT)) { + td->mtclass.name == MT_CLS_WIN_8_FORCE_MULTI_INPUT || + td->mtclass.name == MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE)) { if (hid_haptic_input_configured(hdev, td->haptic, hi) == 0) td->is_haptic_touchpad = false; } else { @@ -2081,7 +2094,12 @@ static void mt_on_hid_hw_open(struct hid_device *hdev) static void mt_on_hid_hw_close(struct hid_device *hdev) { - mt_set_modes(hdev, HID_LATENCY_HIGH, TOUCHPAD_REPORT_NONE); + struct mt_device *td = hid_get_drvdata(hdev); + + if (td->mtclass.quirks & MT_QUIRK_KEEP_LATENCY_ON_CLOSE) + mt_set_modes(hdev, HID_LATENCY_NORMAL, TOUCHPAD_REPORT_NONE); + else + mt_set_modes(hdev, HID_LATENCY_HIGH, TOUCHPAD_REPORT_NONE); } /* @@ -2468,6 +2486,14 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_UNITEC, USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19) }, + /* Uniwill touchpads */ + { .driver_data = MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_PIXART, 0x0255) }, + { .driver_data = MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_PIXART, 0x0274) }, + /* VTL panels */ { .driver_data = MT_CLS_VTL, MT_USB_DEVICE(USB_VENDOR_ID_VTL, From 021ca6b670bebebc409d43845efcfe8c11c1dd54 Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 23 Feb 2026 22:33:22 +0900 Subject: [PATCH 463/828] mm/slab: pass __GFP_NOWARN to refill_sheaf() if fallback is available When refill_sheaf() is called, failing to refill the sheaf doesn't necessarily mean the allocation will fail because a fallback path might be available and serve the allocation request. Suppress spurious warnings by passing __GFP_NOWARN along with __GFP_NOMEMALLOC whenever a fallback path is available. When the caller is alloc_full_sheaf() or __pcs_replace_empty_main(), the kernel always falls back to the slowpath (__slab_alloc_node()). For __prefill_sheaf_pfmemalloc(), the fallback path is available only when gfp_pfmemalloc_allowed() returns true. Reported-and-tested-by: Chris Bainbridge Closes: https://lore.kernel.org/linux-mm/aZt2-oS9lkmwT7Ch@debian.local Fixes: 1ce20c28eafd ("slab: handle pfmemalloc slabs properly with sheaves") Link: https://lore.kernel.org/linux-mm/aZwSreGj9-HHdD-j@hyeyoo Signed-off-by: Harry Yoo Link: https://patch.msgid.link/20260223133322.16705-1-harry.yoo@oracle.com Tested-by: Mikhail Gavrilov Signed-off-by: Vlastimil Babka (SUSE) --- mm/slub.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 862642c165ed..4ce24d9ee7e4 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2822,7 +2822,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) if (!sheaf) return NULL; - if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC)) { + if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) { free_empty_sheaf(s, sheaf); return NULL; } @@ -4575,7 +4575,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; if (empty) { - if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC)) { + if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) { full = empty; } else { /* @@ -4890,9 +4890,14 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof); static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s, struct slab_sheaf *sheaf, gfp_t gfp) { - int ret = 0; + gfp_t gfp_nomemalloc; + int ret; - ret = refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC); + gfp_nomemalloc = gfp | __GFP_NOMEMALLOC; + if (gfp_pfmemalloc_allowed(gfp)) + gfp_nomemalloc |= __GFP_NOWARN; + + ret = refill_sheaf(s, sheaf, gfp_nomemalloc); if (likely(!ret || !gfp_pfmemalloc_allowed(gfp))) return ret; From f3ec502b6755a3bfb12c1c47025ef989ff9efc72 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 25 Feb 2026 08:34:07 -0800 Subject: [PATCH 464/828] mm/slab: mark alloc tags empty for sheaves allocated with __GFP_NO_OBJ_EXT alloc_empty_sheaf() allocates sheaves from SLAB_KMALLOC caches using __GFP_NO_OBJ_EXT to avoid recursion, however it does not mark their allocation tags empty before freeing, which results in a warning when CONFIG_MEM_ALLOC_PROFILING_DEBUG is set. Fix this by marking allocation tags for such sheaves as empty. The problem was technically introduced in commit 4c0a17e28340 but only becomes possible to hit with commit 913ffd3a1bf5. Fixes: 4c0a17e28340 ("slab: prevent recursive kmalloc() in alloc_empty_sheaf()") Fixes: 913ffd3a1bf5 ("slab: handle kmalloc sheaves bootstrap") Reported-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/all/20260223155128.3849-1-00107082@163.com/ Analyzed-by: Harry Yoo Signed-off-by: Suren Baghdasaryan Reviewed-by: Harry Yoo Tested-by: Harry Yoo Tested-by: David Wang <00107082@163.com> Link: https://patch.msgid.link/20260225163407.2218712-1-surenb@google.com Signed-off-by: Vlastimil Babka (SUSE) --- include/linux/gfp_types.h | 2 ++ mm/slab.h | 4 ++-- mm/slub.c | 33 +++++++++++++++++++++++---------- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 814bb2892f99..6c75df30a281 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -139,6 +139,8 @@ enum { * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. * * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + * mark_obj_codetag_empty() should be called upon freeing for objects allocated + * with this flag to indicate that their NULL tags are expected and normal. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) diff --git a/mm/slab.h b/mm/slab.h index 71c7261bf822..f6ef862b60ef 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -290,14 +290,14 @@ static inline void *nearest_obj(struct kmem_cache *cache, /* Determine object index from a given position */ static inline unsigned int __obj_to_index(const struct kmem_cache *cache, - void *addr, void *obj) + void *addr, const void *obj) { return reciprocal_divide(kasan_reset_tag(obj) - addr, cache->reciprocal_size); } static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) + const struct slab *slab, const void *obj) { if (is_kfence_address(obj)) return 0; diff --git a/mm/slub.c b/mm/slub.c index 4ce24d9ee7e4..52f021711744 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2041,18 +2041,18 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG -static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) +static inline void mark_obj_codetag_empty(const void *obj) { - struct slab *obj_exts_slab; + struct slab *obj_slab; unsigned long slab_exts; - obj_exts_slab = virt_to_slab(obj_exts); - slab_exts = slab_obj_exts(obj_exts_slab); + obj_slab = virt_to_slab(obj); + slab_exts = slab_obj_exts(obj_slab); if (slab_exts) { get_slab_obj_exts(slab_exts); - unsigned int offs = obj_to_index(obj_exts_slab->slab_cache, - obj_exts_slab, obj_exts); - struct slabobj_ext *ext = slab_obj_ext(obj_exts_slab, + unsigned int offs = obj_to_index(obj_slab->slab_cache, + obj_slab, obj); + struct slabobj_ext *ext = slab_obj_ext(obj_slab, slab_exts, offs); if (unlikely(is_codetag_empty(&ext->ref))) { @@ -2090,7 +2090,7 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ -static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {} +static inline void mark_obj_codetag_empty(const void *obj) {} static inline bool mark_failed_objexts_alloc(struct slab *slab) { return false; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, struct slabobj_ext *vec, unsigned int objects) {} @@ -2211,7 +2211,7 @@ retry: * assign slabobj_exts in parallel. In this case the existing * objcg vector should be reused. */ - mark_objexts_empty(vec); + mark_obj_codetag_empty(vec); if (unlikely(!allow_spin)) kfree_nolock(vec); else @@ -2254,7 +2254,7 @@ static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin) * NULL, therefore replace NULL with CODETAG_EMPTY to indicate that * the extension for obj_exts is expected to be NULL. */ - mark_objexts_empty(obj_exts); + mark_obj_codetag_empty(obj_exts); if (allow_spin) kfree(obj_exts); else @@ -2312,6 +2312,10 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab) #else /* CONFIG_SLAB_OBJ_EXT */ +static inline void mark_obj_codetag_empty(const void *obj) +{ +} + static inline void init_slab_obj_exts(struct slab *slab) { } @@ -2783,6 +2787,15 @@ static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) { + /* + * If the sheaf was created with __GFP_NO_OBJ_EXT flag then its + * corresponding extension is NULL and alloc_tag_sub() will throw a + * warning, therefore replace NULL with CODETAG_EMPTY to indicate + * that the extension for this sheaf is expected to be NULL. + */ + if (s->flags & SLAB_KMALLOC) + mark_obj_codetag_empty(sheaf); + kfree(sheaf); stat(s, SHEAF_FREE); From 2b351ea42820a7ecc2e8305724536512984f4419 Mon Sep 17 00:00:00 2001 From: Sanjay Chitroda Date: Thu, 26 Feb 2026 11:17:12 +0530 Subject: [PATCH 465/828] mm/slub: drop duplicate kernel-doc for ksize() The implementation of ksize() was updated with kernel-doc by commit fab0694646d7 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c") However, the public header still contains a kernel-doc comment attached to the ksize() prototype. Having documentation both in the header and next to the implementation causes Sphinx to treat the function as being documented twice, resulting in the warning: WARNING: Duplicate C declaration, also defined at core-api/mm-api:521 Declaration is '.. c:function:: size_t ksize(const void *objp)' Kernel-doc guidelines recommend keeping the documentation with the function implementation. Therefore remove the redundant kernel-doc block from include/linux/slab.h so that the implementation in slub.c remains the canonical source for documentation. No functional change. Fixes: fab0694646d7 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c") Signed-off-by: Sanjay Chitroda Link: https://patch.msgid.link/20260226054712.3610744-1-sanjayembedded@gmail.com Signed-off-by: Vlastimil Babka (SUSE) --- include/linux/slab.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index a5a5e4108ae5..15a60b501b95 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -517,18 +517,6 @@ void kfree_sensitive(const void *objp); DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) -/** - * ksize - Report actual allocation size of associated object - * - * @objp: Pointer returned from a prior kmalloc()-family allocation. - * - * This should not be used for writing beyond the originally requested - * allocation size. Either use krealloc() or round up the allocation size - * with kmalloc_size_roundup() prior to allocation. If this is used to - * access beyond the originally requested allocation size, UBSAN_BOUNDS - * and/or FORTIFY_SOURCE may trip, since they only know about the - * originally allocated size via the __alloc_size attribute. - */ size_t ksize(const void *objp); #ifdef CONFIG_PRINTK From 71c1978ab6d2c6d48c31311855f1a85377c152ae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Feb 2026 16:47:52 +0100 Subject: [PATCH 466/828] ASoC: SDCA: Fix comments for sdca_irq_request() The kernel-doc comments for sdca_irq_request() contained some typos that lead to build warnings with W=1. Let's correct them. Fixes: b126394d9ec6 ("ASoC: SDCA: Generic interrupt support") Acked-by: Mark Brown Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260226154753.1083320-1-tiwai@suse.de --- sound/soc/sdca/sdca_interrupts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sdca/sdca_interrupts.c b/sound/soc/sdca/sdca_interrupts.c index d9e22cf40f77..95b1ab4ba1b0 100644 --- a/sound/soc/sdca/sdca_interrupts.c +++ b/sound/soc/sdca/sdca_interrupts.c @@ -265,9 +265,9 @@ static int sdca_irq_request_locked(struct device *dev, } /** - * sdca_request_irq - request an individual SDCA interrupt + * sdca_irq_request - request an individual SDCA interrupt * @dev: Pointer to the struct device against which things should be allocated. - * @interrupt_info: Pointer to the interrupt information structure. + * @info: Pointer to the interrupt information structure. * @sdca_irq: SDCA interrupt position. * @name: Name to be given to the IRQ. * @handler: A callback thread function to be called for the IRQ. From 795469820c638b4449f3bb90ee5e98ebccfbc480 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 23 Feb 2026 14:01:56 -0800 Subject: [PATCH 467/828] kcsan: test: Adjust "expect" allocation type for kmalloc_obj The call to kmalloc_obj(observed.lines) returns "char (*)[3][512]", a pointer to the whole 2D array. But "expect" wants to be "char (*)[512]", the decayed pointer type, as if it were observed.lines itself (though without the "3" bounds). This produces the following build error: ../kernel/kcsan/kcsan_test.c: In function '__report_matches': ../kernel/kcsan/kcsan_test.c:171:16: error: assignment to 'char (*)[512]' from incompatible pointer type 'char (*)[3][512]' [-Wincompatible-pointer-types] 171 | expect = kmalloc_obj(observed.lines); | ^ Instead of changing the "expect" type to "char (*)[3][512]" and requiring a dereference at each use (e.g. "(expect*)[0]"), just explicitly cast the return to the desired type. Note that I'm intentionally not switching back to byte-based "kmalloc" here because I cannot find a way for the Coccinelle script (which will be used going forward to catch future conversions) to exclude this case. Tested with: $ ./tools/testing/kunit/kunit.py run \ --kconfig_add CONFIG_DEBUG_KERNEL=y \ --kconfig_add CONFIG_KCSAN=y \ --kconfig_add CONFIG_KCSAN_KUNIT_TEST=y \ --arch=x86_64 --qemu_args '-smp 2' kcsan Reported-by: Nathan Chancellor Fixes: 69050f8d6d07 ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types") Signed-off-by: Kees Cook --- kernel/kcsan/kcsan_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index 79e655ea4ca1..ae758150ccb9 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -168,7 +168,7 @@ static bool __report_matches(const struct expect_report *r) if (!report_available()) return false; - expect = kmalloc_obj(observed.lines); + expect = (typeof(expect))kmalloc_obj(observed.lines); if (WARN_ON(!expect)) return false; From e5cb94ba5f96d691d8885175d4696d6ae6bc5ec9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 26 Feb 2026 08:22:32 +0000 Subject: [PATCH 468/828] arm64: Fix sampling the "stable" virtual counter in preemptible section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ben reports that when running with CONFIG_DEBUG_PREEMPT, using __arch_counter_get_cntvct_stable() results in well deserves warnings, as we access a per-CPU variable without preemption disabled. Fix the issue by disabling preemption on reading the counter. We can probably do a lot better by not disabling preemption on systems that do not require horrible workarounds to return a valid counter value, but this plugs the issue for the time being. Fixes: 29cc0f3aa7c6 ("arm64: Force the use of CNTVCT_EL0 in __delay()") Reported-by: Ben Horgan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/aZw3EGs4rbQvbAzV@e134344.arm.com Tested-by: Ben Horgan Tested-by: André Draszik Signed-off-by: Will Deacon --- arch/arm64/lib/delay.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c index d02341303899..e278e060e78a 100644 --- a/arch/arm64/lib/delay.c +++ b/arch/arm64/lib/delay.c @@ -32,7 +32,11 @@ static inline unsigned long xloops_to_cycles(unsigned long xloops) * Note that userspace cannot change the offset behind our back either, * as the vcpu mutex is held as long as KVM_RUN is in progress. */ -#define __delay_cycles() __arch_counter_get_cntvct_stable() +static cycles_t notrace __delay_cycles(void) +{ + guard(preempt_notrace)(); + return __arch_counter_get_cntvct_stable(); +} void __delay(unsigned long cycles) { From df6e4ab654dc482c1d45776257a62ac10e14086c Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Thu, 26 Feb 2026 17:29:11 +0530 Subject: [PATCH 469/828] arm64: topology: Fix false warning in counters_read_on_cpu() for same-CPU reads The counters_read_on_cpu() function warns when called with IRQs disabled to prevent deadlock in smp_call_function_single(). However, this warning is spurious when reading counters on the current CPU, since no IPI is needed for same CPU reads. Commit 12eb8f4fff24 ("cpufreq: CPPC: Update FIE arch_freq_scale in ticks for non-PCC regs") changed the CPPC Frequency Invariance Engine to read AMU counters directly from the scheduler tick for non-PCC register spaces (like FFH), instead of deferring to a kthread. This means counters_read_on_cpu() is now called with IRQs disabled from the tick handler, triggering the warning. Fix this by restructuring the logic: when IRQs are disabled (tick context), call the function directly for same-CPU reads. Otherwise use smp_call_function_single(). Fixes: 997c021abc6e ("cpufreq: CPPC: Update FIE arch_freq_scale in ticks for non-PCC regs") Suggested-by: Will Deacon Signed-off-by: Sumit Gupta Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 3fe1faab0362..b32f13358fbb 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -400,16 +400,25 @@ static inline int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) { /* - * Abort call on counterless CPU or when interrupts are - * disabled - can lead to deadlock in smp sync call. + * Abort call on counterless CPU. */ if (!cpu_has_amu_feat(cpu)) return -EOPNOTSUPP; - if (WARN_ON_ONCE(irqs_disabled())) - return -EPERM; - - smp_call_function_single(cpu, func, val, 1); + if (irqs_disabled()) { + /* + * When IRQs are disabled (tick path: sched_tick -> + * topology_scale_freq_tick or cppc_scale_freq_tick), only local + * CPU counter reads are allowed. Remote CPU counter read would + * require smp_call_function_single() which is unsafe with IRQs + * disabled. + */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) + return -EPERM; + func(val); + } else { + smp_call_function_single(cpu, func, val, 1); + } return 0; } From 8678591b47469fe16357234efef9b260317b8be4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 25 Feb 2026 15:02:51 -0700 Subject: [PATCH 470/828] kbuild: Split .modinfo out from ELF_DETAILS Commit 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped") added .modinfo to ELF_DETAILS while removing it from COMMON_DISCARDS, as it was needed in vmlinux.unstripped and ELF_DETAILS was present in all architecture specific vmlinux linker scripts. While this shuffle is fine for vmlinux, ELF_DETAILS and COMMON_DISCARDS may be used by other linker scripts, such as the s390 and x86 compressed boot images, which may not expect to have a .modinfo section. In certain circumstances, this could result in a bootloader failing to load the compressed kernel [1]. Commit ddc6cbef3ef1 ("s390/boot/vmlinux.lds.S: Ensure bzImage ends with SecureBoot trailer") recently addressed this for the s390 bzImage but the same bug remains for arm, parisc, and x86. The presence of .modinfo in the x86 bzImage was the root cause of the issue worked around with commit d50f21091358 ("kbuild: align modinfo section for Secureboot Authenticode EDK2 compat"). misc.c in arch/x86/boot/compressed includes lib/decompress_unzstd.c, which in turn includes lib/xxhash.c and its MODULE_LICENSE / MODULE_DESCRIPTION macros due to the STATIC definition. Split .modinfo out from ELF_DETAILS into its own macro and handle it in all vmlinux linker scripts. Discard .modinfo in the places where it was previously being discarded from being in COMMON_DISCARDS, as it has never been necessary in those uses. Cc: stable@vger.kernel.org Fixes: 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped") Reported-by: Ed W Closes: https://lore.kernel.org/587f25e0-a80e-46a5-9f01-87cb40cfa377@wildgooses.com/ [1] Tested-by: Ed W # x86_64 Link: https://patch.msgid.link/20260225-separate-modinfo-from-elf-details-v1-1-387ced6baf4b@kernel.org Signed-off-by: Nathan Chancellor --- arch/alpha/kernel/vmlinux.lds.S | 1 + arch/arc/kernel/vmlinux.lds.S | 1 + arch/arm/boot/compressed/vmlinux.lds.S | 1 + arch/arm/kernel/vmlinux-xip.lds.S | 1 + arch/arm/kernel/vmlinux.lds.S | 1 + arch/arm64/kernel/vmlinux.lds.S | 1 + arch/csky/kernel/vmlinux.lds.S | 1 + arch/hexagon/kernel/vmlinux.lds.S | 1 + arch/loongarch/kernel/vmlinux.lds.S | 1 + arch/m68k/kernel/vmlinux-nommu.lds | 1 + arch/m68k/kernel/vmlinux-std.lds | 1 + arch/m68k/kernel/vmlinux-sun3.lds | 1 + arch/mips/kernel/vmlinux.lds.S | 1 + arch/nios2/kernel/vmlinux.lds.S | 1 + arch/openrisc/kernel/vmlinux.lds.S | 1 + arch/parisc/boot/compressed/vmlinux.lds.S | 1 + arch/parisc/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + arch/riscv/kernel/vmlinux.lds.S | 1 + arch/s390/kernel/vmlinux.lds.S | 1 + arch/sh/kernel/vmlinux.lds.S | 1 + arch/sparc/kernel/vmlinux.lds.S | 1 + arch/um/kernel/dyn.lds.S | 1 + arch/um/kernel/uml.lds.S | 1 + arch/x86/boot/compressed/vmlinux.lds.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 4 +++- 27 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 2efa7dfc798a..2d136c63db16 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -71,6 +71,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 61a1b2b96e1d..6af63084ff28 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -123,6 +123,7 @@ SECTIONS _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index d411abd4310e..2d916647df03 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -21,6 +21,7 @@ SECTIONS COMMON_DISCARDS *(.ARM.exidx*) *(.ARM.extab*) + *(.modinfo) *(.note.*) *(.rel.*) *(.printk_index) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index f2e8d4fac068..5afb725998ec 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -154,6 +154,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ARM_DETAILS ARM_ASSERTS diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index d592a203f9c6..c07843c3c53d 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -153,6 +153,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ARM_DETAILS ARM_ASSERTS diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ad6133b89e7a..2964aad0362e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -349,6 +349,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS HEAD_SYMBOLS diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S index d718961786d2..81943981b3af 100644 --- a/arch/csky/kernel/vmlinux.lds.S +++ b/arch/csky/kernel/vmlinux.lds.S @@ -109,6 +109,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 1150b77fa281..aae22283b5e0 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -62,6 +62,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS .hexagon.attributes 0 : { *(.hexagon.attributes) } diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 08ea921cdec1..d0e1377a041d 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -147,6 +147,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS #ifdef CONFIG_EFI_STUB diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds index 2624fc18c131..45d7f4b0177b 100644 --- a/arch/m68k/kernel/vmlinux-nommu.lds +++ b/arch/m68k/kernel/vmlinux-nommu.lds @@ -85,6 +85,7 @@ SECTIONS { _end = .; STABS_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded */ diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 1ccdd04ae462..7326586afe15 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -58,6 +58,7 @@ SECTIONS _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded */ diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index f13ddcc2af5c..1b19fef201fb 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -51,6 +51,7 @@ __init_begin = .; _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded */ diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 2b708fac8d2c..579b2cc1995a 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -217,6 +217,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS /* These must appear regardless of . */ diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S index 37b958055064..206f92445bfa 100644 --- a/arch/nios2/kernel/vmlinux.lds.S +++ b/arch/nios2/kernel/vmlinux.lds.S @@ -57,6 +57,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 049bff45f612..9b29c3211774 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS /* Throw in the debugging sections */ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded -- must be last */ diff --git a/arch/parisc/boot/compressed/vmlinux.lds.S b/arch/parisc/boot/compressed/vmlinux.lds.S index ab7b43990857..87d24cc824b6 100644 --- a/arch/parisc/boot/compressed/vmlinux.lds.S +++ b/arch/parisc/boot/compressed/vmlinux.lds.S @@ -90,6 +90,7 @@ SECTIONS /* Sections to be discarded */ DISCARDS /DISCARD/ : { + *(.modinfo) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index b445e47903cf..0ca93d6d7235 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -165,6 +165,7 @@ SECTIONS _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS .note 0 : { *(.note) } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 15850296c0a9..8fc11d6565bf 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -397,6 +397,7 @@ SECTIONS _end = . ; DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 61bd5ba6680a..997f9eb3b22b 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -170,6 +170,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS .riscv.attributes 0 : { *(.riscv.attributes) } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 53bcbb91bb9b..2b62395e35bf 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -221,6 +221,7 @@ SECTIONS /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS /* diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 008c30289eaa..169c63fb3c1d 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -89,6 +89,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index f1b86eb30340..7ea510d9b42f 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -191,6 +191,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index a36b7918a011..ad3cefeff2ac 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -172,6 +172,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index a409d4b66114..30aa24348d60 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -113,6 +113,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 587ce3e7c504..e0b152715d9c 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -88,7 +88,7 @@ SECTIONS /DISCARD/ : { *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss) *(.hash) *(.gnu.hash) - *(.note.*) + *(.note.*) *(.modinfo) } .got.plt (INFO) : { diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3a24a3fc55f5..4711a35e706c 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -427,6 +427,7 @@ SECTIONS .llvm_bb_addr_map : { *(.llvm_bb_addr_map) } #endif + MODINFO ELF_DETAILS DISCARDS diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eeb070f330bd..1e1580febe4b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -848,12 +848,14 @@ /* Required sections not related to debugging. */ #define ELF_DETAILS \ - .modinfo : { *(.modinfo) . = ALIGN(8); } \ .comment 0 : { *(.comment) } \ .symtab 0 : { *(.symtab) } \ .strtab 0 : { *(.strtab) } \ .shstrtab 0 : { *(.shstrtab) } +#define MODINFO \ + .modinfo : { *(.modinfo) . = ALIGN(8); } + #ifdef CONFIG_GENERIC_BUG #define BUG_TABLE \ . = ALIGN(8); \ From d2395bb194ef212b36521ec4fa85e38b45675acb Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 25 Feb 2026 15:07:17 -0700 Subject: [PATCH 471/828] genksyms: Fix parsing a declarator with a preceding attribute After commit 07919126ecfc ("netfilter: annotate NAT helper hook pointers with __rcu"), genksyms fails to parse the __rcu annotation when building with CONFIG_DEBUG_INFO_BTF=y, CONFIG_PAHOLE_HAS_BTF_TAG=y, and a version of clang that supports btf_type_tag. $ clang --version | head -1 ClangBuiltLinux clang version 22.1.0 (https://github.com/llvm/llvm-project.git 4434dabb69916856b824f68a64b029c67175e532) $ cat kernel/configs/repro.config CONFIG_BPF_SYSCALL=y CONFIG_MODVERSIONS=y # CONFIG_DEBUG_INFO_NONE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_INFO_BTF=y $ make -skj"$(nproc)" ARCH=x86_64 LLVM=1 mrproper defconfig repro.config all WARNING: modpost: EXPORT symbol "nf_nat_ftp_hook" [vmlinux] version generation failed, symbol will not be versioned. ... WARNING: modpost: EXPORT symbol "nf_nat_irc_hook" [vmlinux] version generation failed, symbol will not be versioned. ... genksyms falls over parsing the __rcu attribute in the declarator: # Kernel reproducer $ make -skj"$(nproc)" ARCH=x86_64 KCFLAGS=-D__GENKSYMS__ LLVM=1 net/netfilter/nf_conntrack_ftp.i $ scripts/genksyms/genksyms -w :2: syntax error Optionally allow an attribute to precede a declarator to resolve this error and properly generate symbol versions. Fixes: 07919126ecfc ("netfilter: annotate NAT helper hook pointers with __rcu") Link: https://patch.msgid.link/20260225-genksyms-fix-attribute-declarator-v1-1-1b21478663fb@kernel.org Tested-by: Nicolas Schier Reviewed-by: Nicolas Schier Signed-off-by: Nathan Chancellor --- scripts/genksyms/parse.y | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index efdcf07c4eb6..cabcd146f3aa 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -325,8 +325,8 @@ direct_declarator: { $$ = $4; } | direct_declarator BRACKET_PHRASE { $$ = $2; } - | '(' declarator ')' - { $$ = $3; } + | '(' attribute_opt declarator ')' + { $$ = $4; } ; /* Nested declarators differ from regular declarators in that they do From ef06fd16d48704eac868441d98d4ef083d8f3d07 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 26 Feb 2026 07:55:25 +0000 Subject: [PATCH 472/828] bpf, arm64: Force 8-byte alignment for JIT buffer to prevent atomic tearing struct bpf_plt contains a u64 target field. Currently, the BPF JIT allocator requests an alignment of 4 bytes (sizeof(u32)) for the JIT buffer. Because the base address of the JIT buffer can be 4-byte aligned (e.g., ending in 0x4 or 0xc), the relative padding logic in build_plt() fails to ensure that target lands on an 8-byte boundary. This leads to two issues: 1. UBSAN reports misaligned-access warnings when dereferencing the structure. 2. More critically, target is updated concurrently via WRITE_ONCE() in bpf_arch_text_poke() while the JIT'd code executes ldr. On arm64, 64-bit loads/stores are only guaranteed to be single-copy atomic if they are 64-bit aligned. A misaligned target risks a torn read, causing the JIT to jump to a corrupted address. Fix this by increasing the allocation alignment requirement to 8 bytes (sizeof(u64)) in bpf_jit_binary_pack_alloc(). This anchors the base of the JIT buffer to an 8-byte boundary, allowing the relative padding math in build_plt() to correctly align the target field. Fixes: b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64") Signed-off-by: Fuad Tabba Acked-by: Will Deacon Link: https://lore.kernel.org/r/20260226075525.233321-1-tabba@google.com Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 356d33c7a4ae..adf84962d579 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2119,7 +2119,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); image_size = extable_offset + extable_size; ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, - sizeof(u32), &header, &image_ptr, + sizeof(u64), &header, &image_ptr, jit_fill_hole); if (!ro_header) { prog = orig_prog; From ad6fface76da42721c15e8fb281570aaa44a2c01 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 25 Feb 2026 12:12:49 +0100 Subject: [PATCH 473/828] bpf: Fix kprobe_multi cookies access in show_fdinfo callback We don't check if cookies are available on the kprobe_multi link before accessing them in show_fdinfo callback, we should. Cc: stable@vger.kernel.org Fixes: da7e9c0a7fbc ("bpf: Add show_fdinfo for kprobe_multi") Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20260225111249.186230-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9bc0dfd235af..0b040a417442 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2454,8 +2454,10 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_kprobe_multi_link *kmulti_link; + bool has_cookies; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); + has_cookies = !!kmulti_link->cookies; seq_printf(seq, "kprobe_cnt:\t%u\n" @@ -2467,7 +2469,7 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, for (int i = 0; i < kmulti_link->cnt; i++) { seq_printf(seq, "%llu\t %pS\n", - kmulti_link->cookies[i], + has_cookies ? kmulti_link->cookies[i] : 0, (void *)kmulti_link->addrs[i]); } } From b7bf516c3ecd9a2aae2dc2635178ab87b734fef1 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Wed, 25 Feb 2026 05:34:44 +0000 Subject: [PATCH 474/828] bpf: Fix stack-out-of-bounds write in devmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_upper_ifindexes() iterates over all upper devices and writes their indices into an array without checking bounds. Also the callers assume that the max number of upper devices is MAX_NEST_DEV and allocate excluded_devices[1+MAX_NEST_DEV] on the stack, but that assumption is not correct and the number of upper devices could be larger than MAX_NEST_DEV (e.g., many macvlans), causing a stack-out-of-bounds write. Add a max parameter to get_upper_ifindexes() to avoid the issue. When there are too many upper devices, return -EOVERFLOW and abort the redirect. To reproduce, create more than MAX_NEST_DEV(8) macvlans on a device with an XDP program attached using BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS. Then send a packet to the device to trigger the XDP redirect path. Reported-by: syzbot+10cc7f13760b31bd2e61@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/698c4ce3.050a0220.340abe.000b.GAE@google.com/T/ Fixes: aeea1b86f936 ("bpf, devmap: Exclude XDP broadcast to master device") Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Kohei Enju Link: https://lore.kernel.org/r/20260225053506.4738-1-kohei@enjuk.jp Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2625601de76e..2984e938f94d 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -588,18 +588,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin } /* Get ifindex of each upper device. 'indexes' must be able to hold at - * least MAX_NEST_DEV elements. - * Returns the number of ifindexes added. + * least 'max' elements. + * Returns the number of ifindexes added, or -EOVERFLOW if there are too + * many upper devices. */ -static int get_upper_ifindexes(struct net_device *dev, int *indexes) +static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max) { struct net_device *upper; struct list_head *iter; int n = 0; netdev_for_each_upper_dev_rcu(dev, upper, iter) { + if (n >= max) + return -EOVERFLOW; indexes[n++] = upper->ifindex; } + return n; } @@ -615,7 +619,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, int err; if (exclude_ingress) { - num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); + num_excluded = get_upper_ifindexes(dev_rx, excluded_devices, + ARRAY_SIZE(excluded_devices) - 1); + if (num_excluded < 0) + return num_excluded; + excluded_devices[num_excluded++] = dev_rx->ifindex; } @@ -733,7 +741,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, int err; if (exclude_ingress) { - num_excluded = get_upper_ifindexes(dev, excluded_devices); + num_excluded = get_upper_ifindexes(dev, excluded_devices, + ARRAY_SIZE(excluded_devices) - 1); + if (num_excluded < 0) + return num_excluded; + excluded_devices[num_excluded++] = dev->ifindex; } From 60e3cbef3500ad6101bc91946e645f69b1bb9556 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Tue, 24 Feb 2026 16:33:51 -0800 Subject: [PATCH 475/828] selftests/bpf: Fix a memory leak in xdp_flowtable test test_progs run with ASAN reported [1]: ==126==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7f1ff3cfa340 in calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:77 #1 0x5610c15bb520 in bpf_program_attach_fd /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/lib/bpf/libbpf.c:13164 #2 0x5610c15bb740 in bpf_program__attach_xdp /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/lib/bpf/libbpf.c:13204 #3 0x5610c14f91d3 in test_xdp_flowtable /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c:138 #4 0x5610c1533566 in run_one_test /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/testing/selftests/bpf/test_progs.c:1406 #5 0x5610c1537fb0 in main /codebuild/output/src685977285/src/actions-runner/_work/vmtest/vmtest/src/tools/testing/selftests/bpf/test_progs.c:2097 #6 0x7f1ff25df1c9 (/lib/x86_64-linux-gnu/libc.so.6+0x2a1c9) (BuildId: 8e9fd827446c24067541ac5390e6f527fb5947bb) #7 0x7f1ff25df28a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2a28a) (BuildId: 8e9fd827446c24067541ac5390e6f527fb5947bb) #8 0x5610c0bd3180 in _start (/tmp/work/vmtest/vmtest/selftests/bpf/test_progs+0x593180) (BuildId: cdf9f103f42307dc0a2cd6cfc8afcbc1366cf8bd) Fix by properly destroying bpf_link on exit in xdp_flowtable test. [1] https://github.com/kernel-patches/vmtest/actions/runs/22361085418/job/64716490680 Signed-off-by: Ihor Solodrai Reviewed-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/20260225003351.465104-1-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c index 3f9146d83d79..325e0b64dc35 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c @@ -67,7 +67,7 @@ void test_xdp_flowtable(void) struct nstoken *tok = NULL; int iifindex, stats_fd; __u32 value, key = 0; - struct bpf_link *link; + struct bpf_link *link = NULL; if (SYS_NOFAIL("nft -v")) { fprintf(stdout, "Missing required nft tool\n"); @@ -160,6 +160,7 @@ void test_xdp_flowtable(void) ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed"); out: + bpf_link__destroy(link); xdp_flowtable__destroy(skel); if (tok) close_netns(tok); From 6881af27f9ea0f5ca8f606f573ef5cc25ca31fe4 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Tue, 24 Feb 2026 16:33:48 -0800 Subject: [PATCH 476/828] selftests/bpf: Fix OOB read in dmabuf_collector Dmabuf name allocations can be less than DMA_BUF_NAME_LEN characters, but bpf_probe_read_kernel always tries to read exactly that many bytes. If a name is less than DMA_BUF_NAME_LEN characters, bpf_probe_read_kernel will read past the end. bpf_probe_read_kernel_str stops at the first NUL terminator so use it instead, like iter_dmabuf_for_each already does. Fixes: ae5d2c59ecd7 ("selftests/bpf: Add test for dmabuf_iter") Reported-by: Jerome Lee Signed-off-by: T.J. Mercier Link: https://lore.kernel.org/r/20260225003349.113746-1-tjmercier@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/dmabuf_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c index 13cdb11fdeb2..9cbb7442646e 100644 --- a/tools/testing/selftests/bpf/progs/dmabuf_iter.c +++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c @@ -48,7 +48,7 @@ int dmabuf_collector(struct bpf_iter__dmabuf *ctx) /* Buffers are not required to be named */ if (pname) { - if (bpf_probe_read_kernel(name, sizeof(name), pname)) + if (bpf_probe_read_kernel_str(name, sizeof(name), pname) < 0) return 1; /* Name strings can be provided by userspace */ From 459cb3c054c2352bb321648744b620259a716b60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 26 Feb 2026 08:41:48 +0100 Subject: [PATCH 477/828] kbuild: install-extmod-build: Package resolve_btfids if necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_DEBUG_INFO_BTF_MODULES is enabled and vmlinux is available, Makefile.modfinal and gen-btf.sh will try to use resolve_btfids on the module .ko. install-extmod-build currently does not package resolve_btfids, so that step fails. Package resolve_btfids if it may be used. Signed-off-by: Thomas Weißschuh Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/20260226-kbuild-resolve_btfids-v1-1-2bf38b93dfe7@linutronix.de [nathan: Small commit message tweaks] Signed-off-by: Nathan Chancellor --- scripts/package/install-extmod-build | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 2576cf7902db..f12e1ffe409e 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -32,6 +32,10 @@ mkdir -p "${destdir}" echo tools/objtool/objtool fi + if is_enabled CONFIG_DEBUG_INFO_BTF_MODULES; then + echo tools/bpf/resolve_btfids/resolve_btfids + fi + echo Module.symvers echo "arch/${SRCARCH}/include/generated" echo include/config/auto.conf From 749989b2d90ddc7dd253ad3b11a77cf882721acf Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 26 Feb 2026 12:45:17 +0000 Subject: [PATCH 478/828] sched_ext: Fix SCX_EFLAG_INITIALIZED being a no-op flag SCX_EFLAG_INITIALIZED is the sole member of enum scx_exit_flags with no explicit value, so the compiler assigns it 0. This makes the bitwise OR in scx_ops_init() a no-op: sch->exit_info->flags |= SCX_EFLAG_INITIALIZED; /* |= 0 */ As a result, BPF schedulers cannot distinguish whether ops.init() completed successfully by inspecting exit_info->flags. Assign the value 1LLU << 0 so the flag is actually set. Fixes: f3aec2adce8d ("sched_ext: Add SCX_EFLAG_INITIALIZED to indicate successful ops.init()") Signed-off-by: David Carlier Signed-off-by: Tejun Heo --- kernel/sched/ext_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h index 386c677e4c9a..11ebb744d893 100644 --- a/kernel/sched/ext_internal.h +++ b/kernel/sched/ext_internal.h @@ -74,7 +74,7 @@ enum scx_exit_flags { * info communication. The following flag indicates whether ops.init() * finished successfully. */ - SCX_EFLAG_INITIALIZED, + SCX_EFLAG_INITIALIZED = 1LLU << 0, }; /* From dd677d0598387ea623820ab2bd0e029c377445a3 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Thu, 4 Dec 2025 12:26:13 -0800 Subject: [PATCH 479/828] nvmet-fcloop: Check remoteport port_state before calling done callback In nvme_fc_handle_ls_rqst_work, the lsrsp->done callback is only set when remoteport->port_state is FC_OBJSTATE_ONLINE. Otherwise, the nvme_fc_xmt_ls_rsp's LLDD call to lport->ops->xmt_ls_rsp is expected to fail and the nvme-fc transport layer itself will directly call nvme_fc_xmt_ls_rsp_free instead of relying on LLDD's done callback to free the lsrsp resources. Update the fcloop_t2h_xmt_ls_rsp routine to check remoteport->port_state. If online, then lsrsp->done callback will free the lsrsp. Else, return -ENODEV to signal the nvme-fc transport to handle freeing lsrsp. Cc: Ewan D. Milne Tested-by: Aristeu Rozanski Acked-by: Aristeu Rozanski Reviewed-by: Daniel Wagner Closes: https://lore.kernel.org/linux-nvme/21255200-a271-4fa0-b099-97755c8acd4c@work/ Fixes: 10c165af35d2 ("nvmet-fcloop: call done callback even when remote port is gone") Signed-off-by: Justin Tee Signed-off-by: Keith Busch --- drivers/nvme/target/fcloop.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c30e9a3e014f..38bd2db3d6bb 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -491,6 +491,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; struct nvmet_fc_target_port *targetport = rport->targetport; struct fcloop_tport *tport; + int ret = 0; if (!targetport) { /* @@ -500,12 +501,18 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, * We end up here from delete association exchange: * nvmet_fc_xmt_disconnect_assoc sends an async request. * - * Return success because this is what LLDDs do; silently - * drop the response. + * Return success when remoteport is still online because this + * is what LLDDs do and silently drop the response. Otherwise, + * return with error to signal upper layer to perform the lsrsp + * resource cleanup. */ - lsrsp->done(lsrsp); + if (remoteport->port_state == FC_OBJSTATE_ONLINE) + lsrsp->done(lsrsp); + else + ret = -ENODEV; + kmem_cache_free(lsreq_cache, tls_req); - return 0; + return ret; } memcpy(lsreq->rspaddr, lsrsp->rspbuf, From b6c3af46c26f2d07c10a1452adc34b821719327e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 23 Feb 2026 19:06:51 +0100 Subject: [PATCH 480/828] pinctrl: cy8c95x0: Don't miss reading the last bank registers When code had been changed to use for_each_set_clump8(), it mistakenly switched from chip->nport to chip->tpin since the cy8c9540 and cy8c9560 have a 4-pin gap. This, in particular, led to the missed read of the last bank interrupt status register and hence missing interrupts on those pins. Restore the upper limit in for_each_set_clump8() to take into consideration that gap. Fixes: 83e29a7a1fdf ("pinctrl: cy8c95x0; Switch to use for_each_set_clump8()") Cc: stable@vger.kernel.org Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-cy8c95x0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index a4b04bf6d081..5c055d344ac9 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -627,7 +627,7 @@ static int cy8c95x0_write_regs_mask(struct cy8c95x0_pinctrl *chip, int reg, bitmap_scatter(tmask, mask, chip->map, MAX_LINE); bitmap_scatter(tval, val, chip->map, MAX_LINE); - for_each_set_clump8(offset, bits, tmask, chip->tpin) { + for_each_set_clump8(offset, bits, tmask, chip->nport * BANK_SZ) { unsigned int i = offset / 8; write_val = bitmap_get_value8(tval, offset); @@ -655,7 +655,7 @@ static int cy8c95x0_read_regs_mask(struct cy8c95x0_pinctrl *chip, int reg, bitmap_scatter(tmask, mask, chip->map, MAX_LINE); bitmap_scatter(tval, val, chip->map, MAX_LINE); - for_each_set_clump8(offset, bits, tmask, chip->tpin) { + for_each_set_clump8(offset, bits, tmask, chip->nport * BANK_SZ) { unsigned int i = offset / 8; ret = cy8c95x0_regmap_read_bits(chip, reg, i, bits, &read_val); From e96493229a6399e902062213c6381162464cdd50 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Tue, 24 Feb 2026 16:09:22 +0100 Subject: [PATCH 481/828] spi: stm32: fix missing pointer assignment in case of dma chaining Commit c4f2c05ab029 ("spi: stm32: fix pointer-to-pointer variables usage") introduced a regression since dma descriptors generated as part of the stm32_spi_prepare_rx_dma_mdma_chaining function are not well propagated to the caller function, leading to mdma-dma chaining being no more functional. Fixes: c4f2c05ab029 ("spi: stm32: fix pointer-to-pointer variables usage") Signed-off-by: Alain Volmat Acked-by: Antonio Quartulli Link: https://patch.msgid.link/20260224-spi-stm32-chaining-fix-v1-1-5da7a4851b66@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index b99de8c4cc99..33f211e159ef 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -1625,6 +1625,9 @@ static int stm32_spi_prepare_rx_dma_mdma_chaining(struct stm32_spi *spi, return -EINVAL; } + *rx_mdma_desc = _mdma_desc; + *rx_dma_desc = _dma_desc; + return 0; } From 4fc3a433c13944ee5766ec5b9bf6f1eb4d29b880 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 23 Feb 2026 13:34:35 -0300 Subject: [PATCH 482/828] smb: client: use atomic_t for mnt_cifs_flags Use atomic_t for cifs_sb_info::mnt_cifs_flags as it's currently accessed locklessly and may be changed concurrently in mount/remount and reconnect paths. Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 2 +- fs/smb/client/cifs_fs_sb.h | 2 +- fs/smb/client/cifs_ioctl.h | 8 -- fs/smb/client/cifs_unicode.c | 14 ---- fs/smb/client/cifs_unicode.h | 14 +++- fs/smb/client/cifsacl.c | 17 ++-- fs/smb/client/cifsfs.c | 84 ++++++++++---------- fs/smb/client/cifsglob.h | 61 +++++++++++--- fs/smb/client/connect.c | 65 ++++++++------- fs/smb/client/dfs_cache.c | 2 +- fs/smb/client/dir.c | 53 +++++++------ fs/smb/client/file.c | 90 +++++++++++---------- fs/smb/client/fs_context.c | 149 +++++++++++++++++------------------ fs/smb/client/fs_context.h | 2 +- fs/smb/client/inode.c | 146 ++++++++++++++++++---------------- fs/smb/client/ioctl.c | 2 +- fs/smb/client/link.c | 14 ++-- fs/smb/client/misc.c | 16 ++-- fs/smb/client/readdir.c | 39 ++++----- fs/smb/client/reparse.c | 29 +++---- fs/smb/client/reparse.h | 4 +- fs/smb/client/smb1ops.c | 22 ++++-- fs/smb/client/smb2file.c | 2 +- fs/smb/client/smb2misc.c | 18 +---- fs/smb/client/smb2ops.c | 8 +- fs/smb/client/smb2pdu.c | 13 ++- fs/smb/client/xattr.c | 6 +- 27 files changed, 465 insertions(+), 417 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index c327c246a9b4..04bb95091f49 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -118,7 +118,7 @@ static const char *path_no_prefix(struct cifs_sb_info *cifs_sb, if (!*path) return path; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH) && cifs_sb->prepath) { len = strlen(cifs_sb->prepath) + 1; if (unlikely(len > strlen(path))) diff --git a/fs/smb/client/cifs_fs_sb.h b/fs/smb/client/cifs_fs_sb.h index 5e8d163cb5f8..84e7e366b0ff 100644 --- a/fs/smb/client/cifs_fs_sb.h +++ b/fs/smb/client/cifs_fs_sb.h @@ -55,7 +55,7 @@ struct cifs_sb_info { struct nls_table *local_nls; struct smb3_fs_context *ctx; atomic_t active; - unsigned int mnt_cifs_flags; + atomic_t mnt_cifs_flags; struct delayed_work prune_tlinks; struct rcu_head rcu; diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index b51ce64fcccf..147496ac9f9f 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -122,11 +122,3 @@ struct smb3_notify_info { #define CIFS_GOING_FLAGS_DEFAULT 0x0 /* going down */ #define CIFS_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define CIFS_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ - -static inline bool cifs_forced_shutdown(struct cifs_sb_info *sbi) -{ - if (CIFS_MOUNT_SHUTDOWN & sbi->mnt_cifs_flags) - return true; - else - return false; -} diff --git a/fs/smb/client/cifs_unicode.c b/fs/smb/client/cifs_unicode.c index e7891b4406f2..e2edc207cef2 100644 --- a/fs/smb/client/cifs_unicode.c +++ b/fs/smb/client/cifs_unicode.c @@ -11,20 +11,6 @@ #include "cifsglob.h" #include "cifs_debug.h" -int cifs_remap(struct cifs_sb_info *cifs_sb) -{ - int map_type; - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) - map_type = SFM_MAP_UNI_RSVD; - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - map_type = SFU_MAP_UNI_RSVD; - else - map_type = NO_MAP_UNI_RSVD; - - return map_type; -} - /* Convert character using the SFU - "Services for Unix" remapping range */ static bool convert_sfu_char(const __u16 src_char, char *target) diff --git a/fs/smb/client/cifs_unicode.h b/fs/smb/client/cifs_unicode.h index 9249db3b78c3..3e9cd9acf0a9 100644 --- a/fs/smb/client/cifs_unicode.h +++ b/fs/smb/client/cifs_unicode.h @@ -22,6 +22,7 @@ #include #include #include "../../nls/nls_ucs2_utils.h" +#include "cifsglob.h" /* * Macs use an older "SFM" mapping of the symbols above. Fortunately it does @@ -65,10 +66,21 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen, const struct nls_table *codepage); int cifsConvertToUTF16(__le16 *target, const char *source, int srclen, const struct nls_table *cp, int map_chars); -int cifs_remap(struct cifs_sb_info *cifs_sb); __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, const struct nls_table *cp, int remap); wchar_t cifs_toupper(wchar_t in); +static inline int cifs_remap(const struct cifs_sb_info *cifs_sb) +{ + unsigned int sbflags = cifs_sb_flags(cifs_sb); + + if (sbflags & CIFS_MOUNT_MAP_SFM_CHR) + return SFM_MAP_UNI_RSVD; + if (sbflags & CIFS_MOUNT_MAP_SPECIAL_CHR) + return SFU_MAP_UNI_RSVD; + + return NO_MAP_UNI_RSVD; +} + #endif /* _CIFS_UNICODE_H */ diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 6fa12c901c14..f4cb3018a358 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -356,7 +356,7 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, psid->num_subauth, SID_MAX_SUB_AUTHORITIES); } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) || + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_UID_FROM_ACL) || (cifs_sb_master_tcon(cifs_sb)->posix_extensions)) { uint32_t unix_id; bool is_group; @@ -1612,7 +1612,8 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, struct smb_acl *dacl_ptr = NULL; struct smb_ntsd *pntsd = NULL; /* acl obtained from server */ struct smb_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + unsigned int sbflags; struct tcon_link *tlink; struct smb_version_operations *ops; bool mode_from_sid, id_from_sid; @@ -1643,15 +1644,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, return rc; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) - mode_from_sid = true; - else - mode_from_sid = false; - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) - id_from_sid = true; - else - id_from_sid = false; + sbflags = cifs_sb_flags(cifs_sb); + mode_from_sid = sbflags & CIFS_MOUNT_MODE_FROM_SID; + id_from_sid = sbflags & CIFS_MOUNT_UID_FROM_ACL; /* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */ if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 99b04234a08e..427558404aa5 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -226,16 +226,18 @@ cifs_sb_deactive(struct super_block *sb) static int cifs_read_super(struct super_block *sb) { - struct inode *inode; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; + unsigned int sbflags; struct timespec64 ts; + struct inode *inode; int rc = 0; cifs_sb = CIFS_SB(sb); tcon = cifs_sb_master_tcon(cifs_sb); + sbflags = cifs_sb_flags(cifs_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) + if (sbflags & CIFS_MOUNT_POSIXACL) sb->s_flags |= SB_POSIXACL; if (tcon->snapshot_time) @@ -311,7 +313,7 @@ cifs_read_super(struct super_block *sb) } #ifdef CONFIG_CIFS_NFSD_EXPORT - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + if (sbflags & CIFS_MOUNT_SERVER_INUM) { cifs_dbg(FYI, "export ops supported\n"); sb->s_export_op = &cifs_export_ops; } @@ -389,8 +391,7 @@ statfs_out: static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) { - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(CIFS_SB(file)); struct TCP_Server_Info *server = tcon->ses->server; struct inode *inode = file_inode(file); int rc; @@ -418,11 +419,9 @@ out_unlock: static int cifs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { - struct cifs_sb_info *cifs_sb; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode)); - cifs_sb = CIFS_SB(inode->i_sb); - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { + if (sbflags & CIFS_MOUNT_NO_PERM) { if ((mask & MAY_EXEC) && !execute_ok(inode)) return -EACCES; else @@ -568,15 +567,17 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses) static void cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); + seq_puts(s, ",cache="); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) + if (sbflags & CIFS_MOUNT_STRICT_IO) seq_puts(s, "strict"); - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + else if (sbflags & CIFS_MOUNT_DIRECT_IO) seq_puts(s, "none"); - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) + else if (sbflags & CIFS_MOUNT_RW_CACHE) seq_puts(s, "singleclient"); /* assume only one client access */ - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) + else if (sbflags & CIFS_MOUNT_RO_CACHE) seq_puts(s, "ro"); /* read only caching assumed */ else seq_puts(s, "loose"); @@ -637,6 +638,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct sockaddr *srcaddr; + unsigned int sbflags; + srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; seq_show_option(s, "vers", tcon->ses->server->vals->version_string); @@ -670,16 +673,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root) (int)(srcaddr->sa_family)); } + sbflags = cifs_sb_flags(cifs_sb); seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, cifs_sb->ctx->linux_uid)); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + if (sbflags & CIFS_MOUNT_OVERR_UID) seq_puts(s, ",forceuid"); else seq_puts(s, ",noforceuid"); seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, cifs_sb->ctx->linux_gid)); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + if (sbflags & CIFS_MOUNT_OVERR_GID) seq_puts(s, ",forcegid"); else seq_puts(s, ",noforcegid"); @@ -722,53 +726,53 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",unix"); else seq_puts(s, ",nounix"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) + if (sbflags & CIFS_MOUNT_NO_DFS) seq_puts(s, ",nodfs"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + if (sbflags & CIFS_MOUNT_POSIX_PATHS) seq_puts(s, ",posixpaths"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) + if (sbflags & CIFS_MOUNT_SET_UID) seq_puts(s, ",setuids"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) + if (sbflags & CIFS_MOUNT_UID_FROM_ACL) seq_puts(s, ",idsfromsid"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + if (sbflags & CIFS_MOUNT_SERVER_INUM) seq_puts(s, ",serverino"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + if (sbflags & CIFS_MOUNT_RWPIDFORWARD) seq_puts(s, ",rwpidforward"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) + if (sbflags & CIFS_MOUNT_NOPOSIXBRL) seq_puts(s, ",forcemand"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (sbflags & CIFS_MOUNT_NO_XATTR) seq_puts(s, ",nouser_xattr"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + if (sbflags & CIFS_MOUNT_MAP_SPECIAL_CHR) seq_puts(s, ",mapchars"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + if (sbflags & CIFS_MOUNT_MAP_SFM_CHR) seq_puts(s, ",mapposix"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + if (sbflags & CIFS_MOUNT_UNX_EMUL) seq_puts(s, ",sfu"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + if (sbflags & CIFS_MOUNT_NO_BRL) seq_puts(s, ",nobrl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE) + if (sbflags & CIFS_MOUNT_NO_HANDLE_CACHE) seq_puts(s, ",nohandlecache"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) + if (sbflags & CIFS_MOUNT_MODE_FROM_SID) seq_puts(s, ",modefromsid"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + if (sbflags & CIFS_MOUNT_CIFS_ACL) seq_puts(s, ",cifsacl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + if (sbflags & CIFS_MOUNT_DYNPERM) seq_puts(s, ",dynperm"); if (root->d_sb->s_flags & SB_POSIXACL) seq_puts(s, ",acl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) + if (sbflags & CIFS_MOUNT_MF_SYMLINKS) seq_puts(s, ",mfsymlinks"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) + if (sbflags & CIFS_MOUNT_FSCACHE) seq_puts(s, ",fsc"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) + if (sbflags & CIFS_MOUNT_NOSSYNC) seq_puts(s, ",nostrictsync"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + if (sbflags & CIFS_MOUNT_NO_PERM) seq_puts(s, ",noperm"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) + if (sbflags & CIFS_MOUNT_CIFS_BACKUPUID) seq_printf(s, ",backupuid=%u", from_kuid_munged(&init_user_ns, cifs_sb->ctx->backupuid)); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) + if (sbflags & CIFS_MOUNT_CIFS_BACKUPGID) seq_printf(s, ",backupgid=%u", from_kgid_munged(&init_user_ns, cifs_sb->ctx->backupgid)); @@ -909,10 +913,10 @@ static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc) static int cifs_drop_inode(struct inode *inode) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode)); /* no serverino => unconditional eviction */ - return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) || + return !(sbflags & CIFS_MOUNT_SERVER_INUM) || inode_generic_drop(inode); } @@ -950,7 +954,7 @@ cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb) char *s, *p; char sep; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH) return dget(sb->s_root); full_path = cifs_build_path_to_root(ctx, cifs_sb, diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 080ea601c209..6f9b6c72962b 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1580,24 +1580,59 @@ CIFS_I(struct inode *inode) return container_of(inode, struct cifsInodeInfo, netfs.inode); } -static inline struct cifs_sb_info * -CIFS_SB(struct super_block *sb) +static inline void *cinode_to_fsinfo(struct cifsInodeInfo *cinode) +{ + return cinode->netfs.inode.i_sb->s_fs_info; +} + +static inline void *super_to_fsinfo(struct super_block *sb) { return sb->s_fs_info; } -static inline struct cifs_sb_info * -CIFS_FILE_SB(struct file *file) +static inline void *inode_to_fsinfo(struct inode *inode) { - return CIFS_SB(file_inode(file)->i_sb); + return inode->i_sb->s_fs_info; +} + +static inline void *file_to_fsinfo(struct file *file) +{ + return file_inode(file)->i_sb->s_fs_info; +} + +static inline void *dentry_to_fsinfo(struct dentry *dentry) +{ + return dentry->d_sb->s_fs_info; +} + +static inline void *const_dentry_to_fsinfo(const struct dentry *dentry) +{ + return dentry->d_sb->s_fs_info; +} + +#define CIFS_SB(_ptr) \ + ((struct cifs_sb_info *) \ + _Generic((_ptr), \ + struct cifsInodeInfo * : cinode_to_fsinfo, \ + const struct dentry * : const_dentry_to_fsinfo, \ + struct super_block * : super_to_fsinfo, \ + struct dentry * : dentry_to_fsinfo, \ + struct inode * : inode_to_fsinfo, \ + struct file * : file_to_fsinfo)(_ptr)) + +/* + * Use atomic_t for @cifs_sb->mnt_cifs_flags as it is currently accessed + * locklessly and may be changed concurrently by mount/remount and reconnect + * paths. + */ +static inline unsigned int cifs_sb_flags(const struct cifs_sb_info *cifs_sb) +{ + return atomic_read(&cifs_sb->mnt_cifs_flags); } static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - return '/'; - else - return '\\'; + return (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) ? '/' : '\\'; } static inline void @@ -2314,9 +2349,8 @@ static inline bool __cifs_cache_state_check(struct cifsInodeInfo *cinode, unsigned int oplock_flags, unsigned int sb_flags) { - struct cifs_sb_info *cifs_sb = CIFS_SB(cinode->netfs.inode.i_sb); + unsigned int sflags = cifs_sb_flags(CIFS_SB(cinode)); unsigned int oplock = READ_ONCE(cinode->oplock); - unsigned int sflags = cifs_sb->mnt_cifs_flags; return (oplock & oplock_flags) || (sflags & sb_flags); } @@ -2336,4 +2370,9 @@ static inline void cifs_reset_oplock(struct cifsInodeInfo *cinode) WRITE_ONCE(cinode->oplock, 0); } +static inline bool cifs_forced_shutdown(const struct cifs_sb_info *sbi) +{ + return cifs_sb_flags(sbi) & CIFS_MOUNT_SHUTDOWN; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 33dfe116ca52..87686c804057 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2915,8 +2915,8 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - unsigned int oldflags = old->mnt_cifs_flags & CIFS_MOUNT_MASK; - unsigned int newflags = new->mnt_cifs_flags & CIFS_MOUNT_MASK; + unsigned int oldflags = cifs_sb_flags(old) & CIFS_MOUNT_MASK; + unsigned int newflags = cifs_sb_flags(new) & CIFS_MOUNT_MASK; if ((sb->s_flags & CIFS_MS_MASK) != (mnt_data->flags & CIFS_MS_MASK)) return 0; @@ -2971,9 +2971,9 @@ static int match_prepath(struct super_block *sb, struct smb3_fs_context *ctx = mnt_data->ctx; struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + bool old_set = (cifs_sb_flags(old) & CIFS_MOUNT_USE_PREFIX_PATH) && old->prepath; - bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + bool new_set = (cifs_sb_flags(new) & CIFS_MOUNT_USE_PREFIX_PATH) && new->prepath; if (tcon->origin_fullpath && @@ -3004,7 +3004,7 @@ cifs_match_super(struct super_block *sb, void *data) cifs_sb = CIFS_SB(sb); /* We do not want to use a superblock that has been shutdown */ - if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) { + if (cifs_forced_shutdown(cifs_sb)) { spin_unlock(&cifs_tcp_ses_lock); return 0; } @@ -3469,6 +3469,8 @@ ip_connect(struct TCP_Server_Info *server) int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) { struct smb3_fs_context *ctx = cifs_sb->ctx; + unsigned int sbflags; + int rc = 0; INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); INIT_LIST_HEAD(&cifs_sb->tcon_sb_link); @@ -3493,17 +3495,16 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) } ctx->local_nls = cifs_sb->local_nls; - smb3_update_mnt_flags(cifs_sb); + sbflags = smb3_update_mnt_flags(cifs_sb); if (ctx->direct_io) cifs_dbg(FYI, "mounting share using direct i/o\n"); if (ctx->cache_ro) { cifs_dbg(VFS, "mounting share with read only caching. Ensure that the share will not be modified while in use.\n"); - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RO_CACHE; + sbflags |= CIFS_MOUNT_RO_CACHE; } else if (ctx->cache_rw) { cifs_dbg(VFS, "mounting share in single client RW caching mode. Ensure that no other systems will be accessing the share.\n"); - cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_RO_CACHE | - CIFS_MOUNT_RW_CACHE); + sbflags |= CIFS_MOUNT_RO_CACHE | CIFS_MOUNT_RW_CACHE; } if ((ctx->cifs_acl) && (ctx->dynperm)) @@ -3512,16 +3513,19 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) if (ctx->prepath) { cifs_sb->prepath = kstrdup(ctx->prepath, GFP_KERNEL); if (cifs_sb->prepath == NULL) - return -ENOMEM; - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + rc = -ENOMEM; + else + sbflags |= CIFS_MOUNT_USE_PREFIX_PATH; } - return 0; + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); + return rc; } /* Release all succeed connections */ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) { + struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; int rc = 0; if (mnt_ctx->tcon) @@ -3533,7 +3537,7 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) mnt_ctx->ses = NULL; mnt_ctx->tcon = NULL; mnt_ctx->server = NULL; - mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; + atomic_andnot(CIFS_MOUNT_POSIX_PATHS, &cifs_sb->mnt_cifs_flags); free_xid(mnt_ctx->xid); } @@ -3587,19 +3591,23 @@ out: int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) { struct TCP_Server_Info *server; + struct cifs_tcon *tcon = NULL; struct cifs_sb_info *cifs_sb; struct smb3_fs_context *ctx; - struct cifs_tcon *tcon = NULL; + unsigned int sbflags; int rc = 0; - if (WARN_ON_ONCE(!mnt_ctx || !mnt_ctx->server || !mnt_ctx->ses || !mnt_ctx->fs_ctx || - !mnt_ctx->cifs_sb)) { - rc = -EINVAL; - goto out; + if (WARN_ON_ONCE(!mnt_ctx)) + return -EINVAL; + if (WARN_ON_ONCE(!mnt_ctx->server || !mnt_ctx->ses || + !mnt_ctx->fs_ctx || !mnt_ctx->cifs_sb)) { + mnt_ctx->tcon = NULL; + return -EINVAL; } server = mnt_ctx->server; ctx = mnt_ctx->fs_ctx; cifs_sb = mnt_ctx->cifs_sb; + sbflags = cifs_sb_flags(cifs_sb); /* search for existing tcon to this server share */ tcon = cifs_get_tcon(mnt_ctx->ses, ctx); @@ -3614,9 +3622,9 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) * path (i.e., do not remap / and \ and do not map any special characters) */ if (tcon->posix_extensions) { - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; - cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MAP_SFM_CHR | - CIFS_MOUNT_MAP_SPECIAL_CHR); + sbflags |= CIFS_MOUNT_POSIX_PATHS; + sbflags &= ~(CIFS_MOUNT_MAP_SFM_CHR | + CIFS_MOUNT_MAP_SPECIAL_CHR); } #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY @@ -3643,12 +3651,11 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { server->ops->qfs_tcon(mnt_ctx->xid, tcon, cifs_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { + if (sbflags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) cifs_dbg(VFS, "mounted to read only share\n"); - else if ((cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_RW_CACHE) == 0) + else if (!(sbflags & CIFS_MOUNT_RW_CACHE)) cifs_dbg(VFS, "read only mount of RW share\n"); /* no need to log a RW mount of a typical RW share */ } @@ -3660,11 +3667,12 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) * Inside cifs_fscache_get_super_cookie it checks * that we do not get super cookie twice. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) + if (sbflags & CIFS_MOUNT_FSCACHE) cifs_fscache_get_super_cookie(tcon); out: mnt_ctx->tcon = tcon; + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); return rc; } @@ -3783,7 +3791,8 @@ int cifs_is_path_remote(struct cifs_mount_ctx *mnt_ctx) cifs_sb, full_path, tcon->Flags & SMB_SHARE_IS_IN_DFS); if (rc != 0) { cifs_server_dbg(VFS, "cannot query dirs between root and final path, enabling CIFS_MOUNT_USE_PREFIX_PATH\n"); - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, + &cifs_sb->mnt_cifs_flags); rc = 0; } } @@ -3863,7 +3872,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) * Force the use of prefix path to support failover on DFS paths that resolve to targets * that have different prefix paths. */ - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags); kfree(cifs_sb->prepath); cifs_sb->prepath = ctx->prepath; ctx->prepath = NULL; @@ -4357,7 +4366,7 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb) kuid_t fsuid = current_fsuid(); int err; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER)) return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); spin_lock(&cifs_sb->tlink_tree_lock); diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 983132735d72..83f8cf2f8d2b 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1333,7 +1333,7 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) * Force the use of prefix path to support failover on DFS paths that resolve to targets * that have different prefix paths. */ - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags); refresh_tcon_referral(tcon, true); return 0; diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index cb10088197d2..953f1fee8cb8 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -82,10 +82,11 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa const char *tree, int tree_len, bool prefix) { - int dfsplen; - int pplen = 0; - struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry); + unsigned int sbflags = cifs_sb_flags(cifs_sb); char dirsep = CIFS_DIR_SEP(cifs_sb); + int pplen = 0; + int dfsplen; char *s; if (unlikely(!page)) @@ -96,7 +97,7 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa else dfsplen = 0; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + if (sbflags & CIFS_MOUNT_USE_PREFIX_PATH) pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0; s = dentry_path_raw(direntry, page, PATH_MAX); @@ -123,7 +124,7 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa if (dfsplen) { s -= dfsplen; memcpy(s, tree, dfsplen); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { + if (sbflags & CIFS_MOUNT_POSIX_PATHS) { int i; for (i = 0; i < dfsplen; i++) { if (s[i] == '\\') @@ -152,7 +153,7 @@ char *build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page static int check_name(struct dentry *direntry, struct cifs_tcon *tcon) { - struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry); int i; if (unlikely(tcon->fsAttrInfo.MaxPathNameComponentLength && @@ -160,7 +161,7 @@ check_name(struct dentry *direntry, struct cifs_tcon *tcon) le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength))) return -ENAMETOOLONG; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { cifs_dbg(FYI, "Invalid file name\n"); @@ -181,11 +182,12 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int rc = -ENOENT; int create_options = CREATE_NOT_DIR; int desired_access; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifs_tcon *tcon = tlink_tcon(tlink); const char *full_path; void *page = alloc_dentry_path(); struct inode *newinode = NULL; + unsigned int sbflags; int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; @@ -365,6 +367,7 @@ retry_open: * If Open reported that we actually created a file then we now have to * set the mode if possible. */ + sbflags = cifs_sb_flags(cifs_sb); if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { struct cifs_unix_set_info_args args = { .mode = mode, @@ -374,7 +377,7 @@ retry_open: .device = 0, }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { args.uid = current_fsuid(); if (inode->i_mode & S_ISGID) args.gid = inode->i_gid; @@ -411,9 +414,9 @@ cifs_create_get_file_info: if (server->ops->set_lease_key) server->ops->set_lease_key(newinode, fid); if ((*oplock & CIFS_CREATE_ACTION) && S_ISREG(newinode->i_mode)) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + if (sbflags & CIFS_MOUNT_DYNPERM) newinode->i_mode = mode; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { newinode->i_uid = current_fsuid(); if (inode->i_mode & S_ISGID) newinode->i_gid = inode->i_gid; @@ -458,18 +461,20 @@ int cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct file *file, unsigned int oflags, umode_t mode) { - int rc; - unsigned int xid; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct cifs_open_info_data buf = {}; + struct TCP_Server_Info *server; + struct cifsFileInfo *file_info; + struct cifs_pending_open open; + struct cifs_fid fid = {}; struct tcon_link *tlink; struct cifs_tcon *tcon; - struct TCP_Server_Info *server; - struct cifs_fid fid = {}; - struct cifs_pending_open open; + unsigned int sbflags; + unsigned int xid; __u32 oplock; - struct cifsFileInfo *file_info; - struct cifs_open_info_data buf = {}; + int rc; - if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) + if (unlikely(cifs_forced_shutdown(cifs_sb))) return smb_EIO(smb_eio_trace_forced_shutdown); /* @@ -499,7 +504,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", inode, direntry, direntry); - tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { rc = PTR_ERR(tlink); goto out_free_xid; @@ -536,13 +541,13 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, goto out; } - if (file->f_flags & O_DIRECT && - CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { - if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + sbflags = cifs_sb_flags(cifs_sb); + if ((file->f_flags & O_DIRECT) && (sbflags & CIFS_MOUNT_STRICT_IO)) { + if (sbflags & CIFS_MOUNT_NO_BRL) file->f_op = &cifs_file_direct_nobrl_ops; else file->f_op = &cifs_file_direct_ops; - } + } file_info = cifs_new_fileinfo(&fid, file, tlink, oplock, buf.symlink_target); if (file_info == NULL) { diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 18f31d4eb98d..f3ddcdf406c8 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -270,7 +270,7 @@ static void cifs_begin_writeback(struct netfs_io_request *wreq) static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) { struct cifs_io_request *req = container_of(rreq, struct cifs_io_request, rreq); - struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode); struct cifsFileInfo *open_file = NULL; rreq->rsize = cifs_sb->ctx->rsize; @@ -281,7 +281,7 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) open_file = file->private_data; rreq->netfs_priv = file->private_data; req->cfile = cifsFileInfo_get(open_file); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RWPIDFORWARD) req->pid = req->cfile->pid; } else if (rreq->origin != NETFS_WRITEBACK) { WARN_ON_ONCE(1); @@ -906,7 +906,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, * close because it may cause a error when we open this file * again and get at least level II oplock. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_STRICT_IO) set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); cifs_set_oplock_level(cifsi, 0); } @@ -955,11 +955,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, int cifs_file_flush(const unsigned int xid, struct inode *inode, struct cifsFileInfo *cfile) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifs_tcon *tcon; int rc; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOSSYNC) return 0; if (cfile && (OPEN_FMODE(cfile->f_flags) & FMODE_WRITE)) { @@ -1015,24 +1015,24 @@ static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry) int cifs_open(struct inode *inode, struct file *file) { + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct cifs_open_info_data data = {}; + struct cifsFileInfo *cfile = NULL; + struct TCP_Server_Info *server; + struct cifs_pending_open open; + bool posix_open_ok = false; + struct cifs_fid fid = {}; + struct tcon_link *tlink; + struct cifs_tcon *tcon; + const char *full_path; + unsigned int sbflags; int rc = -EACCES; unsigned int xid; __u32 oplock; - struct cifs_sb_info *cifs_sb; - struct TCP_Server_Info *server; - struct cifs_tcon *tcon; - struct tcon_link *tlink; - struct cifsFileInfo *cfile = NULL; void *page; - const char *full_path; - bool posix_open_ok = false; - struct cifs_fid fid = {}; - struct cifs_pending_open open; - struct cifs_open_info_data data = {}; xid = get_xid(); - cifs_sb = CIFS_SB(inode->i_sb); if (unlikely(cifs_forced_shutdown(cifs_sb))) { free_xid(xid); return smb_EIO(smb_eio_trace_forced_shutdown); @@ -1056,9 +1056,9 @@ int cifs_open(struct inode *inode, struct file *file) cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", inode, file->f_flags, full_path); - if (file->f_flags & O_DIRECT && - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + sbflags = cifs_sb_flags(cifs_sb); + if ((file->f_flags & O_DIRECT) && (sbflags & CIFS_MOUNT_STRICT_IO)) { + if (sbflags & CIFS_MOUNT_NO_BRL) file->f_op = &cifs_file_direct_nobrl_ops; else file->f_op = &cifs_file_direct_ops; @@ -1209,7 +1209,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(cinode); #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); @@ -1222,7 +1222,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) rc = cifs_push_posix_locks(cfile); else #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -2011,7 +2011,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(cinode); #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ /* we are going to update can_cache_brlcks here - need a write access */ @@ -2024,7 +2024,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) rc = cifs_push_posix_locks(cfile); else #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -2428,11 +2428,11 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, tcon->ses->server); - cifs_sb = CIFS_FILE_SB(file); + cifs_sb = CIFS_SB(file); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) posix_lck = true; if (!lock && !unlock) { @@ -2455,14 +2455,14 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) int cifs_lock(struct file *file, int cmd, struct file_lock *flock) { - int rc, xid; + struct cifs_sb_info *cifs_sb = CIFS_SB(file); + struct cifsFileInfo *cfile; int lock = 0, unlock = 0; bool wait_flag = false; bool posix_lck = false; - struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - struct cifsFileInfo *cfile; __u32 type; + int rc, xid; rc = -EACCES; xid = get_xid(); @@ -2477,12 +2477,11 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, tcon->ses->server); - cifs_sb = CIFS_FILE_SB(file); set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) posix_lck = true; /* * BB add code here to normalize offset and length to account for @@ -2532,11 +2531,11 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) { + struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode); struct cifsFileInfo *open_file = NULL; - struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); /* only filter by fsuid on multiuser mounts */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; spin_lock(&cifs_inode->open_file_lock); @@ -2589,10 +2588,10 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, return rc; } - cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); + cifs_sb = CIFS_SB(cifs_inode); /* only filter by fsuid on multiuser mounts */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; spin_lock(&cifs_inode->open_file_lock); @@ -2787,7 +2786,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; struct inode *inode = file_inode(file); - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifs_sb_info *cifs_sb = CIFS_SB(file); rc = file_write_and_wait_range(file, start, end); if (rc) { @@ -2801,7 +2800,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) file, datasync); tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; if (server->ops->flush == NULL) { rc = -ENOSYS; @@ -2853,7 +2852,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2870,7 +2869,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) && (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, NULL, CIFS_WRITE_OP))) { @@ -2893,7 +2892,7 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifsFileInfo *cfile = (struct cifsFileInfo *) iocb->ki_filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); @@ -2906,7 +2905,7 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) if (CIFS_CACHE_WRITE(cinode)) { if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) { written = netfs_file_write_iter(iocb, from); goto out; } @@ -2994,7 +2993,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifsFileInfo *cfile = (struct cifsFileInfo *) iocb->ki_filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); @@ -3011,7 +3010,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) if (!CIFS_CACHE_READ(cinode)) return netfs_unbuffered_read_iter(iocb, to); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) { + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0) { if (iocb->ki_flags & IOCB_DIRECT) return netfs_unbuffered_read_iter(iocb, to); return netfs_buffered_read_iter(iocb, to); @@ -3130,10 +3129,9 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, if (is_inode_writable(cifsInode) || ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { /* This inode is open for write at least once */ - struct cifs_sb_info *cifs_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(cifsInode); - cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_DIRECT_IO) { /* since no page cache to corrupt on directio we can change size safely */ return true; @@ -3181,7 +3179,7 @@ void cifs_oplock_break(struct work_struct *work) server = tcon->ses->server; scoped_guard(spinlock, &cinode->open_file_lock) { - unsigned int sbflags = cifs_sb->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(cifs_sb); server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, cfile->oplock_epoch, &purge_cache); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 09fe749e97ee..54090739535f 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -2062,161 +2062,160 @@ smb3_cleanup_fs_context(struct smb3_fs_context *ctx) kfree(ctx); } -void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb) +unsigned int smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); struct smb3_fs_context *ctx = cifs_sb->ctx; if (ctx->nodfs) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS; + sbflags |= CIFS_MOUNT_NO_DFS; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_DFS; + sbflags &= ~CIFS_MOUNT_NO_DFS; if (ctx->noperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; + sbflags |= CIFS_MOUNT_NO_PERM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_PERM; + sbflags &= ~CIFS_MOUNT_NO_PERM; if (ctx->setuids) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID; + sbflags |= CIFS_MOUNT_SET_UID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SET_UID; + sbflags &= ~CIFS_MOUNT_SET_UID; if (ctx->setuidfromacl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL; + sbflags |= CIFS_MOUNT_UID_FROM_ACL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UID_FROM_ACL; + sbflags &= ~CIFS_MOUNT_UID_FROM_ACL; if (ctx->server_ino) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; + sbflags |= CIFS_MOUNT_SERVER_INUM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; + sbflags &= ~CIFS_MOUNT_SERVER_INUM; if (ctx->remap) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR; + sbflags |= CIFS_MOUNT_MAP_SFM_CHR; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SFM_CHR; + sbflags &= ~CIFS_MOUNT_MAP_SFM_CHR; if (ctx->sfu_remap) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; + sbflags |= CIFS_MOUNT_MAP_SPECIAL_CHR; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR; + sbflags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR; if (ctx->no_xattr) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; + sbflags |= CIFS_MOUNT_NO_XATTR; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_XATTR; + sbflags &= ~CIFS_MOUNT_NO_XATTR; if (ctx->sfu_emul) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; + sbflags |= CIFS_MOUNT_UNX_EMUL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UNX_EMUL; + sbflags &= ~CIFS_MOUNT_UNX_EMUL; if (ctx->nobrl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; + sbflags |= CIFS_MOUNT_NO_BRL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_BRL; + sbflags &= ~CIFS_MOUNT_NO_BRL; if (ctx->nohandlecache) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE; + sbflags |= CIFS_MOUNT_NO_HANDLE_CACHE; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_HANDLE_CACHE; + sbflags &= ~CIFS_MOUNT_NO_HANDLE_CACHE; if (ctx->nostrictsync) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC; + sbflags |= CIFS_MOUNT_NOSSYNC; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOSSYNC; + sbflags &= ~CIFS_MOUNT_NOSSYNC; if (ctx->mand_lock) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; + sbflags |= CIFS_MOUNT_NOPOSIXBRL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOPOSIXBRL; + sbflags &= ~CIFS_MOUNT_NOPOSIXBRL; if (ctx->rwpidforward) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; + sbflags |= CIFS_MOUNT_RWPIDFORWARD; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_RWPIDFORWARD; + sbflags &= ~CIFS_MOUNT_RWPIDFORWARD; if (ctx->mode_ace) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID; + sbflags |= CIFS_MOUNT_MODE_FROM_SID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MODE_FROM_SID; + sbflags &= ~CIFS_MOUNT_MODE_FROM_SID; if (ctx->cifs_acl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; + sbflags |= CIFS_MOUNT_CIFS_ACL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_ACL; + sbflags &= ~CIFS_MOUNT_CIFS_ACL; if (ctx->backupuid_specified) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID; + sbflags |= CIFS_MOUNT_CIFS_BACKUPUID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPUID; + sbflags &= ~CIFS_MOUNT_CIFS_BACKUPUID; if (ctx->backupgid_specified) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID; + sbflags |= CIFS_MOUNT_CIFS_BACKUPGID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPGID; + sbflags &= ~CIFS_MOUNT_CIFS_BACKUPGID; if (ctx->override_uid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; + sbflags |= CIFS_MOUNT_OVERR_UID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_UID; + sbflags &= ~CIFS_MOUNT_OVERR_UID; if (ctx->override_gid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; + sbflags |= CIFS_MOUNT_OVERR_GID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_GID; + sbflags &= ~CIFS_MOUNT_OVERR_GID; if (ctx->dynperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; + sbflags |= CIFS_MOUNT_DYNPERM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DYNPERM; + sbflags &= ~CIFS_MOUNT_DYNPERM; if (ctx->fsc) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE; + sbflags |= CIFS_MOUNT_FSCACHE; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_FSCACHE; + sbflags &= ~CIFS_MOUNT_FSCACHE; if (ctx->multiuser) - cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | - CIFS_MOUNT_NO_PERM); + sbflags |= CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_NO_PERM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MULTIUSER; + sbflags &= ~CIFS_MOUNT_MULTIUSER; if (ctx->strict_io) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; + sbflags |= CIFS_MOUNT_STRICT_IO; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_STRICT_IO; + sbflags &= ~CIFS_MOUNT_STRICT_IO; if (ctx->direct_io) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; + sbflags |= CIFS_MOUNT_DIRECT_IO; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DIRECT_IO; + sbflags &= ~CIFS_MOUNT_DIRECT_IO; if (ctx->mfsymlinks) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; + sbflags |= CIFS_MOUNT_MF_SYMLINKS; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MF_SYMLINKS; - if (ctx->mfsymlinks) { - if (ctx->sfu_emul) { - /* - * Our SFU ("Services for Unix") emulation allows now - * creating new and reading existing SFU symlinks. - * Older Linux kernel versions were not able to neither - * read existing nor create new SFU symlinks. But - * creating and reading SFU style mknod and FIFOs was - * supported for long time. When "mfsymlinks" and - * "sfu" are both enabled at the same time, it allows - * reading both types of symlinks, but will only create - * them with mfsymlinks format. This allows better - * Apple compatibility, compatibility with older Linux - * kernel clients (probably better for Samba too) - * while still recognizing old Windows style symlinks. - */ - cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n"); - } - } - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SHUTDOWN; + sbflags &= ~CIFS_MOUNT_MF_SYMLINKS; - return; + if (ctx->mfsymlinks && ctx->sfu_emul) { + /* + * Our SFU ("Services for Unix") emulation allows now + * creating new and reading existing SFU symlinks. + * Older Linux kernel versions were not able to neither + * read existing nor create new SFU symlinks. But + * creating and reading SFU style mknod and FIFOs was + * supported for long time. When "mfsymlinks" and + * "sfu" are both enabled at the same time, it allows + * reading both types of symlinks, but will only create + * them with mfsymlinks format. This allows better + * Apple compatibility, compatibility with older Linux + * kernel clients (probably better for Samba too) + * while still recognizing old Windows style symlinks. + */ + cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n"); + } + sbflags &= ~CIFS_MOUNT_SHUTDOWN; + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); + return sbflags; } diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 49b2a6f09ca2..0b64fcb5d302 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -374,7 +374,7 @@ int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx); int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); -void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); +unsigned int smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); /* * max deferred close timeout (jiffies) - 2^30 diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index d4d3cfeb6c90..3e844c55ab8a 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -40,32 +40,33 @@ static void cifs_set_netfs_context(struct inode *inode) static void cifs_set_ops(struct inode *inode) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct netfs_inode *ictx = netfs_inode(inode); + unsigned int sbflags = cifs_sb_flags(cifs_sb); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &cifs_file_inode_ops; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { + if (sbflags & CIFS_MOUNT_DIRECT_IO) { set_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + if (sbflags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_direct_nobrl_ops; else inode->i_fop = &cifs_file_direct_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + } else if (sbflags & CIFS_MOUNT_STRICT_IO) { + if (sbflags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_strict_nobrl_ops; else inode->i_fop = &cifs_file_strict_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + } else if (sbflags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_nobrl_ops; else { /* not direct, send byte range locks */ inode->i_fop = &cifs_file_ops; } /* check if server can support readahead */ - if (cifs_sb_master_tcon(cifs_sb)->ses->server->max_read < - PAGE_SIZE + MAX_CIFS_HDR_SIZE) + if (tcon->ses->server->max_read < PAGE_SIZE + MAX_CIFS_HDR_SIZE) inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else inode->i_data.a_ops = &cifs_addr_ops; @@ -194,8 +195,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr, inode->i_gid = fattr->cf_gid; /* if dynperm is set, don't clobber existing mode */ - if (inode_state_read(inode) & I_NEW || - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) + if ((inode_state_read(inode) & I_NEW) || + !(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_DYNPERM)) inode->i_mode = fattr->cf_mode; cifs_i->cifsAttrs = fattr->cf_cifsattrs; @@ -248,10 +249,8 @@ cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr) { struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - return; - - fattr->cf_uniqueid = iunique(sb, ROOT_I); + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)) + fattr->cf_uniqueid = iunique(sb, ROOT_I); } /* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */ @@ -259,6 +258,8 @@ void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, struct cifs_sb_info *cifs_sb) { + unsigned int sbflags; + memset(fattr, 0, sizeof(*fattr)); fattr->cf_uniqueid = le64_to_cpu(info->UniqueId); fattr->cf_bytes = le64_to_cpu(info->NumOfBytes); @@ -317,8 +318,9 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, break; } + sbflags = cifs_sb_flags(cifs_sb); fattr->cf_uid = cifs_sb->ctx->linux_uid; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) { + if (!(sbflags & CIFS_MOUNT_OVERR_UID)) { u64 id = le64_to_cpu(info->Uid); if (id < ((uid_t)-1)) { kuid_t uid = make_kuid(&init_user_ns, id); @@ -328,7 +330,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, } fattr->cf_gid = cifs_sb->ctx->linux_gid; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) { + if (!(sbflags & CIFS_MOUNT_OVERR_GID)) { u64 id = le64_to_cpu(info->Gid); if (id < ((gid_t)-1)) { kgid_t gid = make_kgid(&init_user_ns, id); @@ -382,7 +384,7 @@ static int update_inode_info(struct super_block *sb, * * If file type or uniqueid is different, return error. */ - if (unlikely((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && + if (unlikely((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) && CIFS_I(*inode)->uniqueid != fattr->cf_uniqueid)) { CIFS_I(*inode)->time = 0; /* force reval */ return -ESTALE; @@ -468,7 +470,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path, cifs_fill_uniqueid(sb, fattr); /* check for Minshall+French symlinks */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MF_SYMLINKS) { tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path); cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc); } @@ -1081,7 +1083,7 @@ cifs_backup_query_path_info(int xid, else if ((tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find) == 0) info.info_level = SMB_FIND_FILE_INFO_STANDARD; - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + else if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) info.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; else /* no srvino useful for fallback to some netapp */ info.info_level = SMB_FIND_FILE_DIRECTORY_INFO; @@ -1109,7 +1111,7 @@ static void cifs_set_fattr_ino(int xid, struct cifs_tcon *tcon, struct super_blo struct TCP_Server_Info *server = tcon->ses->server; int rc; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)) { if (*inode) fattr->cf_uniqueid = CIFS_I(*inode)->uniqueid; else @@ -1263,14 +1265,15 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, struct inode **inode, const char *full_path) { - struct cifs_open_info_data tmp_data = {}; - struct cifs_tcon *tcon; - struct TCP_Server_Info *server; - struct tcon_link *tlink; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_open_info_data tmp_data = {}; void *smb1_backup_rsp_buf = NULL; - int rc = 0; + struct TCP_Server_Info *server; + struct cifs_tcon *tcon; + struct tcon_link *tlink; + unsigned int sbflags; int tmprc = 0; + int rc = 0; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -1370,16 +1373,17 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY handle_mnt_opt: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + sbflags = cifs_sb_flags(cifs_sb); /* query for SFU type info if supported and needed */ if ((fattr->cf_cifsattrs & ATTR_SYSTEM) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { + (sbflags & CIFS_MOUNT_UNX_EMUL)) { tmprc = cifs_sfu_type(fattr, full_path, cifs_sb, xid); if (tmprc) cifs_dbg(FYI, "cifs_sfu_type failed: %d\n", tmprc); } /* fill in 0777 bits from ACL */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) { + if (sbflags & CIFS_MOUNT_MODE_FROM_SID) { rc = cifs_acl_to_fattr(cifs_sb, fattr, *inode, true, full_path, fid); if (rc == -EREMOTE) @@ -1389,7 +1393,7 @@ handle_mnt_opt: __func__, rc); goto out; } - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { + } else if (sbflags & CIFS_MOUNT_CIFS_ACL) { rc = cifs_acl_to_fattr(cifs_sb, fattr, *inode, false, full_path, fid); if (rc == -EREMOTE) @@ -1399,7 +1403,7 @@ handle_mnt_opt: __func__, rc); goto out; } - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + } else if (sbflags & CIFS_MOUNT_UNX_EMUL) /* fill in remaining high mode bits e.g. SUID, VTX */ cifs_sfu_mode(fattr, full_path, cifs_sb, xid); else if (!(tcon->posix_extensions)) @@ -1409,7 +1413,7 @@ handle_mnt_opt: /* check for Minshall+French symlinks */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (sbflags & CIFS_MOUNT_MF_SYMLINKS) { tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path); cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc); } @@ -1509,7 +1513,7 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data, * 3. Tweak fattr based on mount options */ /* check for Minshall+French symlinks */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MF_SYMLINKS) { tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path); cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc); } @@ -1660,7 +1664,7 @@ struct inode *cifs_root_iget(struct super_block *sb) int len; int rc; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH) && cifs_sb->prepath) { len = strlen(cifs_sb->prepath); path = kzalloc(len + 2 /* leading sep + null */, GFP_KERNEL); @@ -2098,8 +2102,9 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, const char *full_path, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, const unsigned int xid) { - int rc = 0; struct inode *inode = NULL; + unsigned int sbflags; + int rc = 0; if (tcon->posix_extensions) { rc = smb311_posix_get_inode_info(&inode, full_path, @@ -2139,6 +2144,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, if (parent->i_mode & S_ISGID) mode |= S_ISGID; + sbflags = cifs_sb_flags(cifs_sb); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (tcon->unix_ext) { struct cifs_unix_set_info_args args = { @@ -2148,7 +2154,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, .mtime = NO_CHANGE_64, .device = 0, }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { args.uid = current_fsuid(); if (parent->i_mode & S_ISGID) args.gid = parent->i_gid; @@ -2166,14 +2172,14 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, { #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ struct TCP_Server_Info *server = tcon->ses->server; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + if (!(sbflags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) server->ops->mkdir_setinfo(inode, full_path, cifs_sb, tcon, xid); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + if (sbflags & CIFS_MOUNT_DYNPERM) inode->i_mode = (mode | S_IFDIR); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { inode->i_uid = current_fsuid(); if (inode->i_mode & S_ISGID) inode->i_gid = parent->i_gid; @@ -2686,7 +2692,7 @@ cifs_dentry_needs_reval(struct dentry *dentry) { struct inode *inode = d_inode(dentry); struct cifsInodeInfo *cifs_i = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct cached_fid *cfid = NULL; @@ -2727,7 +2733,7 @@ cifs_dentry_needs_reval(struct dentry *dentry) } /* hardlinked files w/ noserverino get "special" treatment */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) && S_ISREG(inode->i_mode) && inode->i_nlink != 1) return true; @@ -2752,10 +2758,10 @@ cifs_wait_bit_killable(struct wait_bit_key *key, int mode) int cifs_revalidate_mapping(struct inode *inode) { - int rc; struct cifsInodeInfo *cifs_inode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); unsigned long *flags = &cifs_inode->flags; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + int rc; /* swapfiles are not supposed to be shared */ if (IS_SWAPFILE(inode)) @@ -2768,7 +2774,7 @@ cifs_revalidate_mapping(struct inode *inode) if (test_and_clear_bit(CIFS_INO_INVALID_MAPPING, flags)) { /* for cache=singleclient, do not invalidate */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RW_CACHE) goto skip_invalidate; cifs_inode->netfs.zero_point = cifs_inode->netfs.remote_i_size; @@ -2892,10 +2898,11 @@ int cifs_revalidate_dentry(struct dentry *dentry) int cifs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { - struct dentry *dentry = path->dentry; - struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(path->dentry); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); + unsigned int sbflags; int rc; if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) @@ -2952,12 +2959,13 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path, * enabled, and the admin hasn't overridden them, set the ownership * to the fsuid/fsgid of the current process. */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + sbflags = cifs_sb_flags(cifs_sb); + if ((sbflags & CIFS_MOUNT_MULTIUSER) && + !(sbflags & CIFS_MOUNT_CIFS_ACL) && !tcon->unix_ext) { - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) + if (!(sbflags & CIFS_MOUNT_OVERR_UID)) stat->uid = current_fsuid(); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) + if (!(sbflags & CIFS_MOUNT_OVERR_GID)) stat->gid = current_fsgid(); } return 0; @@ -3102,7 +3110,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) void *page = alloc_dentry_path(); struct inode *inode = d_inode(direntry); struct cifsInodeInfo *cifsInode = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct tcon_link *tlink; struct cifs_tcon *pTcon; struct cifs_unix_set_info_args *args = NULL; @@ -3113,7 +3121,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) xid = get_xid(); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs); @@ -3266,26 +3274,26 @@ out: static int cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) { - unsigned int xid; + struct inode *inode = d_inode(direntry); + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + unsigned int sbflags = cifs_sb_flags(cifs_sb); + struct cifsFileInfo *cfile = NULL; + void *page = alloc_dentry_path(); + __u64 mode = NO_CHANGE_64; kuid_t uid = INVALID_UID; kgid_t gid = INVALID_GID; - struct inode *inode = d_inode(direntry); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct cifsInodeInfo *cifsInode = CIFS_I(inode); - struct cifsFileInfo *cfile = NULL; const char *full_path; - void *page = alloc_dentry_path(); - int rc = -EACCES; __u32 dosattr = 0; - __u64 mode = NO_CHANGE_64; - bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions; + int rc = -EACCES; + unsigned int xid; xid = get_xid(); cifs_dbg(FYI, "setattr on file %pd attrs->ia_valid 0x%x\n", direntry, attrs->ia_valid); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + if (sbflags & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs); @@ -3346,8 +3354,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (attrs->ia_valid & ATTR_GID) gid = attrs->ia_gid; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) { + if (sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) { if (uid_valid(uid) || gid_valid(gid)) { mode = NO_CHANGE_64; rc = id_mode_to_cifs_acl(inode, full_path, &mode, @@ -3358,9 +3365,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) goto cifs_setattr_exit; } } - } else - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) + } else if (!(sbflags & CIFS_MOUNT_SET_UID)) { attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); + } /* skip mode change if it's just for clearing setuid/setgid */ if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -3369,9 +3376,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (attrs->ia_valid & ATTR_MODE) { mode = attrs->ia_mode; rc = 0; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) || - posix) { + if ((sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) || + cifs_sb_master_tcon(cifs_sb)->posix_extensions) { rc = id_mode_to_cifs_acl(inode, full_path, &mode, INVALID_UID, INVALID_GID); if (rc) { @@ -3393,7 +3399,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) dosattr = cifsInode->cifsAttrs | ATTR_READONLY; /* fix up mode if we're not using dynperm */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + if ((sbflags & CIFS_MOUNT_DYNPERM) == 0) attrs->ia_mode = inode->i_mode & ~S_IWUGO; } else if ((mode & S_IWUGO) && (cifsInode->cifsAttrs & ATTR_READONLY)) { @@ -3404,7 +3410,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) dosattr |= ATTR_NORMAL; /* reset local inode permissions to normal */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + if (!(sbflags & CIFS_MOUNT_DYNPERM)) { attrs->ia_mode &= ~(S_IALLUGO); if (S_ISDIR(inode->i_mode)) attrs->ia_mode |= @@ -3413,7 +3419,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) attrs->ia_mode |= cifs_sb->ctx->file_mode; } - } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + } else if (!(sbflags & CIFS_MOUNT_DYNPERM)) { /* ignore mode change - ATTR_READONLY hasn't changed */ attrs->ia_valid &= ~ATTR_MODE; } diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 8dc2651e237f..9afab3237e54 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -216,7 +216,7 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) */ case CIFS_GOING_FLAGS_LOGFLUSH: case CIFS_GOING_FLAGS_NOLOGFLUSH: - sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN; + atomic_or(CIFS_MOUNT_SHUTDOWN, &sbi->mnt_cifs_flags); goto shutdown_good; default: rc = -EINVAL; diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index a2f7bfa8ad1e..434e8fe74080 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -544,14 +544,15 @@ int cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, struct dentry *direntry, const char *symname) { - int rc = -EOPNOTSUPP; - unsigned int xid; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct inode *newinode = NULL; struct tcon_link *tlink; struct cifs_tcon *pTcon; const char *full_path; + int rc = -EOPNOTSUPP; + unsigned int sbflags; + unsigned int xid; void *page; - struct inode *newinode = NULL; if (unlikely(cifs_forced_shutdown(cifs_sb))) return smb_EIO(smb_eio_trace_forced_shutdown); @@ -580,6 +581,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, cifs_dbg(FYI, "symname is %s\n", symname); /* BB what if DFS and this volume is on different share? BB */ + sbflags = cifs_sb_flags(cifs_sb); rc = -EOPNOTSUPP; switch (cifs_symlink_type(cifs_sb)) { case CIFS_SYMLINK_TYPE_UNIX: @@ -594,14 +596,14 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, break; case CIFS_SYMLINK_TYPE_MFSYMLINKS: - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (sbflags & CIFS_MOUNT_MF_SYMLINKS) { rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname); } break; case CIFS_SYMLINK_TYPE_SFU: - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + if (sbflags & CIFS_MOUNT_UNX_EMUL) { rc = __cifs_sfu_make_node(xid, inode, direntry, pTcon, full_path, S_IFLNK, 0, symname); diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 22cde46309fe..bc24c92b8b95 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -275,13 +275,15 @@ dump_smb(void *buf, int smb_buf_length) void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); + + if (sbflags & CIFS_MOUNT_SERVER_INUM) { struct cifs_tcon *tcon = NULL; if (cifs_sb->master_tlink) tcon = cifs_sb_master_tcon(cifs_sb); - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; + atomic_andnot(CIFS_MOUNT_SERVER_INUM, &cifs_sb->mnt_cifs_flags); cifs_sb->mnt_cifs_serverino_autodisabled = true; cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s\n", tcon ? tcon->tree_name : "new server"); @@ -382,11 +384,13 @@ void cifs_done_oplock_break(struct cifsInodeInfo *cinode) bool backup_cred(struct cifs_sb_info *cifs_sb) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); + + if (sbflags & CIFS_MOUNT_CIFS_BACKUPUID) { if (uid_eq(cifs_sb->ctx->backupuid, current_fsuid())) return true; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) { + if (sbflags & CIFS_MOUNT_CIFS_BACKUPGID) { if (in_group_p(cifs_sb->ctx->backupgid)) return true; } @@ -955,7 +959,7 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); } - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags); return 0; } @@ -984,7 +988,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, * look up or tcon is not DFS. */ if (strlen(full_path) < 2 || !cifs_sb || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || + (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_DFS) || !is_tcon_dfs(tcon)) return 0; diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 8615a8747b7f..be22bbc4a65a 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -121,7 +121,7 @@ retry: * want to clobber the existing one with the one that * the readdir code created. */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)) fattr->cf_uniqueid = CIFS_I(inode)->uniqueid; /* @@ -177,6 +177,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) struct cifs_open_info_data data = { .reparse = { .tag = fattr->cf_cifstag, }, }; + unsigned int sbflags; fattr->cf_uid = cifs_sb->ctx->linux_uid; fattr->cf_gid = cifs_sb->ctx->linux_gid; @@ -215,12 +216,12 @@ out_reparse: * may look wrong since the inodes may not have timed out by the time * "ls" does a stat() call on them. */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) + sbflags = cifs_sb_flags(cifs_sb); + if (sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && - fattr->cf_cifsattrs & ATTR_SYSTEM) { + if ((sbflags & CIFS_MOUNT_UNX_EMUL) && + (fattr->cf_cifsattrs & ATTR_SYSTEM)) { if (fattr->cf_eof == 0) { fattr->cf_mode &= ~S_IFMT; fattr->cf_mode |= S_IFIFO; @@ -345,13 +346,14 @@ static int _initiate_cifs_search(const unsigned int xid, struct file *file, const char *full_path) { + struct cifs_sb_info *cifs_sb = CIFS_SB(file); + struct tcon_link *tlink = NULL; + struct TCP_Server_Info *server; + struct cifsFileInfo *cifsFile; + struct cifs_tcon *tcon; + unsigned int sbflags; __u16 search_flags; int rc = 0; - struct cifsFileInfo *cifsFile; - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); - struct tcon_link *tlink = NULL; - struct cifs_tcon *tcon; - struct TCP_Server_Info *server; if (file->private_data == NULL) { tlink = cifs_sb_tlink(cifs_sb); @@ -385,6 +387,7 @@ _initiate_cifs_search(const unsigned int xid, struct file *file, cifs_dbg(FYI, "Full path: %s start at: %lld\n", full_path, file->f_pos); ffirst_retry: + sbflags = cifs_sb_flags(cifs_sb); /* test for Unix extensions */ /* but now check for them on the share/mount not on the SMB session */ /* if (cap_unix(tcon->ses) { */ @@ -395,7 +398,7 @@ ffirst_retry: else if ((tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find) == 0) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + } else if (sbflags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? */ { cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; @@ -411,8 +414,7 @@ ffirst_retry: if (rc == 0) { cifsFile->invalidHandle = false; - } else if ((rc == -EOPNOTSUPP) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { + } else if (rc == -EOPNOTSUPP && (sbflags & CIFS_MOUNT_SERVER_INUM)) { cifs_autodisable_serverino(cifs_sb); goto ffirst_retry; } @@ -690,7 +692,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, loff_t first_entry_in_buffer; loff_t index_to_find = pos; struct cifsFileInfo *cfile = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifs_sb_info *cifs_sb = CIFS_SB(file); struct TCP_Server_Info *server = tcon->ses->server; /* check if index in the buffer */ @@ -955,6 +957,7 @@ static int cifs_filldir(char *find_entry, struct file *file, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_dirent de = { NULL, }; struct cifs_fattr fattr; + unsigned int sbflags; struct qstr name; int rc = 0; @@ -1019,15 +1022,15 @@ static int cifs_filldir(char *find_entry, struct file *file, break; } - if (de.ino && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { + sbflags = cifs_sb_flags(cifs_sb); + if (de.ino && (sbflags & CIFS_MOUNT_SERVER_INUM)) { fattr.cf_uniqueid = de.ino; } else { fattr.cf_uniqueid = iunique(sb, ROOT_I); cifs_autodisable_serverino(cifs_sb); } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) && - couldbe_mf_symlink(&fattr)) + if ((sbflags & CIFS_MOUNT_MF_SYMLINKS) && couldbe_mf_symlink(&fattr)) /* * trying to get the type and mode can be slow, * so just call those regular files for now, and mark @@ -1058,7 +1061,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) const char *full_path; void *page = alloc_dentry_path(); struct cached_fid *cfid = NULL; - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifs_sb_info *cifs_sb = CIFS_SB(file); xid = get_xid(); diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index ce9b923498b5..cd1e1eaee67a 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -55,17 +55,18 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, const char *full_path, const char *symname) { struct reparse_symlink_data_buffer *buf = NULL; - struct cifs_open_info_data data = {}; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); const char *symroot = cifs_sb->ctx->symlinkroot; - struct inode *new; - struct kvec iov; - __le16 *path = NULL; - bool directory; - char *symlink_target = NULL; - char *sym = NULL; + struct cifs_open_info_data data = {}; char sep = CIFS_DIR_SEP(cifs_sb); + char *symlink_target = NULL; u16 len, plen, poff, slen; + unsigned int sbflags; + __le16 *path = NULL; + struct inode *new; + char *sym = NULL; + struct kvec iov; + bool directory; int rc = 0; if (strlen(symname) > REPARSE_SYM_PATH_MAX) @@ -83,8 +84,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, .symlink_target = symlink_target, }; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && - symroot && symname[0] == '/') { + sbflags = cifs_sb_flags(cifs_sb); + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symroot && symname[0] == '/') { /* * This is a request to create an absolute symlink on the server * which does not support POSIX paths, and expects symlink in @@ -164,7 +165,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, * mask these characters in NT object prefix by '_' and then change * them back. */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') sym[0] = sym[1] = sym[2] = sym[5] = '_'; path = cifs_convert_path_to_utf16(sym, cifs_sb); @@ -173,7 +174,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { sym[0] = '\\'; sym[1] = sym[2] = '?'; sym[5] = ':'; @@ -197,7 +198,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, slen = 2 * UniStrnlen((wchar_t *)path, REPARSE_SYM_PATH_MAX); poff = 0; plen = slen; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { /* * For absolute NT symlinks skip leading "\\??\\" in PrintName as * PrintName is user visible location in DOS/Win32 format (not in NT format). @@ -824,7 +825,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) && symroot && !relative) { /* * This is an absolute symlink from the server which does not diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 570b0d25aeba..0164dc47bdfd 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -33,7 +33,7 @@ static inline kuid_t wsl_make_kuid(struct cifs_sb_info *cifs_sb, { u32 uid = le32_to_cpu(*(__le32 *)ptr); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_OVERR_UID) return cifs_sb->ctx->linux_uid; return make_kuid(current_user_ns(), uid); } @@ -43,7 +43,7 @@ static inline kgid_t wsl_make_kgid(struct cifs_sb_info *cifs_sb, { u32 gid = le32_to_cpu(*(__le32 *)ptr); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_OVERR_GID) return cifs_sb->ctx->linux_gid; return make_kgid(current_user_ns(), gid); } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index aed49aaef8c4..9643eca0cb70 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -49,6 +49,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, if (!CIFSSMBQFSUnixInfo(xid, tcon)) { __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + unsigned int sbflags; cifs_dbg(FYI, "unix caps which server supports %lld\n", cap); /* @@ -75,14 +76,16 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP) cifs_dbg(VFS, "per-share encryption not supported yet\n"); + if (cifs_sb) + sbflags = cifs_sb_flags(cifs_sb); + cap &= CIFS_UNIX_CAP_MASK; if (ctx && ctx->no_psx_acl) cap &= ~CIFS_UNIX_POSIX_ACL_CAP; else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { cifs_dbg(FYI, "negotiated posix acl support\n"); if (cifs_sb) - cifs_sb->mnt_cifs_flags |= - CIFS_MOUNT_POSIXACL; + sbflags |= CIFS_MOUNT_POSIXACL; } if (ctx && ctx->posix_paths == 0) @@ -90,10 +93,12 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { cifs_dbg(FYI, "negotiate posix pathnames\n"); if (cifs_sb) - cifs_sb->mnt_cifs_flags |= - CIFS_MOUNT_POSIX_PATHS; + sbflags |= CIFS_MOUNT_POSIX_PATHS; } + if (cifs_sb) + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); + cifs_dbg(FYI, "Negotiate caps 0x%x\n", (int)cap); #ifdef CONFIG_CIFS_DEBUG2 if (cap & CIFS_UNIX_FCNTL_CAP) @@ -1147,7 +1152,7 @@ static int cifs_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode, unsigned int oplock) { - unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode)); __u8 op; op = !!((oplock & CIFS_CACHE_READ_FLG) || (sbflags & CIFS_MOUNT_RO_CACHE)); @@ -1282,7 +1287,8 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + unsigned int sbflags = cifs_sb_flags(cifs_sb); struct inode *newinode = NULL; int rc; @@ -1298,7 +1304,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, .mtime = NO_CHANGE_64, .device = dev, }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { args.uid = current_fsuid(); args.gid = current_fsgid(); } else { @@ -1317,7 +1323,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, if (rc == 0) d_instantiate(dentry, newinode); return rc; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + } else if (sbflags & CIFS_MOUNT_UNX_EMUL) { /* * Check if mounted with mount parm 'sfu' mount parm. * SFU emulation should work with all servers diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index 1ab41de2b634..ed651c946251 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -72,7 +72,7 @@ int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_i * POSIX server does not distinguish between symlinks to file and * symlink directory. So nothing is needed to fix on the client side. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) return 0; if (!*target) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index e19674d9b92b..973fce3c959c 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -455,17 +455,8 @@ calc_size_exit: __le16 * cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) { - int len; const char *start_of_path; - __le16 *to; - int map_type; - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) - map_type = SFM_MAP_UNI_RSVD; - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - map_type = SFU_MAP_UNI_RSVD; - else - map_type = NO_MAP_UNI_RSVD; + int len; /* Windows doesn't allow paths beginning with \ */ if (from[0] == '\\') @@ -479,14 +470,13 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) } else start_of_path = from; - to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len, - cifs_sb->local_nls, map_type); - return to; + return cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len, + cifs_sb->local_nls, cifs_remap(cifs_sb)); } __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode, unsigned int oplock) { - unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode)); __le32 lease = 0; if ((oplock & CIFS_CACHE_WRITE_FLG) || (sbflags & CIFS_MOUNT_RW_CACHE)) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index fea9a35caa57..7f2d3459cbf9 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -986,7 +986,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, rc = -EREMOTE; } if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)) + (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_DFS)) rc = -EOPNOTSUPP; goto out; } @@ -2691,7 +2691,7 @@ static int smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode, unsigned int oplock) { - unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode)); __u8 op; if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) @@ -5332,7 +5332,7 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode)); int rc = -EOPNOTSUPP; /* @@ -5341,7 +5341,7 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, * supports block and char device, socket & fifo, * and was used by default in earlier versions of Windows */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + if (sbflags & CIFS_MOUNT_UNX_EMUL) { rc = cifs_sfu_make_node(xid, inode, dentry, tcon, full_path, mode, dev); } else if (CIFS_REPARSE_SUPPORT(tcon)) { diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index ef655acf673d..a2a96d817717 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3182,22 +3182,19 @@ SMB2_open_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, } if ((oparms->disposition != FILE_OPEN) && (oparms->cifs_sb)) { + unsigned int sbflags = cifs_sb_flags(oparms->cifs_sb); bool set_mode; bool set_owner; - if ((oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) && - (oparms->mode != ACL_NO_MODE)) + if ((sbflags & CIFS_MOUNT_MODE_FROM_SID) && + oparms->mode != ACL_NO_MODE) { set_mode = true; - else { + } else { set_mode = false; oparms->mode = ACL_NO_MODE; } - if (oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) - set_owner = true; - else - set_owner = false; - + set_owner = sbflags & CIFS_MOUNT_UID_FROM_ACL; if (set_owner | set_mode) { cifs_dbg(FYI, "add sd with mode 0x%x\n", oparms->mode); rc = add_sd_context(iov, &n_iov, oparms->mode, set_owner); diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index e1a7d9a10a53..23227f2f9428 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -149,7 +149,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, break; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR) goto out; if (pTcon->ses->server->ops->set_EA) { @@ -309,7 +309,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, break; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR) goto out; if (pTcon->ses->server->ops->query_all_EAs) @@ -398,7 +398,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) if (unlikely(cifs_forced_shutdown(cifs_sb))) return smb_EIO(smb_eio_trace_forced_shutdown); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR) return -EOPNOTSUPP; tlink = cifs_sb_tlink(cifs_sb); From d9d1e319b39ea685ede59319002d567c159d23c3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 25 Feb 2026 21:34:55 -0300 Subject: [PATCH 483/828] smb: client: fix broken multichannel with krb5+signing When mounting a share with 'multichannel,max_channels=n,sec=krb5i', the client was duplicating signing key for all secondary channels, thus making the server fail all commands sent from secondary channels due to bad signatures. Every channel has its own signing key, so when establishing a new channel with krb5 auth, make sure to use the new session key as the derived key to generate channel's signing key in SMB2_auth_kerberos(). Repro: $ mount.cifs //srv/share /mnt -o multichannel,max_channels=4,sec=krb5i $ sleep 5 $ umount /mnt $ dmesg ... CIFS: VFS: sign fail cmd 0x5 message id 0x2 CIFS: VFS: \\srv SMB signature verification returned error = -13 CIFS: VFS: sign fail cmd 0x5 message id 0x2 CIFS: VFS: \\srv SMB signature verification returned error = -13 CIFS: VFS: sign fail cmd 0x4 message id 0x2 CIFS: VFS: \\srv SMB signature verification returned error = -13 Reported-by: Xiaoli Feng Reviewed-by: Enzo Matsumiya Signed-off-by: Paulo Alcantara (Red Hat) Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a2a96d817717..04e361ed2356 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1714,19 +1714,17 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) is_binding = (ses->ses_status == SES_GOOD); spin_unlock(&ses->ses_lock); - /* keep session key if binding */ - if (!is_binding) { - kfree_sensitive(ses->auth_key.response); - ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, - GFP_KERNEL); - if (!ses->auth_key.response) { - cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory\n", - msg->sesskey_len); - rc = -ENOMEM; - goto out_put_spnego_key; - } - ses->auth_key.len = msg->sesskey_len; + kfree_sensitive(ses->auth_key.response); + ses->auth_key.response = kmemdup(msg->data, + msg->sesskey_len, + GFP_KERNEL); + if (!ses->auth_key.response) { + cifs_dbg(VFS, "%s: can't allocate (%u bytes) memory\n", + __func__, msg->sesskey_len); + rc = -ENOMEM; + goto out_put_spnego_key; } + ses->auth_key.len = msg->sesskey_len; sess_data->iov[1].iov_base = msg->data + msg->sesskey_len; sess_data->iov[1].iov_len = msg->secblob_len; From 2f37dc436d4e61ff7ae0b0353cf91b8c10396e4d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 26 Feb 2026 22:28:45 +0100 Subject: [PATCH 484/828] smb: client: Don't log plaintext credentials in cifs_set_cifscreds When debug logging is enabled, cifs_set_cifscreds() logs the key payload and exposes the plaintext username and password. Remove the debug log to avoid exposing credentials. Fixes: 8a8798a5ff90 ("cifs: fetch credentials out of keyring for non-krb5 auth multiuser mounts") Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Thorsten Blum Signed-off-by: Steve French --- fs/smb/client/connect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 87686c804057..4c34d9603e05 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2236,7 +2236,6 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) /* find first : in payload */ payload = upayload->data; delim = strnchr(payload, upayload->datalen, ':'); - cifs_dbg(FYI, "payload=%s\n", payload); if (!delim) { cifs_dbg(FYI, "Unable to find ':' in payload (datalen=%d)\n", upayload->datalen); From e35626f610f3d2b7953ccddf6a77453da22b3a9e Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 24 Feb 2026 21:28:32 +0100 Subject: [PATCH 485/828] net/sched: ets: fix divide by zero in the offload path Offloading ETS requires computing each class' WRR weight: this is done by averaging over the sums of quanta as 'q_sum' and 'q_psum'. Using unsigned int, the same integer size as the individual DRR quanta, can overflow and even cause division by zero, like it happened in the following splat: Oops: divide error: 0000 [#1] SMP PTI CPU: 13 UID: 0 PID: 487 Comm: tc Tainted: G E 6.19.0-virtme #45 PREEMPT(full) Tainted: [E]=UNSIGNED_MODULE Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 RIP: 0010:ets_offload_change+0x11f/0x290 [sch_ets] Code: e4 45 31 ff eb 03 41 89 c7 41 89 cb 89 ce 83 f9 0f 0f 87 b7 00 00 00 45 8b 08 31 c0 45 01 cc 45 85 c9 74 09 41 6b c4 64 31 d2 <41> f7 f2 89 c2 44 29 fa 45 89 df 41 83 fb 0f 0f 87 c7 00 00 00 44 RSP: 0018:ffffd0a180d77588 EFLAGS: 00010246 RAX: 00000000ffffff38 RBX: ffff8d3d482ca000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffd0a180d77660 RBP: ffffd0a180d77690 R08: ffff8d3d482ca2d8 R09: 00000000fffffffe R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffffe R13: ffff8d3d472f2000 R14: 0000000000000003 R15: 0000000000000000 FS: 00007f440b6c2740(0000) GS:ffff8d3dc9803000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000003cdd2000 CR3: 0000000007b58002 CR4: 0000000000172ef0 Call Trace: ets_qdisc_change+0x870/0xf40 [sch_ets] qdisc_create+0x12b/0x540 tc_modify_qdisc+0x6d7/0xbd0 rtnetlink_rcv_msg+0x168/0x6b0 netlink_rcv_skb+0x5c/0x110 netlink_unicast+0x1d6/0x2b0 netlink_sendmsg+0x22e/0x470 ____sys_sendmsg+0x38a/0x3c0 ___sys_sendmsg+0x99/0xe0 __sys_sendmsg+0x8a/0xf0 do_syscall_64+0x111/0xf80 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f440b81c77e Code: 4d 89 d8 e8 d4 bc 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa RSP: 002b:00007fff951e4c10 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000481820 RCX: 00007f440b81c77e RDX: 0000000000000000 RSI: 00007fff951e4cd0 RDI: 0000000000000003 RBP: 00007fff951e4c20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff951f4fa8 R13: 00000000699ddede R14: 00007f440bb01000 R15: 0000000000486980 Modules linked in: sch_ets(E) netdevsim(E) ---[ end trace 0000000000000000 ]--- RIP: 0010:ets_offload_change+0x11f/0x290 [sch_ets] Code: e4 45 31 ff eb 03 41 89 c7 41 89 cb 89 ce 83 f9 0f 0f 87 b7 00 00 00 45 8b 08 31 c0 45 01 cc 45 85 c9 74 09 41 6b c4 64 31 d2 <41> f7 f2 89 c2 44 29 fa 45 89 df 41 83 fb 0f 0f 87 c7 00 00 00 44 RSP: 0018:ffffd0a180d77588 EFLAGS: 00010246 RAX: 00000000ffffff38 RBX: ffff8d3d482ca000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffd0a180d77660 RBP: ffffd0a180d77690 R08: ffff8d3d482ca2d8 R09: 00000000fffffffe R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffffe R13: ffff8d3d472f2000 R14: 0000000000000003 R15: 0000000000000000 FS: 00007f440b6c2740(0000) GS:ffff8d3dc9803000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000003cdd2000 CR3: 0000000007b58002 CR4: 0000000000172ef0 Kernel panic - not syncing: Fatal exception Kernel Offset: 0x30000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception ]--- Fix this using 64-bit integers for 'q_sum' and 'q_psum'. Cc: stable@vger.kernel.org Fixes: d35eb52bd2ac ("net: sch_ets: Make the ETS qdisc offloadable") Signed-off-by: Davide Caratti Reviewed-by: Jamal Hadi Salim Reviewed-by: Petr Machata Link: https://patch.msgid.link/28504887df314588c7255e9911769c36f751edee.1771964872.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 306e046276d4..a4b07b661b77 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -115,12 +115,12 @@ static void ets_offload_change(struct Qdisc *sch) struct ets_sched *q = qdisc_priv(sch); struct tc_ets_qopt_offload qopt; unsigned int w_psum_prev = 0; - unsigned int q_psum = 0; - unsigned int q_sum = 0; unsigned int quantum; unsigned int w_psum; unsigned int weight; unsigned int i; + u64 q_psum = 0; + u64 q_sum = 0; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; @@ -138,8 +138,12 @@ static void ets_offload_change(struct Qdisc *sch) for (i = 0; i < q->nbands; i++) { quantum = q->classes[i].quantum; - q_psum += quantum; - w_psum = quantum ? q_psum * 100 / q_sum : 0; + if (quantum) { + q_psum += quantum; + w_psum = div64_u64(q_psum * 100, q_sum); + } else { + w_psum = 0; + } weight = w_psum - w_psum_prev; w_psum_prev = w_psum; From 2ef2b20cf4e04ac8a6ba68493f8780776ff84300 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Feb 2026 13:15:47 +0000 Subject: [PATCH 486/828] net: annotate data-races around sk->sk_{data_ready,write_space} skmsg (and probably other layers) are changing these pointers while other cpus might read them concurrently. Add corresponding READ_ONCE()/WRITE_ONCE() annotations for UDP, TCP and AF_UNIX. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: syzbot+87f770387a9e5dc6b79b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/699ee9fc.050a0220.1cd54b.0009.GAE@google.com/ Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Cc: John Fastabend Cc: Jakub Sitnicki Cc: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260225131547.1085509-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skmsg.c | 14 +++++++------- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_bpf.c | 2 +- net/ipv4/tcp_input.c | 14 ++++++++------ net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv4/udp_bpf.c | 2 +- net/unix/af_unix.c | 8 ++++---- 8 files changed, 25 insertions(+), 23 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 2e26174c9919..3261793abe83 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1205,8 +1205,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) return; psock->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = sk_psock_strp_data_ready; - sk->sk_write_space = sk_psock_write_space; + WRITE_ONCE(sk->sk_data_ready, sk_psock_strp_data_ready); + WRITE_ONCE(sk->sk_write_space, sk_psock_write_space); } void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) @@ -1216,8 +1216,8 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) if (!psock->saved_data_ready) return; - sk->sk_data_ready = psock->saved_data_ready; - psock->saved_data_ready = NULL; + WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready); + WRITE_ONCE(psock->saved_data_ready, NULL); strp_stop(&psock->strp); } @@ -1296,8 +1296,8 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) return; psock->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = sk_psock_verdict_data_ready; - sk->sk_write_space = sk_psock_write_space; + WRITE_ONCE(sk->sk_data_ready, sk_psock_verdict_data_ready); + WRITE_ONCE(sk->sk_write_space, sk_psock_write_space); } void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) @@ -1308,6 +1308,6 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) if (!psock->saved_data_ready) return; - sk->sk_data_ready = psock->saved_data_ready; + WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready); psock->saved_data_ready = NULL; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f84d9a45cc9d..8cdc26e8ad68 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1446,7 +1446,7 @@ out_err: err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { - sk->sk_write_space(sk); + READ_ONCE(sk->sk_write_space)(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } if (binding) @@ -4181,7 +4181,7 @@ ao_parse: break; case TCP_NOTSENT_LOWAT: WRITE_ONCE(tp->notsent_lowat, val); - sk->sk_write_space(sk); + READ_ONCE(sk->sk_write_space)(sk); break; case TCP_INQ: if (val > 1 || val < 0) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index c449a044895e..813d2e498c93 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -725,7 +725,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } else { - sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); /* Pairs with lockless read in sk_clone_lock() */ sock_replace_proto(sk, psock->sk_proto); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 41c57efd125c..6404e53382ca 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5425,7 +5425,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM); return; } @@ -5635,7 +5635,7 @@ err: void tcp_data_ready(struct sock *sk) { if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE)) - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) @@ -5691,7 +5691,7 @@ queue_and_out: inet_csk(sk)->icsk_ack.pending |= (ICSK_ACK_NOMEM | ICSK_ACK_NOW); inet_csk_schedule_ack(sk); - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); if (skb_queue_len(&sk->sk_receive_queue) && skb->len) { reason = SKB_DROP_REASON_PROTO_MEM; @@ -6114,7 +6114,9 @@ static void tcp_new_space(struct sock *sk) tp->snd_cwnd_stamp = tcp_jiffies32; } - INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); + INDIRECT_CALL_1(READ_ONCE(sk->sk_write_space), + sk_stream_write_space, + sk); } /* Caller made space either from: @@ -6325,7 +6327,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t BUG(); WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); } } } @@ -7792,7 +7794,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, sock_put(fastopen_sk); goto drop_and_free; } - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); } else { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d9c5a43bd281..dafb63b923d0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -1004,7 +1004,7 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, reason = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) - parent->sk_data_ready(parent); + READ_ONCE(parent->sk_data_ready)(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6c6b68a66dcd..014fdfdd331b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1787,7 +1787,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * using prepare_to_wait_exclusive(). */ while (nb) { - INDIRECT_CALL_1(sk->sk_data_ready, + INDIRECT_CALL_1(READ_ONCE(sk->sk_data_ready), sock_def_readable, sk); nb--; } diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 91233e37cd97..779a3a03762f 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -158,7 +158,7 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; if (restore) { - sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); sock_replace_proto(sk, psock->sk_proto); return 0; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3756a93dc63a..7eaa5b187fef 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1785,7 +1785,7 @@ restart: __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); sock_put(other); return 0; @@ -2278,7 +2278,7 @@ restart_locked: scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); sock_put(other); scm_destroy(&scm); return len; @@ -2351,7 +2351,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, sk_send_sigurg(other); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); return 0; out_unlock: @@ -2477,7 +2477,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); sent += size; } From 93c9475c04acad2457a7e7ea4e3ec40a6e6d94a7 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Wed, 25 Feb 2026 16:39:55 +0200 Subject: [PATCH 487/828] bridge: Check relevant per-VLAN options in VLAN range grouping The br_vlan_opts_eq_range() function determines if consecutive VLANs can be grouped together in a range for compact netlink notifications. It currently checks state, tunnel info, and multicast router configuration, but misses two categories of per-VLAN options that affect the output: 1. User-visible priv_flags (neigh_suppress, mcast_enabled) 2. Port multicast context (mcast_max_groups, mcast_n_groups) When VLANs have different settings for these options, they are incorrectly grouped into ranges, causing netlink notifications to report only one VLAN's settings for the entire range. Fix by checking priv_flags equality, but only for flags that affect netlink output (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED and BR_VLFLAG_MCAST_ENABLED), and comparing multicast context (mcast_max_groups and mcast_n_groups). Example showing the bugs before the fix: $ bridge vlan set vid 10 dev dummy1 neigh_suppress on $ bridge vlan set vid 11 dev dummy1 neigh_suppress off $ bridge -d vlan show dev dummy1 port vlan-id dummy1 10-11 ... neigh_suppress on $ bridge vlan set vid 10 dev dummy1 mcast_max_groups 100 $ bridge vlan set vid 11 dev dummy1 mcast_max_groups 200 $ bridge -d vlan show dev dummy1 port vlan-id dummy1 10-11 ... mcast_max_groups 100 After the fix, VLANs 10 and 11 are shown as separate entries with their correct individual settings. Fixes: a1aee20d5db2 ("net: bridge: Add netlink knobs for number / maximum MDB entries") Fixes: 83f6d600796c ("bridge: vlan: Allow setting VLAN neighbor suppression state") Signed-off-by: Danielle Ratson Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260225143956.3995415-2-danieller@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_private.h | 10 ++++++++++ net/bridge/br_vlan_options.c | 26 +++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b9b2981c4841..9b55d38ea9ed 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1344,6 +1344,16 @@ br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1, true; } +static inline bool +br_multicast_port_ctx_options_equal(const struct net_bridge_mcast_port *pmctx1, + const struct net_bridge_mcast_port *pmctx2) +{ + return br_multicast_ngroups_get(pmctx1) == + br_multicast_ngroups_get(pmctx2) && + br_multicast_ngroups_get_max(pmctx1) == + br_multicast_ngroups_get_max(pmctx2); +} + static inline bool br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx) { diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index 8fa89b04ee94..5514e1fc8d1f 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -43,9 +43,29 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, u8 range_mc_rtr = br_vlan_multicast_router(range_end); u8 curr_mc_rtr = br_vlan_multicast_router(v_curr); - return v_curr->state == range_end->state && - __vlan_tun_can_enter_range(v_curr, range_end) && - curr_mc_rtr == range_mc_rtr; + if (v_curr->state != range_end->state) + return false; + + if (!__vlan_tun_can_enter_range(v_curr, range_end)) + return false; + + if (curr_mc_rtr != range_mc_rtr) + return false; + + /* Check user-visible priv_flags that affect output */ + if ((v_curr->priv_flags ^ range_end->priv_flags) & + (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED | BR_VLFLAG_MCAST_ENABLED)) + return false; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (!br_vlan_is_master(v_curr) && + !br_multicast_port_ctx_vlan_disabled(&v_curr->port_mcast_ctx) && + !br_multicast_port_ctx_options_equal(&v_curr->port_mcast_ctx, + &range_end->port_mcast_ctx)) + return false; +#endif + + return true; } bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v, From 13540021be228dcda63d02b2245ce8dad01d8473 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Wed, 25 Feb 2026 16:39:56 +0200 Subject: [PATCH 488/828] selftests: net: Add bridge VLAN range grouping tests Add a new test file bridge_vlan_dump.sh with four test cases that verify VLANs with different per-VLAN options are not incorrectly grouped into ranges in the dump output. The tests verify the kernel's br_vlan_opts_eq_range() function correctly prevents VLAN range grouping when neigh_suppress, mcast_max_groups, mcast_n_groups, or mcast_enabled options differ. Each test verifies that VLANs with different option values appear as individual entries rather than ranges, and that VLANs with matching values are properly grouped together. Example output: $ ./bridge_vlan_dump.sh TEST: VLAN range grouping with neigh_suppress [ OK ] TEST: VLAN range grouping with mcast_max_groups [ OK ] TEST: VLAN range grouping with mcast_n_groups [ OK ] TEST: VLAN range grouping with mcast_enabled [ OK ] Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260225143956.3995415-3-danieller@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/bridge_vlan_dump.sh | 204 ++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100755 tools/testing/selftests/net/bridge_vlan_dump.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index afdea6d95bde..605c54c0e8a3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -15,6 +15,7 @@ TEST_PROGS := \ big_tcp.sh \ bind_bhash.sh \ bpf_offload.py \ + bridge_vlan_dump.sh \ broadcast_ether_dst.sh \ broadcast_pmtu.sh \ busy_poll_test.sh \ diff --git a/tools/testing/selftests/net/bridge_vlan_dump.sh b/tools/testing/selftests/net/bridge_vlan_dump.sh new file mode 100755 index 000000000000..ad66731d2a6f --- /dev/null +++ b/tools/testing/selftests/net/bridge_vlan_dump.sh @@ -0,0 +1,204 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bridge VLAN range grouping. VLANs are collapsed into a range entry in +# the dump if they have the same per-VLAN options. These tests verify that +# VLANs with different per-VLAN option values are not grouped together. + +# shellcheck disable=SC1091,SC2034,SC2154,SC2317 +source lib.sh + +ALL_TESTS=" + vlan_range_neigh_suppress + vlan_range_mcast_max_groups + vlan_range_mcast_n_groups + vlan_range_mcast_enabled +" + +setup_prepare() +{ + setup_ns NS + defer cleanup_all_ns + + ip -n "$NS" link add name br0 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 1 mcast_vlan_snooping 1 + ip -n "$NS" link set dev br0 up + + ip -n "$NS" link add name dummy0 type dummy + ip -n "$NS" link set dev dummy0 master br0 + ip -n "$NS" link set dev dummy0 up +} + +vlan_range_neigh_suppress() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different neigh_suppress values and verify no range grouping + bridge -n "$NS" vlan set vid 10 dev dummy0 neigh_suppress on + check_err $? "Failed to set neigh_suppress for VLAN 10" + + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress off + check_err $? "Failed to set neigh_suppress for VLAN 11" + + # Verify VLANs are not shown as a range, but individual entries exist + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different neigh_suppress incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same neigh_suppress value and verify range grouping + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress on + check_err $? "Failed to set neigh_suppress for VLAN 11" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same neigh_suppress not grouped" + + log_test "VLAN range grouping with neigh_suppress" +} + +vlan_range_mcast_max_groups() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different mcast_max_groups values and verify no range grouping + bridge -n "$NS" vlan set vid 10 dev dummy0 mcast_max_groups 100 + check_err $? "Failed to set mcast_max_groups for VLAN 10" + + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 200 + check_err $? "Failed to set mcast_max_groups for VLAN 11" + + # Verify VLANs are not shown as a range, but individual entries exist + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_max_groups incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same mcast_max_groups value and verify range grouping + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 100 + check_err $? "Failed to set mcast_max_groups for VLAN 11" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_max_groups not grouped" + + log_test "VLAN range grouping with mcast_max_groups" +} + +vlan_range_mcast_n_groups() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Add different numbers of multicast groups to each VLAN + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 10 + check_err $? "Failed to add mdb entry to VLAN 10" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 10 + + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 10 + check_err $? "Failed to add second mdb entry to VLAN 10" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 10 + + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 11 + check_err $? "Failed to add mdb entry to VLAN 11" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 11 + + # Verify VLANs are not shown as a range due to different mcast_n_groups + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_n_groups incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Add another group to VLAN 11 to match VLAN 10's count + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 11 + check_err $? "Failed to add second mdb entry to VLAN 11" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 11 + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_n_groups not grouped" + + log_test "VLAN range grouping with mcast_n_groups" +} + +vlan_range_mcast_enabled() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev br0 self + defer bridge -n "$NS" vlan del vid 10 dev br0 self + + bridge -n "$NS" vlan add vid 11 dev br0 self + defer bridge -n "$NS" vlan del vid 11 dev br0 self + + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different mcast_snooping for bridge VLANs + # Port VLANs inherit BR_VLFLAG_MCAST_ENABLED from bridge VLANs + bridge -n "$NS" vlan global set dev br0 vid 10 mcast_snooping 1 + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 0 + + # Verify port VLANs are not grouped due to different mcast_enabled + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_enabled incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same mcast_snooping and verify range grouping + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 1 + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_enabled not grouped" + + log_test "VLAN range grouping with mcast_enabled" +} + +# Verify the newest tested option is supported +if ! bridge vlan help 2>&1 | grep -q "neigh_suppress"; then + echo "SKIP: iproute2 too old, missing per-VLAN neighbor suppression support" + exit "$ksft_skip" +fi + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit "$EXIT_STATUS" From be11a537224d72b906db6b98510619770298c8a4 Mon Sep 17 00:00:00 2001 From: Chintan Vankar Date: Tue, 24 Feb 2026 23:43:59 +0530 Subject: [PATCH 489/828] net: ethernet: ti: am65-cpsw-nuss/cpsw-ale: Fix multicast entry handling in ALE table In the current implementation, flushing multicast entries in MAC mode incorrectly deletes entries for all ports instead of only the target port, disrupting multicast traffic on other ports. The cause is adding multicast entries by setting only host port bit, and not setting the MAC port bits. Fix this by setting the MAC port's bit in the port mask while adding the multicast entry. Also fix the flush logic to preserve the host port bit during removal of MAC port and free ALE entries when mask contains only host port. Fixes: 5c50a856d550 ("drivers: net: ethernet: cpsw: add multicast address to ALE table") Signed-off-by: Chintan Vankar Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224181359.2055322-1-c-vankar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- drivers/net/ethernet/ti/cpsw_ale.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 5924db6be3fe..967918050433 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -391,7 +391,7 @@ static void am65_cpsw_nuss_ndo_slave_set_rx_mode(struct net_device *ndev) cpsw_ale_set_allmulti(common->ale, ndev->flags & IFF_ALLMULTI, port->port_id); - port_mask = ALE_PORT_HOST; + port_mask = BIT(port->port_id) | ALE_PORT_HOST; /* Clear all mcast from ALE */ cpsw_ale_flush_multicast(common->ale, port_mask, -1); diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index bb969dd435b4..be7b69319221 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -450,14 +450,13 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry, ale->port_mask_bits); if ((mask & port_mask) == 0) return; /* ports dont intersect, not interested */ - mask &= ~port_mask; + mask &= (~port_mask | ALE_PORT_HOST); - /* free if only remaining port is host port */ - if (mask) + if (mask == 0x0 || mask == ALE_PORT_HOST) + cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); + else cpsw_ale_set_port_mask(ale_entry, mask, ale->port_mask_bits); - else - cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); } int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid) From 24d87712727a5017ad142d63940589a36cd25647 Mon Sep 17 00:00:00 2001 From: Ariel Silver Date: Sat, 21 Feb 2026 15:26:00 +0100 Subject: [PATCH 490/828] media: dvb-net: fix OOB access in ULE extension header tables The ule_mandatory_ext_handlers[] and ule_optional_ext_handlers[] tables in handle_one_ule_extension() are declared with 255 elements (valid indices 0-254), but the index htype is derived from network-controlled data as (ule_sndu_type & 0x00FF), giving a range of 0-255. When htype equals 255, an out-of-bounds read occurs on the function pointer table, and the OOB value may be called as a function pointer. Add a bounds check on htype against the array size before either table is accessed. Out-of-range values now cause the SNDU to be discarded. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Ariel Silver Signed-off-by: Ariel Silver Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvb_net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 8bb8dd34c223..a2159b2bc176 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -228,6 +228,9 @@ static int handle_one_ule_extension( struct dvb_net_priv *p ) unsigned char hlen = (p->ule_sndu_type & 0x0700) >> 8; unsigned char htype = p->ule_sndu_type & 0x00FF; + if (htype >= ARRAY_SIZE(ule_mandatory_ext_handlers)) + return -1; + /* Discriminate mandatory and optional extension headers. */ if (hlen == 0) { /* Mandatory extension header */ From 3385ea97c14d271dcb0c6e6fcf16972f819eecd8 Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 23 Feb 2026 19:29:50 +0100 Subject: [PATCH 491/828] platform/x86: oxpec: Add support for OneXPlayer APEX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OneXPlayer Apex is a new Strix Halo handheld. It uses the same registers as the OneXPlayer Fly devices. Add a quirk for it to the oxpec driver. Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20260223183004.2696892-2-lkml@antheas.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/oxpec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/oxpec.c b/drivers/platform/x86/oxpec.c index 144a454103b9..59d6f9d9a905 100644 --- a/drivers/platform/x86/oxpec.c +++ b/drivers/platform/x86/oxpec.c @@ -11,7 +11,7 @@ * * Copyright (C) 2022 Joaquín I. Aramendía * Copyright (C) 2024 Derek J. Clark - * Copyright (C) 2025 Antheas Kapenekakis + * Copyright (C) 2025-2026 Antheas Kapenekakis */ #include @@ -142,6 +142,13 @@ static const struct dmi_system_id dmi_table[] = { }, .driver_data = (void *)oxp_2, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER APEX"), + }, + .driver_data = (void *)oxp_fly, + }, { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), From 4049c46edb5d44c0de045f6f504371705dd603dd Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 23 Feb 2026 19:29:51 +0100 Subject: [PATCH 492/828] platform/x86: oxpec: Add support for OneXPlayer X1z MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X1z is a variant of OneXPlayer X1 A with 8840U. It seems that only one user has this one. Add a quirk for it to the oxpec driver. Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20260223183004.2696892-3-lkml@antheas.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/oxpec.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/oxpec.c b/drivers/platform/x86/oxpec.c index 59d6f9d9a905..623d9a452c46 100644 --- a/drivers/platform/x86/oxpec.c +++ b/drivers/platform/x86/oxpec.c @@ -219,6 +219,13 @@ static const struct dmi_system_id dmi_table[] = { }, .driver_data = (void *)oxp_mini_amd_pro, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1z"), + }, + .driver_data = (void *)oxp_x1, + }, { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), From 2a3b4a8c10a64a62c4243007139d253dc1324dfd Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 23 Feb 2026 19:29:52 +0100 Subject: [PATCH 493/828] platform/x86: oxpec: Add support for OneXPlayer X1 Air MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X1 Air is an X1 variant with a newer Intel chipset. It uses the same registers as the X1. Add a quirk for it to the oxpec driver. Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20260223183004.2696892-4-lkml@antheas.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/oxpec.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/oxpec.c b/drivers/platform/x86/oxpec.c index 623d9a452c46..d6ef8dbf568e 100644 --- a/drivers/platform/x86/oxpec.c +++ b/drivers/platform/x86/oxpec.c @@ -240,6 +240,13 @@ static const struct dmi_system_id dmi_table[] = { }, .driver_data = (void *)oxp_x1, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1Air"), + }, + .driver_data = (void *)oxp_x1, + }, { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), From cd0883055b04586770dab43c64159348bf480a3e Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 23 Feb 2026 19:29:53 +0100 Subject: [PATCH 494/828] platform/x86: oxpec: Add support for Aokzoe A2 Pro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aokzoe A2 Pro is an older device that the oxpec driver is missing the quirk for. It has the same behavior as the AOKZOE A1 devices. Add a quirk for it to the oxpec driver. Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20260223183004.2696892-5-lkml@antheas.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/oxpec.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/oxpec.c b/drivers/platform/x86/oxpec.c index d6ef8dbf568e..6d4a53a2ed60 100644 --- a/drivers/platform/x86/oxpec.c +++ b/drivers/platform/x86/oxpec.c @@ -114,6 +114,13 @@ static const struct dmi_system_id dmi_table[] = { }, .driver_data = (void *)aok_zoe_a1, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AOKZOE"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "AOKZOE A2 Pro"), + }, + .driver_data = (void *)aok_zoe_a1, + }, { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AOKZOE"), From 916727cfdb72cd01fef3fa6746e648f8cb70e713 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 25 Feb 2026 15:06:46 -0600 Subject: [PATCH 495/828] platform/x86: hp-bioscfg: Support allocations of larger data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some systems have much larger amounts of enumeration attributes than have been previously encountered. This can lead to page allocation failures when using kcalloc(). Switch over to using kvcalloc() to allow larger allocations. Fixes: 6b2770bfd6f92 ("platform/x86: hp-bioscfg: enum-attributes") Cc: stable@vger.kernel.org Reported-by: Paul Kerry Tested-by: Paul Kerry Closes: https://bugs.debian.org/1127612 Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20260225210646.59381-1-mario.limonciello@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c b/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c index 470b9f44ed7a..af4d1920d488 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c +++ b/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c @@ -94,8 +94,11 @@ int hp_alloc_enumeration_data(void) bioscfg_drv.enumeration_instances_count = hp_get_instance_count(HP_WMI_BIOS_ENUMERATION_GUID); - bioscfg_drv.enumeration_data = kzalloc_objs(*bioscfg_drv.enumeration_data, - bioscfg_drv.enumeration_instances_count); + if (!bioscfg_drv.enumeration_instances_count) + return -EINVAL; + bioscfg_drv.enumeration_data = kvcalloc(bioscfg_drv.enumeration_instances_count, + sizeof(*bioscfg_drv.enumeration_data), GFP_KERNEL); + if (!bioscfg_drv.enumeration_data) { bioscfg_drv.enumeration_instances_count = 0; return -ENOMEM; @@ -444,6 +447,6 @@ void hp_exit_enumeration_attributes(void) } bioscfg_drv.enumeration_instances_count = 0; - kfree(bioscfg_drv.enumeration_data); + kvfree(bioscfg_drv.enumeration_data); bioscfg_drv.enumeration_data = NULL; } From 622cc8d078e5d7586f0e40dd7aea515a946da48c Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Thu, 26 Feb 2026 15:19:43 +0100 Subject: [PATCH 496/828] platform/x86: asus-armoury: add support for GX650RX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model GX650RX. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260226141944.352923-2-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 001c011fdd56..f9d9136aec7b 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1457,6 +1457,38 @@ static const struct dmi_system_id power_limits[] = { }, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GX650RX"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 70, + .ppt_pl1_spl_max = 90, + .ppt_pl2_sppt_min = 28, + .ppt_pl2_sppt_def = 70, + .ppt_pl2_sppt_max = 100, + .ppt_pl3_fppt_min = 28, + .ppt_pl3_fppt_def = 110, + .ppt_pl3_fppt_max = 125, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 25, + .nv_temp_target_min = 76, + .nv_temp_target_max = 87, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_max = 50, + .ppt_pl2_sppt_min = 28, + .ppt_pl2_sppt_max = 50, + .ppt_pl3_fppt_min = 28, + .ppt_pl3_fppt_max = 65, + .nv_temp_target_min = 76, + .nv_temp_target_max = 87, + }, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "G513I"), From c55b84fa567ed33bc651675bcaf8d53bd12d910a Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Thu, 26 Feb 2026 15:19:44 +0100 Subject: [PATCH 497/828] platform/x86: asus-armoury: add support for FA401UM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model FA401UM. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260226141944.352923-3-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index f9d9136aec7b..208f6fe16168 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -346,6 +346,35 @@ struct power_data { * _def is not required and will be assumed to be default == max if missing. */ static const struct dmi_system_id power_limits[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "FA401UM"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 35, + .ppt_pl2_sppt_max = 80, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 80, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 15, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 35, + .ppt_pl2_sppt_min = 31, + .ppt_pl2_sppt_max = 44, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_max = 65, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "FA401UV"), From 3350c2b3f2b8a3b985a020a4ef4f2f050a4b6a1d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 25 Feb 2026 21:12:29 -0800 Subject: [PATCH 498/828] platform_data/mlxreg: mlxreg.h: fix all kernel-doc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the correct kernel-doc format & notation to eliminate kernel-doc warnings: Warning: include/linux/platform_data/mlxreg.h:24 Enum value 'MLX_WDT_TYPE1' not described in enum 'mlxreg_wdt_type' Warning: include/linux/platform_data/mlxreg.h:24 Enum value 'MLX_WDT_TYPE2' not described in enum 'mlxreg_wdt_type' Warning: include/linux/platform_data/mlxreg.h:24 Enum value 'MLX_WDT_TYPE3' not described in enum 'mlxreg_wdt_type' Warning: include/linux/platform_data/mlxreg.h:37 bad line: PHYs ready / unready state; Warning: include/linux/platform_data/mlxreg.h:153 struct member 'np' not described in 'mlxreg_core_data' Warning: include/linux/platform_data/mlxreg.h:153 struct member 'hpdev' not described in 'mlxreg_core_data' Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20260226051232.549537-1-rdunlap@infradead.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/mlxreg.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index f6cca7a035c7..50b6be57da66 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -13,10 +13,10 @@ /** * enum mlxreg_wdt_type - type of HW watchdog * - * TYPE1 HW watchdog implementation exist in old systems. - * All new systems have TYPE2 HW watchdog. - * TYPE3 HW watchdog can exist on all systems with new CPLD. - * TYPE3 is selected by WD capability bit. + * @MLX_WDT_TYPE1: HW watchdog implementation in old systems. + * @MLX_WDT_TYPE2: All new systems have TYPE2 HW watchdog. + * @MLX_WDT_TYPE3: HW watchdog that can exist on all systems with new CPLD. + * TYPE3 is selected by WD capability bit. */ enum mlxreg_wdt_type { MLX_WDT_TYPE1, @@ -35,7 +35,7 @@ enum mlxreg_wdt_type { * @MLXREG_HOTPLUG_LC_SYNCED: entry for line card synchronization events, coming * after hardware-firmware synchronization handshake; * @MLXREG_HOTPLUG_LC_READY: entry for line card ready events, indicating line card - PHYs ready / unready state; + * PHYs ready / unready state; * @MLXREG_HOTPLUG_LC_ACTIVE: entry for line card active events, indicating firmware * availability / unavailability for the ports on line card; * @MLXREG_HOTPLUG_LC_THERMAL: entry for line card thermal shutdown events, positive @@ -123,8 +123,8 @@ struct mlxreg_hotplug_device { * @reg_pwr: attribute power register; * @reg_ena: attribute enable register; * @mode: access mode; - * @np - pointer to node platform associated with attribute; - * @hpdev - hotplug device data; + * @np: pointer to node platform associated with attribute; + * @hpdev: hotplug device data; * @notifier: pointer to event notifier block; * @health_cntr: dynamic device health indication counter; * @attached: true if device has been attached after good health indication; From 377f8e788945d45b012ed9cfc35ca56c02e86cd8 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Mon, 16 Feb 2026 18:55:38 +0100 Subject: [PATCH 499/828] HID: asus: add xg mobile 2023 external hardware support XG mobile stations have the 0x5a endpoint and has to be initialized: add them to hid-asus. Signed-off-by: Denis Benato Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 3 +++ drivers/hid/hid-ids.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 7a08e964b9cc..687b785e2d0c 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1497,6 +1497,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD | QUIRK_ROG_ALLY_XPAD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_XGM_2023), + }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3e299a30dcde..4ab7640b119a 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -229,6 +229,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 +#define USB_DEVICE_ID_ASUSTEK_XGM_2023 0x1a9a #define USB_VENDOR_ID_ATEN 0x0557 #define USB_DEVICE_ID_ATEN_UC100KM 0x2004 From 6e3f4514e3b432871ac81717d24f56b441857f77 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 27 Feb 2026 12:49:01 +0000 Subject: [PATCH 500/828] drm/ttm: Fix ttm_pool_beneficial_order() return type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a nasty copy and paste bug, where the incorrect boolean return type of the ttm_pool_beneficial_order() helper had a consequence of avoiding direct reclaim too eagerly for drivers which use this feature (currently amdgpu). Signed-off-by: Tvrtko Ursulin Fixes: 7e9c548d3709 ("drm/ttm: Allow drivers to specify maximum beneficial TTM pool size") Cc: Christian König Cc: Thadeu Lima de Souza Cascardo Cc: dri-devel@lists.freedesktop.org Cc: # v6.19+ Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20260227124901.3177-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/ttm/ttm_pool_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool_internal.h b/drivers/gpu/drm/ttm/ttm_pool_internal.h index 82c4b7e56a99..24c179fd69d1 100644 --- a/drivers/gpu/drm/ttm/ttm_pool_internal.h +++ b/drivers/gpu/drm/ttm/ttm_pool_internal.h @@ -17,7 +17,7 @@ static inline bool ttm_pool_uses_dma32(struct ttm_pool *pool) return pool->alloc_flags & TTM_ALLOCATION_POOL_USE_DMA32; } -static inline bool ttm_pool_beneficial_order(struct ttm_pool *pool) +static inline unsigned int ttm_pool_beneficial_order(struct ttm_pool *pool) { return pool->alloc_flags & 0xff; } From e9217ca77dc35b4978db0fe901685ddb3f1e223a Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 23 Feb 2026 16:58:09 +0900 Subject: [PATCH 501/828] mm/slab: initialize slab->stride early to avoid memory ordering issues When alloc_slab_obj_exts() is called later (instead of during slab allocation and initialization), slab->stride and slab->obj_exts are updated after the slab is already accessible by multiple CPUs. The current implementation does not enforce memory ordering between slab->stride and slab->obj_exts. For correctness, slab->stride must be visible before slab->obj_exts. Otherwise, concurrent readers may observe slab->obj_exts as non-zero while stride is still stale. With stale slab->stride, slab_obj_ext() could return the wrong obj_ext. This could cause two problems: - obj_cgroup_put() is called on the wrong objcg, leading to a use-after-free due to incorrect reference counting [1] by decrementing the reference count more than it was incremented. - refill_obj_stock() is called on the wrong objcg, leading to a page_counter overflow [2] by uncharging more memory than charged. Fix this by unconditionally initializing slab->stride in alloc_slab_obj_exts_early(), before the need_slab_obj_exts() check. In the case of SLAB_OBJ_EXT_IN_OBJ, it is overridden in the function. This ensures updates to slab->stride become visible before the slab can be accessed by other CPUs via the per-node partial slab list (protected by spinlock with acquire/release semantics). Thanks to Shakeel Butt for pointing out this issue [3]. [vbabka@kernel.org: the bug reports [1] and [2] are not yet fully fixed, with investigation ongoing, but it is nevertheless a step in the right direction to only set stride once after allocating the slab and not change it later ] Fixes: 7a8e71bc619d ("mm/slab: use stride to access slabobj_ext") Reported-by: Venkat Rao Bagalkote Link: https://lore.kernel.org/lkml/ca241daa-e7e7-4604-a48d-de91ec9184a5@linux.ibm.com [1] Link: https://lore.kernel.org/all/ddff7c7d-c0c3-4780-808f-9a83268bbf0c@linux.ibm.com [2] Link: https://lore.kernel.org/linux-mm/aZu9G9mVIVzSm6Ft@hyeyoo [3] Signed-off-by: Harry Yoo Signed-off-by: Vlastimil Babka (SUSE) --- mm/slub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 52f021711744..0c906fefc31b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2196,7 +2196,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, retry: old_exts = READ_ONCE(slab->obj_exts); handle_failed_objexts_alloc(old_exts, vec, objects); - slab_set_stride(slab, sizeof(struct slabobj_ext)); if (new_slab) { /* @@ -2272,6 +2271,9 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab) void *addr; unsigned long obj_exts; + /* Initialize stride early to avoid memory ordering issues */ + slab_set_stride(slab, sizeof(struct slabobj_ext)); + if (!need_slab_obj_exts(s)) return; @@ -2288,7 +2290,6 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab) obj_exts |= MEMCG_DATA_OBJEXTS; #endif slab->obj_exts = obj_exts; - slab_set_stride(slab, sizeof(struct slabobj_ext)); } else if (s->flags & SLAB_OBJ_EXT_IN_OBJ) { unsigned int offset = obj_exts_offset_in_object(s); From 0f5197ea9a73a4c406c75e6d8af3a13f7f96ae89 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 25 Feb 2026 11:38:05 -0800 Subject: [PATCH 502/828] nvme-multipath: fix leak on try_module_get failure We need to fall back to the synchronous removal if we can't get a reference on the module needed for the deferred removal. Fixes: 62188639ec16 ("nvme-multipath: introduce delayed removal of the multipath head node") Reviewed-by: Nilay Shroff Reviewed-by: John Garry Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index bfcc5904e6a2..fc6800a9f7f9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -1310,13 +1310,11 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) if (!list_empty(&head->list)) goto out; - if (head->delayed_removal_secs) { - /* - * Ensure that no one could remove this module while the head - * remove work is pending. - */ - if (!try_module_get(THIS_MODULE)) - goto out; + /* + * Ensure that no one could remove this module while the head + * remove work is pending. + */ + if (head->delayed_removal_secs && try_module_get(THIS_MODULE)) { mod_delayed_work(nvme_wq, &head->remove_work, head->delayed_removal_secs * HZ); } else { From f505a45776d149632e3bd0b87f0da1609607161a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 26 Feb 2026 23:15:22 +0100 Subject: [PATCH 503/828] smb: client: Use snprintf in cifs_set_cifscreds Replace unbounded sprintf() calls with the safer snprintf(). Avoid using magic numbers and use strlen() to calculate the key descriptor buffer size. Save the size in a local variable and reuse it for the bounded snprintf() calls. Remove CIFSCREDS_DESC_SIZE. Signed-off-by: Thorsten Blum Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/connect.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 4c34d9603e05..3bad2c5c523d 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2167,9 +2167,6 @@ void __cifs_put_smb_ses(struct cifs_ses *ses) #ifdef CONFIG_KEYS -/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */ -#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1) - /* Populate username and pw fields from keyring if possible */ static int cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) @@ -2177,6 +2174,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) int rc = 0; int is_domain = 0; const char *delim, *payload; + size_t desc_sz; char *desc; ssize_t len; struct key *key; @@ -2185,7 +2183,9 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) struct sockaddr_in6 *sa6; const struct user_key_payload *upayload; - desc = kmalloc(CIFSCREDS_DESC_SIZE, GFP_KERNEL); + /* "cifs:a:" and "cifs:d:" are the same length; +1 for NUL terminator */ + desc_sz = strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1; + desc = kmalloc(desc_sz, GFP_KERNEL); if (!desc) return -ENOMEM; @@ -2193,11 +2193,11 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) switch (server->dstaddr.ss_family) { case AF_INET: sa = (struct sockaddr_in *)&server->dstaddr; - sprintf(desc, "cifs:a:%pI4", &sa->sin_addr.s_addr); + snprintf(desc, desc_sz, "cifs:a:%pI4", &sa->sin_addr.s_addr); break; case AF_INET6: sa6 = (struct sockaddr_in6 *)&server->dstaddr; - sprintf(desc, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr); + snprintf(desc, desc_sz, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr); break; default: cifs_dbg(FYI, "Bad ss_family (%hu)\n", @@ -2216,7 +2216,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) } /* didn't work, try to find a domain key */ - sprintf(desc, "cifs:d:%s", ses->domainName); + snprintf(desc, desc_sz, "cifs:d:%s", ses->domainName); cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc); key = request_key(&key_type_logon, desc, ""); if (IS_ERR(key)) { From 39195990e4c093c9eecf88f29811c6de29265214 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Feb 2026 06:10:08 -0600 Subject: [PATCH 504/828] PCI: Correct PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fb82437fdd8c ("PCI: Change capability register offsets to hex") incorrectly converted the PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value from decimal 52 to hex 0x32: -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ This broke PCI capabilities in a VMM because subsequent ones weren't DWORD-aligned. Change PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to the correct value of 0x34. fb82437fdd8c was from Baruch Siach , but this was not Baruch's fault; it's a mistake I made when applying the patch. Fixes: fb82437fdd8c ("PCI: Change capability register offsets to hex") Reported-by: David Woodhouse Closes: https://lore.kernel.org/all/3ae392a0158e9d9ab09a1d42150429dd8ca42791.camel@infradead.org Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- include/uapi/linux/pci_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ec1c54b5a310..14f634ab9350 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -712,7 +712,7 @@ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ #define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x34 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ #define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */ From 550bae2c0931dbb664a61b08c21cf156f0a5362a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 12 Feb 2026 11:49:44 -0300 Subject: [PATCH 505/828] pmdomain: bcm: bcm2835-power: Fix broken reset status read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bcm2835_reset_status() has a misplaced parenthesis on every PM_READ() call. Since PM_READ(reg) expands to readl(power->base + (reg)), the expression: PM_READ(PM_GRAFX & PM_V3DRSTN) computes the bitwise AND of the register offset PM_GRAFX with the bitmask PM_V3DRSTN before using the result as a register offset, reading from the wrong MMIO address instead of the intended PM_GRAFX register. The same issue affects the PM_IMAGE cases. Fix by moving the closing parenthesis so PM_READ() receives only the register offset, and the bitmask is applied to the value returned by the read. Fixes: 670c672608a1 ("soc: bcm: bcm2835-pm: Add support for power domains under a new binding.") Signed-off-by: Maíra Canal Reviewed-by: Florian Fainelli Reviewed-by: Stefan Wahren Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/pmdomain/bcm/bcm2835-power.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pmdomain/bcm/bcm2835-power.c b/drivers/pmdomain/bcm/bcm2835-power.c index 1d29addfe036..0450202bbee2 100644 --- a/drivers/pmdomain/bcm/bcm2835-power.c +++ b/drivers/pmdomain/bcm/bcm2835-power.c @@ -580,11 +580,11 @@ static int bcm2835_reset_status(struct reset_controller_dev *rcdev, switch (id) { case BCM2835_RESET_V3D: - return !PM_READ(PM_GRAFX & PM_V3DRSTN); + return !(PM_READ(PM_GRAFX) & PM_V3DRSTN); case BCM2835_RESET_H264: - return !PM_READ(PM_IMAGE & PM_H264RSTN); + return !(PM_READ(PM_IMAGE) & PM_H264RSTN); case BCM2835_RESET_ISP: - return !PM_READ(PM_IMAGE & PM_ISPRSTN); + return !(PM_READ(PM_IMAGE) & PM_ISPRSTN); default: return -EINVAL; } From e31b556c0ba21f20c298aa61181b96541140b7b9 Mon Sep 17 00:00:00 2001 From: Romain Sioen Date: Fri, 6 Feb 2026 17:32:58 +0100 Subject: [PATCH 506/828] HID: mcp2221: cancel last I2C command on read error When an I2C SMBus read operation fails, the MCP2221 internal state machine may not reset correctly, causing subsequent transactions to fail. By adding a short delay and explicitly cancelling the last command, we ensure the device is ready for the next operation. Fix an issue where i2cdetect was not able to detect all devices correctly on the bus. Signed-off-by: Romain Sioen Signed-off-by: Jiri Kosina --- drivers/hid/hid-mcp2221.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index 33603b019f97..ef3b5c77c38e 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -353,6 +353,8 @@ static int mcp_i2c_smbus_read(struct mcp2221 *mcp, usleep_range(90, 100); retries++; } else { + usleep_range(980, 1000); + mcp_cancel_last_cmd(mcp); return ret; } } else { From 032e084f0d43fda78c33abfc704ac13a0891a6e7 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 27 Feb 2026 18:43:09 +0000 Subject: [PATCH 507/828] tools/sched_ext: fix strtoul() misuse in scx_hotplug_seq() scx_hotplug_seq() uses strtoul() but validates the result with a negative check (val < 0), which can never be true for an unsigned return value. Use the endptr mechanism to verify the entire string was consumed, and check errno == ERANGE for overflow detection. Signed-off-by: David Carlier Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/compat.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index 8b4897fc8b99..edccc99c7294 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -125,6 +125,7 @@ static inline long scx_hotplug_seq(void) { int fd; char buf[32]; + char *endptr; ssize_t len; long val; @@ -137,8 +138,10 @@ static inline long scx_hotplug_seq(void) buf[len] = 0; close(fd); - val = strtoul(buf, NULL, 10); - SCX_BUG_ON(val < 0, "invalid num hotplug events: %lu", val); + errno = 0; + val = strtoul(buf, &endptr, 10); + SCX_BUG_ON(errno == ERANGE || endptr == buf || + (*endptr != '\n' && *endptr != '\0'), "invalid num hotplug events: %ld", val); return val; } From 3d17d76d1ffb139a7492317b196ee03c8eabc9dc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 27 Feb 2026 09:07:45 -0800 Subject: [PATCH 508/828] io_uring/zcrx: don't set rx_page_size when not requested The rx_buf_len parameter was recently added to the Rx zero-copy implementation. The expectation is that when not set system will maintain previous behavior and use the default buffer size (PAGE_SIZE). This works correctly at the iouring level, but we don't preserve the same "zero means default" semantics when registering the memory provider on the netdev. mp_param.rx_page_size is unconditionally set to PAGE_SIZE. This causes __net_mp_open_rxq() to check for QCFG_RX_PAGE_SIZE support in the driver, and return -EOPNOTSUPP for drivers that don't advertise it -- even though the user never asked for large buffers. Only set mp_param.rx_page_size when rx_buf_len was explicitly provided, so that the default page size path works on all zcrx-capable drivers. mlx5 and fbnic only support 4kB pages in the current release. Fixes: 795663b4d160 ("io_uring/zcrx: implement large rx buffer support") Signed-off-by: Jakub Kicinski Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/zcrx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 97984a73a95d..19b287d21f4b 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -837,7 +837,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, if (ret) goto netdev_put_unlock; - mp_param.rx_page_size = 1U << ifq->niov_shift; + if (reg.rx_buf_len) + mp_param.rx_page_size = 1U << ifq->niov_shift; mp_param.mp_ops = &io_uring_pp_zc_ops; mp_param.mp_priv = ifq; ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL); From e6b899f08066e744f89df16ceb782e06868bd148 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 26 Feb 2026 14:50:09 +0100 Subject: [PATCH 509/828] nsfs: tighten permission checks for ns iteration ioctls Even privileged services should not necessarily be able to see other privileged service's namespaces so they can't leak information to each other. Use may_see_all_namespaces() helper that centralizes this policy until the nstree adapts. Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-1-d2c2853313bd@kernel.org Fixes: a1d220d9dafa ("nsfs: iterate through mount namespaces") Reviewed-by: Jeff Layton Cc: stable@kernel.org # v6.12+ Signed-off-by: Christian Brauner --- fs/nsfs.c | 13 +++++++++++++ include/linux/ns_common.h | 2 ++ kernel/nscommon.c | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/fs/nsfs.c b/fs/nsfs.c index db91de208645..be36c10c38cf 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -199,6 +199,17 @@ static bool nsfs_ioctl_valid(unsigned int cmd) return false; } +static bool may_use_nsfs_ioctl(unsigned int cmd) +{ + switch (_IOC_NR(cmd)) { + case _IOC_NR(NS_MNT_GET_NEXT): + fallthrough; + case _IOC_NR(NS_MNT_GET_PREV): + return may_see_all_namespaces(); + } + return true; +} + static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -214,6 +225,8 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, if (!nsfs_ioctl_valid(ioctl)) return -ENOIOCTLCMD; + if (!may_use_nsfs_ioctl(ioctl)) + return -EPERM; ns = get_proc_ns(file_inode(filp)); switch (ioctl) { diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 825f5865bfc5..c8e227a3f9e2 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +bool may_see_all_namespaces(void); + static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) { return atomic_read(&ns->__ns_ref_active); diff --git a/kernel/nscommon.c b/kernel/nscommon.c index bdc3c86231d3..3166c1fd844a 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -309,3 +309,9 @@ void __ns_ref_active_get(struct ns_common *ns) return; } } + +bool may_see_all_namespaces(void) +{ + return (task_active_pid_ns(current) == &init_pid_ns) && + ns_capable_noaudit(init_pid_ns.user_ns, CAP_SYS_ADMIN); +} From d2324a9317f00013facb0ba00b00440e19d2af5e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 26 Feb 2026 14:50:10 +0100 Subject: [PATCH 510/828] nsfs: tighten permission checks for handle opening Even privileged services should not necessarily be able to see other privileged service's namespaces so they can't leak information to each other. Use may_see_all_namespaces() helper that centralizes this policy until the nstree adapts. Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-2-d2c2853313bd@kernel.org Fixes: 5222470b2fbb ("nsfs: support file handles") Reviewed-by: Jeff Layton Cc: stable@kernel.org # v6.18+ Signed-off-by: Christian Brauner --- fs/nsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index be36c10c38cf..c215878d55e8 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -627,7 +627,7 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh, return ERR_PTR(-EOPNOTSUPP); } - if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) { + if (owning_ns && !may_see_all_namespaces()) { ns->ops->put(ns); return ERR_PTR(-EPERM); } From 8d76afe84fa2babf604b3c173730d4d2b067e361 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 26 Feb 2026 14:50:11 +0100 Subject: [PATCH 511/828] nstree: tighten permission checks for listing Even privileged services should not necessarily be able to see other privileged service's namespaces so they can't leak information to each other. Use may_see_all_namespaces() helper that centralizes this policy until the nstree adapts. Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-3-d2c2853313bd@kernel.org Fixes: 76b6f5dfb3fd ("nstree: add listns()") Reviewed-by: Jeff Layton Cc: stable@kernel.org # v6.19+ Signed-off-by: Christian Brauner --- kernel/nstree.c | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/kernel/nstree.c b/kernel/nstree.c index f36c59e6951d..6d12e5900ac0 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c @@ -515,32 +515,11 @@ static inline bool __must_check ns_requested(const struct klistns *kls, static inline bool __must_check may_list_ns(const struct klistns *kls, struct ns_common *ns) { - if (kls->user_ns) { - if (kls->userns_capable) - return true; - } else { - struct ns_common *owner; - struct user_namespace *user_ns; - - owner = ns_owner(ns); - if (owner) - user_ns = to_user_ns(owner); - else - user_ns = &init_user_ns; - if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN)) - return true; - } - + if (kls->user_ns && kls->userns_capable) + return true; if (is_current_namespace(ns)) return true; - - if (ns->ns_type != CLONE_NEWUSER) - return false; - - if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN)) - return true; - - return false; + return may_see_all_namespaces(); } static inline void ns_put(struct ns_common *ns) @@ -600,7 +579,7 @@ static ssize_t do_listns_userns(struct klistns *kls) ret = 0; head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head; - kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN); + kls->userns_capable = may_see_all_namespaces(); rcu_read_lock(); From 4c7b2ec23cc5d880e3ffe35e8c2aad686b67723a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 26 Feb 2026 14:50:12 +0100 Subject: [PATCH 512/828] selftests: fix mntns iteration selftests Now that we changed permission checking make sure that we reflect that in the selftests. Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-4-d2c2853313bd@kernel.org Fixes: 9d87b1067382 ("selftests: add tests for mntns iteration") Reviewed-by: Jeff Layton Cc: stable@kernel.org # v6.14+ Signed-off-by: Christian Brauner --- .../filesystems/nsfs/iterate_mntns.c | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c index 61e55dfbf121..e19ff8168baf 100644 --- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c +++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c @@ -37,17 +37,20 @@ FIXTURE(iterate_mount_namespaces) { __u64 mnt_ns_id[MNT_NS_COUNT]; }; +static inline bool mntns_in_list(__u64 *mnt_ns_id, struct mnt_ns_info *info) +{ + for (int i = 0; i < MNT_NS_COUNT; i++) { + if (mnt_ns_id[i] == info->mnt_ns_id) + return true; + } + return false; +} + FIXTURE_SETUP(iterate_mount_namespaces) { for (int i = 0; i < MNT_NS_COUNT; i++) self->fd_mnt_ns[i] = -EBADF; - /* - * Creating a new user namespace let's us guarantee that we only see - * mount namespaces that we did actually create. - */ - ASSERT_EQ(unshare(CLONE_NEWUSER), 0); - for (int i = 0; i < MNT_NS_COUNT; i++) { struct mnt_ns_info info = {}; @@ -75,13 +78,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_forward) fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC); ASSERT_GE(fd_mnt_ns_cur, 0); - for (;; count++) { + for (;;) { struct mnt_ns_info info = {}; int fd_mnt_ns_next; fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info); if (fd_mnt_ns_next < 0 && errno == ENOENT) break; + if (mntns_in_list(self->mnt_ns_id, &info)) + count++; ASSERT_GE(fd_mnt_ns_next, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_next; @@ -96,13 +101,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_backwards) fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC); ASSERT_GE(fd_mnt_ns_cur, 0); - for (;; count++) { + for (;;) { struct mnt_ns_info info = {}; int fd_mnt_ns_prev; fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info); if (fd_mnt_ns_prev < 0 && errno == ENOENT) break; + if (mntns_in_list(self->mnt_ns_id, &info)) + count++; ASSERT_GE(fd_mnt_ns_prev, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_prev; @@ -125,7 +132,6 @@ TEST_F(iterate_mount_namespaces, iterate_forward) ASSERT_GE(fd_mnt_ns_next, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_next; - ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); } } @@ -144,7 +150,6 @@ TEST_F(iterate_mount_namespaces, iterate_backward) ASSERT_GE(fd_mnt_ns_prev, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_prev; - ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); } } From 1df97a7453eec80c1912c2d0360290a3970a7671 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 27 Feb 2026 14:48:01 -0800 Subject: [PATCH 513/828] bpf: Register dtor for freeing special fields There is a race window where BPF hash map elements can leak special fields if the program with access to the map value recreates these special fields between the check_and_free_fields done on the map value and its eventual return to the memory allocator. Several ways were explored prior to this patch, most notably [0] tried to use a poison value to reject attempts to recreate special fields for map values that have been logically deleted but still accessible to BPF programs (either while sitting in the free list or when reused). While this approach works well for task work, timers, wq, etc., it is harder to apply the idea to kptrs, which have a similar race and failure mode. Instead, we change bpf_mem_alloc to allow registering destructor for allocated elements, such that when they are returned to the allocator, any special fields created while they were accessible to programs in the mean time will be freed. If these values get reused, we do not free the fields again before handing the element back. The special fields thus may remain initialized while the map value sits in a free list. When bpf_mem_alloc is retired in the future, a similar concept can be introduced to kmalloc_nolock-backed kmem_cache, paired with the existing idea of a constructor. Note that the destructor registration happens in map_check_btf, after the BTF record is populated and (at that point) avaiable for inspection and duplication. Duplication is necessary since the freeing of embedded bpf_mem_alloc can be decoupled from actual map lifetime due to logic introduced to reduce the cost of rcu_barrier()s in mem alloc free path in 9f2c6e96c65e ("bpf: Optimize rcu_barrier usage between hash map and bpf_mem_alloc."). As such, once all callbacks are done, we must also free the duplicated record. To remove dependency on the bpf_map itself, also stash the key size of the map to obtain value from htab_elem long after the map is gone. [0]: https://lore.kernel.org/bpf/20260216131341.1285427-1-mykyta.yatsenko5@gmail.com Fixes: 14a324f6a67e ("bpf: Wire up freeing of referenced kptr") Fixes: 1bfbc267ec91 ("bpf: Enable bpf_timer and bpf_wq in any context") Reported-by: Alexei Starovoitov Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260227224806.646888-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_mem_alloc.h | 6 +++ kernel/bpf/hashtab.c | 87 +++++++++++++++++++++++++++++++++++ kernel/bpf/memalloc.c | 58 ++++++++++++++++++----- 3 files changed, 140 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index e45162ef59bb..4ce0d27f8ea2 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -14,6 +14,8 @@ struct bpf_mem_alloc { struct obj_cgroup *objcg; bool percpu; struct work_struct work; + void (*dtor_ctx_free)(void *ctx); + void *dtor_ctx; }; /* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects. @@ -32,6 +34,10 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg /* The percpu allocation with a specific unit size. */ int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); +void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, + void (*dtor)(void *obj, void *ctx), + void (*dtor_ctx_free)(void *ctx), + void *ctx); /* Check the allocation size for kmalloc equivalent allocator */ int bpf_mem_alloc_check_size(bool percpu, size_t size); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3b9d297a53be..582f0192b7e1 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -125,6 +125,11 @@ struct htab_elem { char key[] __aligned(8); }; +struct htab_btf_record { + struct btf_record *record; + u32 key_size; +}; + static inline bool htab_is_prealloc(const struct bpf_htab *htab) { return !(htab->map.map_flags & BPF_F_NO_PREALLOC); @@ -457,6 +462,84 @@ static int htab_map_alloc_check(union bpf_attr *attr) return 0; } +static void htab_mem_dtor(void *obj, void *ctx) +{ + struct htab_btf_record *hrec = ctx; + struct htab_elem *elem = obj; + void *map_value; + + if (IS_ERR_OR_NULL(hrec->record)) + return; + + map_value = htab_elem_value(elem, hrec->key_size); + bpf_obj_free_fields(hrec->record, map_value); +} + +static void htab_pcpu_mem_dtor(void *obj, void *ctx) +{ + void __percpu *pptr = *(void __percpu **)obj; + struct htab_btf_record *hrec = ctx; + int cpu; + + if (IS_ERR_OR_NULL(hrec->record)) + return; + + for_each_possible_cpu(cpu) + bpf_obj_free_fields(hrec->record, per_cpu_ptr(pptr, cpu)); +} + +static void htab_dtor_ctx_free(void *ctx) +{ + struct htab_btf_record *hrec = ctx; + + btf_record_free(hrec->record); + kfree(ctx); +} + +static int htab_set_dtor(const struct bpf_htab *htab, void (*dtor)(void *, void *)) +{ + u32 key_size = htab->map.key_size; + const struct bpf_mem_alloc *ma; + struct htab_btf_record *hrec; + int err; + + /* No need for dtors. */ + if (IS_ERR_OR_NULL(htab->map.record)) + return 0; + + hrec = kzalloc(sizeof(*hrec), GFP_KERNEL); + if (!hrec) + return -ENOMEM; + hrec->key_size = key_size; + hrec->record = btf_record_dup(htab->map.record); + if (IS_ERR(hrec->record)) { + err = PTR_ERR(hrec->record); + kfree(hrec); + return err; + } + ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma; + /* Kinda sad, but cast away const-ness since we change ma->dtor. */ + bpf_mem_alloc_set_dtor((struct bpf_mem_alloc *)ma, dtor, htab_dtor_ctx_free, hrec); + return 0; +} + +static int htab_map_check_btf(const struct bpf_map *map, const struct btf *btf, + const struct btf_type *key_type, const struct btf_type *value_type) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + + if (htab_is_prealloc(htab)) + return 0; + /* + * We must set the dtor using this callback, as map's BTF record is not + * populated in htab_map_alloc(), so it will always appear as NULL. + */ + if (htab_is_percpu(htab)) + return htab_set_dtor(htab, htab_pcpu_mem_dtor); + else + return htab_set_dtor(htab, htab_mem_dtor); +} + static struct bpf_map *htab_map_alloc(union bpf_attr *attr) { bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || @@ -2281,6 +2364,7 @@ const struct bpf_map_ops htab_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], @@ -2303,6 +2387,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru), .map_btf_id = &htab_map_btf_ids[0], @@ -2482,6 +2567,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_percpu), .map_btf_id = &htab_map_btf_ids[0], @@ -2502,6 +2588,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru_percpu), .map_btf_id = &htab_map_btf_ids[0], diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index bd45dda9dc35..682a9f34214b 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -102,6 +102,8 @@ struct bpf_mem_cache { int percpu_size; bool draining; struct bpf_mem_cache *tgt; + void (*dtor)(void *obj, void *ctx); + void *dtor_ctx; /* list of objects to be freed after RCU GP */ struct llist_head free_by_rcu; @@ -260,12 +262,14 @@ static void free_one(void *obj, bool percpu) kfree(obj); } -static int free_all(struct llist_node *llnode, bool percpu) +static int free_all(struct bpf_mem_cache *c, struct llist_node *llnode, bool percpu) { struct llist_node *pos, *t; int cnt = 0; llist_for_each_safe(pos, t, llnode) { + if (c->dtor) + c->dtor((void *)pos + LLIST_NODE_SZ, c->dtor_ctx); free_one(pos, percpu); cnt++; } @@ -276,7 +280,7 @@ static void __free_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace); - free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); + free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); atomic_set(&c->call_rcu_ttrace_in_progress, 0); } @@ -308,7 +312,7 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c) if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) { if (unlikely(READ_ONCE(c->draining))) { llnode = llist_del_all(&c->free_by_rcu_ttrace); - free_all(llnode, !!c->percpu_size); + free_all(c, llnode, !!c->percpu_size); } return; } @@ -417,7 +421,7 @@ static void check_free_by_rcu(struct bpf_mem_cache *c) dec_active(c, &flags); if (unlikely(READ_ONCE(c->draining))) { - free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size); + free_all(c, llist_del_all(&c->waiting_for_gp), !!c->percpu_size); atomic_set(&c->call_rcu_in_progress, 0); } else { call_rcu_hurry(&c->rcu, __free_by_rcu); @@ -635,13 +639,13 @@ static void drain_mem_cache(struct bpf_mem_cache *c) * Except for waiting_for_gp_ttrace list, there are no concurrent operations * on these lists, so it is safe to use __llist_del_all(). */ - free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu); - free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu); - free_all(__llist_del_all(&c->free_llist), percpu); - free_all(__llist_del_all(&c->free_llist_extra), percpu); - free_all(__llist_del_all(&c->free_by_rcu), percpu); - free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu); - free_all(llist_del_all(&c->waiting_for_gp), percpu); + free_all(c, llist_del_all(&c->free_by_rcu_ttrace), percpu); + free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), percpu); + free_all(c, __llist_del_all(&c->free_llist), percpu); + free_all(c, __llist_del_all(&c->free_llist_extra), percpu); + free_all(c, __llist_del_all(&c->free_by_rcu), percpu); + free_all(c, __llist_del_all(&c->free_llist_extra_rcu), percpu); + free_all(c, llist_del_all(&c->waiting_for_gp), percpu); } static void check_mem_cache(struct bpf_mem_cache *c) @@ -680,6 +684,9 @@ static void check_leaked_objs(struct bpf_mem_alloc *ma) static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma) { + /* We can free dtor ctx only once all callbacks are done using it. */ + if (ma->dtor_ctx_free) + ma->dtor_ctx_free(ma->dtor_ctx); check_leaked_objs(ma); free_percpu(ma->cache); free_percpu(ma->caches); @@ -1014,3 +1021,32 @@ int bpf_mem_alloc_check_size(bool percpu, size_t size) return 0; } + +void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, void (*dtor)(void *obj, void *ctx), + void (*dtor_ctx_free)(void *ctx), void *ctx) +{ + struct bpf_mem_caches *cc; + struct bpf_mem_cache *c; + int cpu, i; + + ma->dtor_ctx_free = dtor_ctx_free; + ma->dtor_ctx = ctx; + + if (ma->cache) { + for_each_possible_cpu(cpu) { + c = per_cpu_ptr(ma->cache, cpu); + c->dtor = dtor; + c->dtor_ctx = ctx; + } + } + if (ma->caches) { + for_each_possible_cpu(cpu) { + cc = per_cpu_ptr(ma->caches, cpu); + for (i = 0; i < NUM_CACHES; i++) { + c = &cc->cache[i]; + c->dtor = dtor; + c->dtor_ctx = ctx; + } + } + } +} From ae51772b1e94ba1d76db19085957dbccac189c1c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 27 Feb 2026 14:48:02 -0800 Subject: [PATCH 514/828] bpf: Lose const-ness of map in map_check_btf() BPF hash map may now use the map_check_btf() callback to decide whether to set a dtor on its bpf_mem_alloc or not. Unlike C++ where members can opt out of const-ness using mutable, we must lose the const qualifier on the callback such that we can avoid the ugly cast. Make the change and adjust all existing users, and lose the comment in hashtab.c. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260227224806.646888-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- include/linux/bpf_local_storage.h | 2 +- kernel/bpf/arena.c | 2 +- kernel/bpf/arraymap.c | 2 +- kernel/bpf/bloom_filter.c | 2 +- kernel/bpf/bpf_insn_array.c | 2 +- kernel/bpf/bpf_local_storage.c | 2 +- kernel/bpf/hashtab.c | 9 ++++----- kernel/bpf/local_storage.c | 2 +- kernel/bpf/lpm_trie.c | 2 +- kernel/bpf/syscall.c | 2 +- 11 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b78b53198a2e..05b34a6355b0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -124,7 +124,7 @@ struct bpf_map_ops { u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); - int (*map_check_btf)(const struct bpf_map *map, + int (*map_check_btf)(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -656,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map) map->ops->map_seq_show_elem; } -int map_check_no_btf(const struct bpf_map *map, +int map_check_no_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 85efa9772530..8157e8da61d4 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -176,7 +176,7 @@ u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_cache *cache); -int bpf_local_storage_map_check_btf(const struct bpf_map *map, +int bpf_local_storage_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 144f30e740e8..f355cf1c1a16 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -303,7 +303,7 @@ static long arena_map_update_elem(struct bpf_map *map, void *key, return -EOPNOTSUPP; } -static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int arena_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { return 0; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 26763df6134a..33de68c95ad8 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -548,7 +548,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } -static int array_map_check_btf(const struct bpf_map *map, +static int array_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 35e1ddca74d2..b73336c976b7 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -180,7 +180,7 @@ static long bloom_map_update_elem(struct bpf_map *map, void *key, return -EINVAL; } -static int bloom_map_check_btf(const struct bpf_map *map, +static int bloom_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/bpf_insn_array.c b/kernel/bpf/bpf_insn_array.c index c0286f25ca3c..a2f84afe6f7c 100644 --- a/kernel/bpf/bpf_insn_array.c +++ b/kernel/bpf/bpf_insn_array.c @@ -98,7 +98,7 @@ static long insn_array_delete_elem(struct bpf_map *map, void *key) return -EINVAL; } -static int insn_array_check_btf(const struct bpf_map *map, +static int insn_array_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index b28f07d3a0db..2bf5ca5ae0df 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -797,7 +797,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -int bpf_local_storage_map_check_btf(const struct bpf_map *map, +int bpf_local_storage_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 582f0192b7e1..bc6bc8bb871d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -496,10 +496,10 @@ static void htab_dtor_ctx_free(void *ctx) kfree(ctx); } -static int htab_set_dtor(const struct bpf_htab *htab, void (*dtor)(void *, void *)) +static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *)) { u32 key_size = htab->map.key_size; - const struct bpf_mem_alloc *ma; + struct bpf_mem_alloc *ma; struct htab_btf_record *hrec; int err; @@ -518,12 +518,11 @@ static int htab_set_dtor(const struct bpf_htab *htab, void (*dtor)(void *, void return err; } ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma; - /* Kinda sad, but cast away const-ness since we change ma->dtor. */ - bpf_mem_alloc_set_dtor((struct bpf_mem_alloc *)ma, dtor, htab_dtor_ctx_free, hrec); + bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec); return 0; } -static int htab_map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int htab_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 1ccbf28b2ad9..8fca0c64f7b1 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -364,7 +364,7 @@ static long cgroup_storage_delete_elem(struct bpf_map *map, void *key) return -EINVAL; } -static int cgroup_storage_check_btf(const struct bpf_map *map, +static int cgroup_storage_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 1adeb4d3b8cf..0f57608b385d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -751,7 +751,7 @@ free_stack: return err; } -static int trie_check_btf(const struct bpf_map *map, +static int trie_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0378e83b4099..274039e36465 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1234,7 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) } EXPORT_SYMBOL_GPL(bpf_obj_name_cpy); -int map_check_no_btf(const struct bpf_map *map, +int map_check_no_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) From f41deee082dc7b1c198de21710cb5cb9c604cae0 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 27 Feb 2026 14:48:03 -0800 Subject: [PATCH 515/828] bpf: Delay freeing fields in local storage Currently, when use_kmalloc_nolock is false, the freeing of fields for a local storage selem is done eagerly before waiting for the RCU or RCU tasks trace grace period to elapse. This opens up a window where the program which has access to the selem can recreate the fields after the freeing of fields is done eagerly, causing memory leaks when the element is finally freed and returned to the kernel. Make a few changes to address this. First, delay the freeing of fields until after the grace periods have expired using a __bpf_selem_free_rcu wrapper which is eventually invoked after transitioning through the necessary number of grace period waits. Replace usage of the kfree_rcu with call_rcu to be able to take a custom callback. Finally, care needs to be taken to extend the rcu barriers for all cases, and not just when use_kmalloc_nolock is true, as RCU and RCU tasks trace callbacks can be in flight for either case and access the smap field, which is used to obtain the BTF record to walk over special fields in the map value. While we're at it, drop migrate_disable() from bpf_selem_free_rcu, since migration should be disabled for RCU callbacks already. Fixes: 9bac675e6368 ("bpf: Postpone bpf_obj_free_fields to the rcu callback") Reviewed-by: Amery Hung Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260227224806.646888-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 42 ++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 2bf5ca5ae0df..d7db1ada2dad 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -164,16 +164,28 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage, bpf_local_storage_free_trace_rcu); } +/* rcu callback for use_kmalloc_nolock == false */ +static void __bpf_selem_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + /* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */ + smap = rcu_dereference_check(SDATA(selem)->smap, 1); + + if (smap) + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + kfree(selem); +} + /* rcu tasks trace callback for use_kmalloc_nolock == false */ static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) { - struct bpf_local_storage_elem *selem; - - selem = container_of(rcu, struct bpf_local_storage_elem, rcu); if (rcu_trace_implies_rcu_gp()) - kfree(selem); + __bpf_selem_free_rcu(rcu); else - kfree_rcu(selem, rcu); + call_rcu(rcu, __bpf_selem_free_rcu); } /* Handle use_kmalloc_nolock == false */ @@ -181,7 +193,7 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem, bool vanilla_rcu) { if (vanilla_rcu) - kfree_rcu(selem, rcu); + call_rcu(&selem->rcu, __bpf_selem_free_rcu); else call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu); } @@ -195,11 +207,8 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) /* The bpf_local_storage_map_free will wait for rcu_barrier */ smap = rcu_dereference_check(SDATA(selem)->smap, 1); - if (smap) { - migrate_disable(); + if (smap) bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - migrate_enable(); - } kfree_nolock(selem); } @@ -214,18 +223,12 @@ static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) void bpf_selem_free(struct bpf_local_storage_elem *selem, bool reuse_now) { - struct bpf_local_storage_map *smap; - - smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - if (!selem->use_kmalloc_nolock) { /* * No uptr will be unpin even when reuse_now == false since uptr * is only supported in task local storage, where * smap->use_kmalloc_nolock == true. */ - if (smap) - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); __bpf_selem_free(selem, reuse_now); return; } @@ -958,10 +961,9 @@ restart: */ synchronize_rcu(); - if (smap->use_kmalloc_nolock) { - rcu_barrier_tasks_trace(); - rcu_barrier(); - } + /* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */ + rcu_barrier_tasks_trace(); + rcu_barrier(); kvfree(smap->buckets); bpf_map_area_free(smap); } From baa35b3cb6b642b903162aacff13181e170c4ecc Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 27 Feb 2026 14:48:04 -0800 Subject: [PATCH 516/828] bpf: Retire rcu_trace_implies_rcu_gp() from local storage This assumption will always hold going forward, hence just remove the various checks and assume it is true with a comment for the uninformed reader. Reviewed-by: Paul E. McKenney Reviewed-by: Amery Hung Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260227224806.646888-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index d7db1ada2dad..9c96a4477f81 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -107,14 +107,12 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; - /* If RCU Tasks Trace grace period implies RCU grace period, do - * kfree(), else do kfree_rcu(). + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. */ local_storage = container_of(rcu, struct bpf_local_storage, rcu); - if (rcu_trace_implies_rcu_gp()) - kfree(local_storage); - else - kfree_rcu(local_storage, rcu); + kfree(local_storage); } /* Handle use_kmalloc_nolock == false */ @@ -138,10 +136,11 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { - if (rcu_trace_implies_rcu_gp()) - bpf_local_storage_free_rcu(rcu); - else - call_rcu(rcu, bpf_local_storage_free_rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + bpf_local_storage_free_rcu(rcu); } static void bpf_local_storage_free(struct bpf_local_storage *local_storage, @@ -182,10 +181,11 @@ static void __bpf_selem_free_rcu(struct rcu_head *rcu) /* rcu tasks trace callback for use_kmalloc_nolock == false */ static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) { - if (rcu_trace_implies_rcu_gp()) - __bpf_selem_free_rcu(rcu); - else - call_rcu(rcu, __bpf_selem_free_rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + __bpf_selem_free_rcu(rcu); } /* Handle use_kmalloc_nolock == false */ @@ -214,10 +214,11 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) { - if (rcu_trace_implies_rcu_gp()) - bpf_selem_free_rcu(rcu); - else - call_rcu(rcu, bpf_selem_free_rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + bpf_selem_free_rcu(rcu); } void bpf_selem_free(struct bpf_local_storage_elem *selem, From 2939d7b3b0e5f35359ce8f69dbbad0bfc4e920b6 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 27 Feb 2026 14:48:05 -0800 Subject: [PATCH 517/828] selftests/bpf: Add tests for special fields races Add a couple of tests to ensure that the refcount drops to zero when we exercise the race where creation of a special field succeeds the logical bpf_obj_free_fields done when deleting an element. Prior to previous changes, the fields would be freed eagerly and repopulate and end up leaking, causing the reference to not drop down correctly. Running this test on a kernel without fixes will cause a hang in delete_module, since the module reference stays active due to the leaked kptr not dropping it. After the fixes tests succeed as expected. Reviewed-by: Amery Hung Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260227224806.646888-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/map_kptr_race.c | 218 ++++++++++++++++++ .../selftests/bpf/progs/map_kptr_race.c | 197 ++++++++++++++++ 2 files changed, 415 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_kptr_race.c create mode 100644 tools/testing/selftests/bpf/progs/map_kptr_race.c diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c new file mode 100644 index 000000000000..506ed55e8528 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include +#include + +#include "map_kptr_race.skel.h" + +static int get_map_id(int map_fd) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + if (!ASSERT_OK(bpf_map_get_info_by_fd(map_fd, &info, &len), "get_map_info")) + return -1; + return info.id; +} + +static int read_refs(struct map_kptr_race *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &opts); + if (!ASSERT_OK(ret, "count_ref run")) + return -1; + if (!ASSERT_OK(opts.retval, "count_ref retval")) + return -1; + return skel->bss->num_of_refs; +} + +static void test_htab_leak(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct map_kptr_race *skel, *watcher; + int ret, map_id; + + skel = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_htab_leak), &opts); + if (!ASSERT_OK(ret, "test_htab_leak run")) + goto out_skel; + if (!ASSERT_OK(opts.retval, "test_htab_leak retval")) + goto out_skel; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_hash_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free); + if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "htab refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + map_kptr_race__destroy(skel); +} + +static void test_percpu_htab_leak(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct map_kptr_race *skel, *watcher; + int ret, map_id; + + skel = map_kptr_race__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + if (skel->rodata->nr_cpus > 16) + skel->rodata->nr_cpus = 16; + + ret = map_kptr_race__load(skel); + if (!ASSERT_OK(ret, "load")) + goto out_skel; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_percpu_htab_leak), &opts); + if (!ASSERT_OK(ret, "test_percpu_htab_leak run")) + goto out_skel; + if (!ASSERT_OK(opts.retval, "test_percpu_htab_leak retval")) + goto out_skel; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_percpu_hash_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free); + if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "percpu_htab refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + map_kptr_race__destroy(skel); +} + +static void test_sk_ls_leak(void) +{ + struct map_kptr_race *skel, *watcher; + int listen_fd = -1, client_fd = -1, map_id; + + skel = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (!ASSERT_OK(map_kptr_race__attach(skel), "attach")) + goto out_skel; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(listen_fd, 0, "start_server")) + goto out_skel; + + client_fd = connect_to_fd(listen_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto out_skel; + + if (!ASSERT_EQ(skel->bss->sk_ls_leak_done, 1, "sk_ls_leak_done")) + goto out_skel; + + close(client_fd); + client_fd = -1; + close(listen_fd); + listen_fd = -1; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_sk_ls_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.sk_map_free = bpf_program__attach(watcher->progs.sk_map_free); + if (!ASSERT_OK_PTR(watcher->links.sk_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "sk_ls refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + if (client_fd >= 0) + close(client_fd); + if (listen_fd >= 0) + close(listen_fd); + map_kptr_race__destroy(skel); +} + +void serial_test_map_kptr_race(void) +{ + if (test__start_subtest("htab_leak")) + test_htab_leak(); + if (test__start_subtest("percpu_htab_leak")) + test_percpu_htab_leak(); + if (test__start_subtest("sk_ls_leak")) + test_sk_ls_leak(); +} diff --git a/tools/testing/selftests/bpf/progs/map_kptr_race.c b/tools/testing/selftests/bpf/progs/map_kptr_race.c new file mode 100644 index 000000000000..f6f136cd8f60 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_kptr_race.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include "../test_kmods/bpf_testmod_kfunc.h" + +struct map_value { + struct prog_test_ref_kfunc __kptr *ref_ptr; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} race_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} race_percpu_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} race_sk_ls_map SEC(".maps"); + +int num_of_refs; +int sk_ls_leak_done; +int target_map_id; +int map_freed; +const volatile int nr_cpus; + +SEC("tc") +int test_htab_leak(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *p, *old; + struct map_value val = {}; + struct map_value *v; + int key = 0; + + if (bpf_map_update_elem(&race_hash_map, &key, &val, BPF_ANY)) + return 1; + + v = bpf_map_lookup_elem(&race_hash_map, &key); + if (!v) + return 2; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 3; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + bpf_map_delete_elem(&race_hash_map, &key); + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 4; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + return 0; +} + +static int fill_percpu_kptr(struct map_value *v) +{ + struct prog_test_ref_kfunc *p, *old; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 1; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + return 0; +} + +SEC("tc") +int test_percpu_htab_leak(struct __sk_buff *skb) +{ + struct map_value *v, *arr[16] = {}; + struct map_value val = {}; + int key = 0; + int err = 0; + + if (bpf_map_update_elem(&race_percpu_hash_map, &key, &val, BPF_ANY)) + return 1; + + for (int i = 0; i < nr_cpus; i++) { + v = bpf_map_lookup_percpu_elem(&race_percpu_hash_map, &key, i); + if (!v) + return 2; + arr[i] = v; + } + + bpf_map_delete_elem(&race_percpu_hash_map, &key); + + for (int i = 0; i < nr_cpus; i++) { + v = arr[i]; + err = fill_percpu_kptr(v); + if (err) + return 3; + } + + return 0; +} + +SEC("tp_btf/inet_sock_set_state") +int BPF_PROG(test_sk_ls_leak, struct sock *sk, int oldstate, int newstate) +{ + struct prog_test_ref_kfunc *p, *old; + struct map_value *v; + + if (newstate != BPF_TCP_SYN_SENT) + return 0; + + if (sk_ls_leak_done) + return 0; + + v = bpf_sk_storage_get(&race_sk_ls_map, sk, NULL, + BPF_SK_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 0; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + bpf_sk_storage_delete(&race_sk_ls_map, sk); + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 0; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + sk_ls_leak_done = 1; + return 0; +} + +long target_map_ptr; + +SEC("fentry/bpf_map_put") +int BPF_PROG(map_put, struct bpf_map *map) +{ + if (target_map_id && map->id == (u32)target_map_id) + target_map_ptr = (long)map; + return 0; +} + +SEC("fexit/htab_map_free") +int BPF_PROG(htab_map_free, struct bpf_map *map) +{ + if (target_map_ptr && (long)map == target_map_ptr) + map_freed = 1; + return 0; +} + +SEC("fexit/bpf_sk_storage_map_free") +int BPF_PROG(sk_map_free, struct bpf_map *map) +{ + if (target_map_ptr && (long)map == target_map_ptr) + map_freed = 1; + return 0; +} + +SEC("syscall") +int count_ref(void *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long arg = 0; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 1; + + num_of_refs = p->cnt.refs.counter; + + bpf_kfunc_call_test_release(p); + return 0; +} + +char _license[] SEC("license") = "GPL"; From 869c63d5975d55e97f6b168e885452b3da20ea47 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 25 Feb 2026 20:14:55 +0800 Subject: [PATCH 518/828] bpf: Fix race in cpumap on PREEMPT_RT On PREEMPT_RT kernels, the per-CPU xdp_bulk_queue (bq) can be accessed concurrently by multiple preemptible tasks on the same CPU. The original code assumes bq_enqueue() and __cpu_map_flush() run atomically with respect to each other on the same CPU, relying on local_bh_disable() to prevent preemption. However, on PREEMPT_RT, local_bh_disable() only calls migrate_disable() (when PREEMPT_RT_NEEDS_BH_LOCK is not set) and does not disable preemption, which allows CFS scheduling to preempt a task during bq_flush_to_queue(), enabling another task on the same CPU to enter bq_enqueue() and operate on the same per-CPU bq concurrently. This leads to several races: 1. Double __list_del_clearprev(): after bq->count is reset in bq_flush_to_queue(), a preempting task can call bq_enqueue() -> bq_flush_to_queue() on the same bq when bq->count reaches CPU_MAP_BULK_SIZE. Both tasks then call __list_del_clearprev() on the same bq->flush_node, the second call dereferences the prev pointer that was already set to NULL by the first. 2. bq->count and bq->q[] races: concurrent bq_enqueue() can corrupt the packet queue while bq_flush_to_queue() is processing it. The race between task A (__cpu_map_flush -> bq_flush_to_queue) and task B (bq_enqueue -> bq_flush_to_queue) on the same CPU: Task A (xdp_do_flush) Task B (cpu_map_enqueue) ---------------------- ------------------------ bq_flush_to_queue(bq) spin_lock(&q->producer_lock) /* flush bq->q[] to ptr_ring */ bq->count = 0 spin_unlock(&q->producer_lock) bq_enqueue(rcpu, xdpf) <-- CFS preempts Task A --> bq->q[bq->count++] = xdpf /* ... more enqueues until full ... */ bq_flush_to_queue(bq) spin_lock(&q->producer_lock) /* flush to ptr_ring */ spin_unlock(&q->producer_lock) __list_del_clearprev(flush_node) /* sets flush_node.prev = NULL */ <-- Task A resumes --> __list_del_clearprev(flush_node) flush_node.prev->next = ... /* prev is NULL -> kernel oops */ Fix this by adding a local_lock_t to xdp_bulk_queue and acquiring it in bq_enqueue() and __cpu_map_flush(). These paths already run under local_bh_disable(), so use local_lock_nested_bh() which on non-RT is a pure annotation with no overhead, and on PREEMPT_RT provides a per-CPU sleeping lock that serializes access to the bq. To reproduce, insert an mdelay(100) between bq->count = 0 and __list_del_clearprev() in bq_flush_to_queue(), then run reproducer provided by syzkaller. Fixes: 3253cb49cbad ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT") Reported-by: syzbot+2b3391f44313b3983e91@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69369331.a70a0220.38f243.009d.GAE@google.com/T/ Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Jiayuan Chen Signed-off-by: Jiayuan Chen Link: https://lore.kernel.org/r/20260225121459.183121-2-jiayuan.chen@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 04171fbc39cb..32b43cb9061b 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,7 @@ struct xdp_bulk_queue { struct list_head flush_node; struct bpf_cpu_map_entry *obj; unsigned int count; + local_lock_t bq_lock; }; /* Struct for every remote "destination" CPU in map */ @@ -451,6 +453,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, for_each_possible_cpu(i) { bq = per_cpu_ptr(rcpu->bulkq, i); bq->obj = rcpu; + local_lock_init(&bq->bq_lock); } /* Alloc queue */ @@ -722,6 +725,8 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) struct ptr_ring *q; int i; + lockdep_assert_held(&bq->bq_lock); + if (unlikely(!bq->count)) return; @@ -749,11 +754,15 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) } /* Runs under RCU-read-side, plus in softirq under NAPI protection. - * Thus, safe percpu variable access. + * Thus, safe percpu variable access. PREEMPT_RT relies on + * local_lock_nested_bh() to serialise access to the per-CPU bq. */ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); + struct xdp_bulk_queue *bq; + + local_lock_nested_bh(&rcpu->bulkq->bq_lock); + bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) bq_flush_to_queue(bq); @@ -774,6 +783,8 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) list_add(&bq->flush_node, flush_list); } + + local_unlock_nested_bh(&rcpu->bulkq->bq_lock); } int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, @@ -810,7 +821,9 @@ void __cpu_map_flush(struct list_head *flush_list) struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { + local_lock_nested_bh(&bq->obj->bulkq->bq_lock); bq_flush_to_queue(bq); + local_unlock_nested_bh(&bq->obj->bulkq->bq_lock); /* If already running, costs spin_lock_irqsave + smb_mb */ wake_up_process(bq->obj->kthread); From 1872e75375c40add4a35990de3be77b5741c252c Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 25 Feb 2026 20:14:56 +0800 Subject: [PATCH 519/828] bpf: Fix race in devmap on PREEMPT_RT On PREEMPT_RT kernels, the per-CPU xdp_dev_bulk_queue (bq) can be accessed concurrently by multiple preemptible tasks on the same CPU. The original code assumes bq_enqueue() and __dev_flush() run atomically with respect to each other on the same CPU, relying on local_bh_disable() to prevent preemption. However, on PREEMPT_RT, local_bh_disable() only calls migrate_disable() (when PREEMPT_RT_NEEDS_BH_LOCK is not set) and does not disable preemption, which allows CFS scheduling to preempt a task during bq_xmit_all(), enabling another task on the same CPU to enter bq_enqueue() and operate on the same per-CPU bq concurrently. This leads to several races: 1. Double-free / use-after-free on bq->q[]: bq_xmit_all() snapshots cnt = bq->count, then iterates bq->q[0..cnt-1] to transmit frames. If preempted after the snapshot, a second task can call bq_enqueue() -> bq_xmit_all() on the same bq, transmitting (and freeing) the same frames. When the first task resumes, it operates on stale pointers in bq->q[], causing use-after-free. 2. bq->count and bq->q[] corruption: concurrent bq_enqueue() modifying bq->count and bq->q[] while bq_xmit_all() is reading them. 3. dev_rx/xdp_prog teardown race: __dev_flush() clears bq->dev_rx and bq->xdp_prog after bq_xmit_all(). If preempted between bq_xmit_all() return and bq->dev_rx = NULL, a preempting bq_enqueue() sees dev_rx still set (non-NULL), skips adding bq to the flush_list, and enqueues a frame. When __dev_flush() resumes, it clears dev_rx and removes bq from the flush_list, orphaning the newly enqueued frame. 4. __list_del_clearprev() on flush_node: similar to the cpumap race, both tasks can call __list_del_clearprev() on the same flush_node, the second dereferences the prev pointer already set to NULL. The race between task A (__dev_flush -> bq_xmit_all) and task B (bq_enqueue -> bq_xmit_all) on the same CPU: Task A (xdp_do_flush) Task B (ndo_xdp_xmit redirect) ---------------------- -------------------------------- __dev_flush(flush_list) bq_xmit_all(bq) cnt = bq->count /* e.g. 16 */ /* start iterating bq->q[] */ <-- CFS preempts Task A --> bq_enqueue(dev, xdpf) bq->count == DEV_MAP_BULK_SIZE bq_xmit_all(bq, 0) cnt = bq->count /* same 16! */ ndo_xdp_xmit(bq->q[]) /* frames freed by driver */ bq->count = 0 <-- Task A resumes --> ndo_xdp_xmit(bq->q[]) /* use-after-free: frames already freed! */ Fix this by adding a local_lock_t to xdp_dev_bulk_queue and acquiring it in bq_enqueue() and __dev_flush(). These paths already run under local_bh_disable(), so use local_lock_nested_bh() which on non-RT is a pure annotation with no overhead, and on PREEMPT_RT provides a per-CPU sleeping lock that serializes access to the bq. Fixes: 3253cb49cbad ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT") Reported-by: Sebastian Andrzej Siewior Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Jiayuan Chen Signed-off-by: Jiayuan Chen Link: https://lore.kernel.org/r/20260225121459.183121-3-jiayuan.chen@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2984e938f94d..3d619d01088e 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -45,6 +45,7 @@ * types of devmap; only the lookup and insertion is different. */ #include +#include #include #include #include @@ -60,6 +61,7 @@ struct xdp_dev_bulk_queue { struct net_device *dev_rx; struct bpf_prog *xdp_prog; unsigned int count; + local_lock_t bq_lock; }; struct bpf_dtab_netdev { @@ -381,6 +383,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags) int to_send = cnt; int i; + lockdep_assert_held(&bq->bq_lock); + if (unlikely(!cnt)) return; @@ -425,10 +429,12 @@ void __dev_flush(struct list_head *flush_list) struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { + local_lock_nested_bh(&bq->dev->xdp_bulkq->bq_lock); bq_xmit_all(bq, XDP_XMIT_FLUSH); bq->dev_rx = NULL; bq->xdp_prog = NULL; __list_del_clearprev(&bq->flush_node); + local_unlock_nested_bh(&bq->dev->xdp_bulkq->bq_lock); } } @@ -451,12 +457,16 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key) /* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu * variable access, and map elements stick around. See comment above - * xdp_do_flush() in filter.c. + * xdp_do_flush() in filter.c. PREEMPT_RT relies on local_lock_nested_bh() + * to serialise access to the per-CPU bq. */ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); + struct xdp_dev_bulk_queue *bq; + + local_lock_nested_bh(&dev->xdp_bulkq->bq_lock); + bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) bq_xmit_all(bq, 0); @@ -477,6 +487,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, } bq->q[bq->count++] = xdpf; + + local_unlock_nested_bh(&dev->xdp_bulkq->bq_lock); } static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, @@ -1127,8 +1139,13 @@ static int dev_map_notification(struct notifier_block *notifier, if (!netdev->xdp_bulkq) return NOTIFY_BAD; - for_each_possible_cpu(cpu) - per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev; + for_each_possible_cpu(cpu) { + struct xdp_dev_bulk_queue *bq; + + bq = per_cpu_ptr(netdev->xdp_bulkq, cpu); + bq->dev = netdev; + local_lock_init(&bq->bq_lock); + } break; case NETDEV_UNREGISTER: /* This rcu_read_lock/unlock pair is needed because From 62413a9c3cb183afb9bb6e94dd68caf4e4145f4c Mon Sep 17 00:00:00 2001 From: Paul Moses Date: Mon, 23 Feb 2026 15:05:44 +0000 Subject: [PATCH 520/828] net/sched: act_gate: snapshot parameters with RCU on replace The gate action can be replaced while the hrtimer callback or dump path is walking the schedule list. Convert the parameters to an RCU-protected snapshot and swap updates under tcf_lock, freeing the previous snapshot via call_rcu(). When REPLACE omits the entry list, preserve the existing schedule so the effective state is unchanged. Fixes: a51c328df310 ("net: qos: introduce a gate control flow action") Cc: stable@vger.kernel.org Signed-off-by: Paul Moses Tested-by: Vladimir Oltean Acked-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260223150512.2251594-2-p@1g4.org Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_gate.h | 33 ++++- net/sched/act_gate.c | 267 ++++++++++++++++++++++++----------- 2 files changed, 213 insertions(+), 87 deletions(-) diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index b147a3bb1a46..e0fded18e18c 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -32,6 +32,7 @@ struct tcf_gate_params { s32 tcfg_clockid; size_t num_entries; struct list_head entries; + struct rcu_head rcu; }; #define GATE_ACT_GATE_OPEN BIT(0) @@ -39,7 +40,7 @@ struct tcf_gate_params { struct tcf_gate { struct tc_action common; - struct tcf_gate_params param; + struct tcf_gate_params __rcu *param; u8 current_gate_status; ktime_t current_close_time; u32 current_entry_octets; @@ -51,47 +52,65 @@ struct tcf_gate { #define to_gate(a) ((struct tcf_gate *)a) +static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a) +{ + struct tcf_gate *gact = to_gate(a); + + return rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); +} + static inline s32 tcf_gate_prio(const struct tc_action *a) { + struct tcf_gate_params *p; s32 tcfg_prio; - tcfg_prio = to_gate(a)->param.tcfg_priority; + p = tcf_gate_params_locked(a); + tcfg_prio = p->tcfg_priority; return tcfg_prio; } static inline u64 tcf_gate_basetime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_basetime; - tcfg_basetime = to_gate(a)->param.tcfg_basetime; + p = tcf_gate_params_locked(a); + tcfg_basetime = p->tcfg_basetime; return tcfg_basetime; } static inline u64 tcf_gate_cycletime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletime; - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; + p = tcf_gate_params_locked(a); + tcfg_cycletime = p->tcfg_cycletime; return tcfg_cycletime; } static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletimeext; - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; + p = tcf_gate_params_locked(a); + tcfg_cycletimeext = p->tcfg_cycletime_ext; return tcfg_cycletimeext; } static inline u32 tcf_gate_num_entries(const struct tc_action *a) { + struct tcf_gate_params *p; u32 num_entries; - num_entries = to_gate(a)->param.num_entries; + p = tcf_gate_params_locked(a); + num_entries = p->num_entries; return num_entries; } @@ -105,7 +124,7 @@ static inline struct action_gate_entry u32 num_entries; int i = 0; - p = &to_gate(a)->param; + p = tcf_gate_params_locked(a); num_entries = p->num_entries; list_for_each_entry(entry, &p->entries, list) diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 686eaed81b81..fdbfcaa3e2ab 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -32,9 +32,12 @@ static ktime_t gate_get_time(struct tcf_gate *gact) return KTIME_MAX; } -static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start) +static void tcf_gate_params_free_rcu(struct rcu_head *head); + +static void gate_get_start_time(struct tcf_gate *gact, + const struct tcf_gate_params *param, + ktime_t *start) { - struct tcf_gate_params *param = &gact->param; ktime_t now, base, cycle; u64 n; @@ -69,12 +72,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer) { struct tcf_gate *gact = container_of(timer, struct tcf_gate, hitimer); - struct tcf_gate_params *p = &gact->param; struct tcfg_gate_entry *next; + struct tcf_gate_params *p; ktime_t close_time, now; spin_lock(&gact->tcf_lock); + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); next = gact->next_entry; /* cycle start, clear pending bit, clear total octets */ @@ -225,6 +230,35 @@ static void release_entry_list(struct list_head *entries) } } +static int tcf_gate_copy_entries(struct tcf_gate_params *dst, + const struct tcf_gate_params *src, + struct netlink_ext_ack *extack) +{ + struct tcfg_gate_entry *entry; + int i = 0; + + list_for_each_entry(entry, &src->entries, list) { + struct tcfg_gate_entry *new; + + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) { + NL_SET_ERR_MSG(extack, "Not enough memory for entry"); + return -ENOMEM; + } + + new->index = entry->index; + new->gate_state = entry->gate_state; + new->interval = entry->interval; + new->ipv = entry->ipv; + new->maxoctets = entry->maxoctets; + list_add_tail(&new->list, &dst->entries); + i++; + } + + dst->num_entries = i; + return 0; +} + static int parse_gate_list(struct nlattr *list_attr, struct tcf_gate_params *sched, struct netlink_ext_ack *extack) @@ -270,24 +304,44 @@ release_list: return err; } -static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, - enum tk_offsets tko, s32 clockid, - bool do_init) +static bool gate_timer_needs_cancel(u64 basetime, u64 old_basetime, + enum tk_offsets tko, + enum tk_offsets old_tko, + s32 clockid, s32 old_clockid) { - if (!do_init) { - if (basetime == gact->param.tcfg_basetime && - tko == gact->tk_offset && - clockid == gact->param.tcfg_clockid) - return; + return basetime != old_basetime || + clockid != old_clockid || + tko != old_tko; +} - spin_unlock_bh(&gact->tcf_lock); - hrtimer_cancel(&gact->hitimer); - spin_lock_bh(&gact->tcf_lock); +static int gate_clock_resolve(s32 clockid, enum tk_offsets *tko, + struct netlink_ext_ack *extack) +{ + switch (clockid) { + case CLOCK_REALTIME: + *tko = TK_OFFS_REAL; + return 0; + case CLOCK_MONOTONIC: + *tko = TK_OFFS_MAX; + return 0; + case CLOCK_BOOTTIME: + *tko = TK_OFFS_BOOT; + return 0; + case CLOCK_TAI: + *tko = TK_OFFS_TAI; + return 0; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + return -EINVAL; } - gact->param.tcfg_basetime = basetime; - gact->param.tcfg_clockid = clockid; - gact->tk_offset = tko; - hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT); +} + +static void gate_setup_timer(struct tcf_gate *gact, s32 clockid, + enum tk_offsets tko) +{ + WRITE_ONCE(gact->tk_offset, tko); + hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, + HRTIMER_MODE_ABS_SOFT); } static int tcf_gate_init(struct net *net, struct nlattr *nla, @@ -296,15 +350,22 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); - enum tk_offsets tk_offset = TK_OFFS_TAI; + u64 cycletime = 0, basetime = 0, cycletime_ext = 0; + struct tcf_gate_params *p = NULL, *old_p = NULL; + enum tk_offsets old_tk_offset = TK_OFFS_TAI; + const struct tcf_gate_params *cur_p = NULL; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; + enum tk_offsets tko = TK_OFFS_TAI; struct tcf_chain *goto_ch = NULL; - u64 cycletime = 0, basetime = 0; - struct tcf_gate_params *p; + s32 timer_clockid = CLOCK_TAI; + bool use_old_entries = false; + s32 old_clockid = CLOCK_TAI; + bool need_cancel = false; s32 clockid = CLOCK_TAI; struct tcf_gate *gact; struct tc_gate *parm; + u64 old_basetime = 0; int ret = 0, err; u32 gflags = 0; s32 prio = -1; @@ -321,26 +382,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (!tb[TCA_GATE_PARMS]) return -EINVAL; - if (tb[TCA_GATE_CLOCKID]) { + if (tb[TCA_GATE_CLOCKID]) clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); - switch (clockid) { - case CLOCK_REALTIME: - tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - return -EINVAL; - } - } parm = nla_data(tb[TCA_GATE_PARMS]); index = parm->index; @@ -366,6 +409,60 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, return -EEXIST; } + gact = to_gate(*a); + + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + err = -ENOMEM; + goto chain_put; + } + INIT_LIST_HEAD(&p->entries); + + use_old_entries = !tb[TCA_GATE_ENTRY_LIST]; + if (!use_old_entries) { + err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); + if (err < 0) + goto err_free; + use_old_entries = !err; + } + + if (ret == ACT_P_CREATED && use_old_entries) { + NL_SET_ERR_MSG(extack, "The entry list is empty"); + err = -EINVAL; + goto err_free; + } + + if (ret != ACT_P_CREATED) { + rcu_read_lock(); + cur_p = rcu_dereference(gact->param); + + old_basetime = cur_p->tcfg_basetime; + old_clockid = cur_p->tcfg_clockid; + old_tk_offset = READ_ONCE(gact->tk_offset); + + basetime = old_basetime; + cycletime_ext = cur_p->tcfg_cycletime_ext; + prio = cur_p->tcfg_priority; + gflags = cur_p->tcfg_flags; + + if (!tb[TCA_GATE_CLOCKID]) + clockid = old_clockid; + + err = 0; + if (use_old_entries) { + err = tcf_gate_copy_entries(p, cur_p, extack); + if (!err && !tb[TCA_GATE_CYCLE_TIME]) + cycletime = cur_p->tcfg_cycletime; + } + rcu_read_unlock(); + if (err) + goto err_free; + } + if (tb[TCA_GATE_PRIORITY]) prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); @@ -375,25 +472,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (tb[TCA_GATE_FLAGS]) gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); - gact = to_gate(*a); - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&gact->param.entries); - - err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); - if (err < 0) - goto release_idr; - - spin_lock_bh(&gact->tcf_lock); - p = &gact->param; - if (tb[TCA_GATE_CYCLE_TIME]) cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); - if (tb[TCA_GATE_ENTRY_LIST]) { - err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); - if (err < 0) - goto chain_put; - } + if (tb[TCA_GATE_CYCLE_TIME_EXT]) + cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); + + err = gate_clock_resolve(clockid, &tko, extack); + if (err) + goto err_free; + timer_clockid = clockid; + + need_cancel = ret != ACT_P_CREATED && + gate_timer_needs_cancel(basetime, old_basetime, + tko, old_tk_offset, + timer_clockid, old_clockid); + + if (need_cancel) + hrtimer_cancel(&gact->hitimer); + + spin_lock_bh(&gact->tcf_lock); if (!cycletime) { struct tcfg_gate_entry *entry; @@ -402,22 +500,20 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, list_for_each_entry(entry, &p->entries, list) cycle = ktime_add_ns(cycle, entry->interval); cycletime = cycle; - if (!cycletime) { - err = -EINVAL; - goto chain_put; - } } p->tcfg_cycletime = cycletime; + p->tcfg_cycletime_ext = cycletime_ext; - if (tb[TCA_GATE_CYCLE_TIME_EXT]) - p->tcfg_cycletime_ext = - nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); - - gate_setup_timer(gact, basetime, tk_offset, clockid, - ret == ACT_P_CREATED); + if (need_cancel || ret == ACT_P_CREATED) + gate_setup_timer(gact, timer_clockid, tko); p->tcfg_priority = prio; p->tcfg_flags = gflags; - gate_get_start_time(gact, &start); + p->tcfg_basetime = basetime; + p->tcfg_clockid = timer_clockid; + gate_get_start_time(gact, p, &start); + + old_p = rcu_replace_pointer(gact->param, p, + lockdep_is_held(&gact->tcf_lock)); gact->current_close_time = start; gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; @@ -434,11 +530,15 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); + if (old_p) + call_rcu(&old_p->rcu, tcf_gate_params_free_rcu); + return ret; +err_free: + release_entry_list(&p->entries); + kfree(p); chain_put: - spin_unlock_bh(&gact->tcf_lock); - if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: @@ -446,21 +546,29 @@ release_idr: * without taking tcf_lock. */ if (ret == ACT_P_CREATED) - gate_setup_timer(gact, gact->param.tcfg_basetime, - gact->tk_offset, gact->param.tcfg_clockid, - true); + gate_setup_timer(gact, timer_clockid, tko); + tcf_idr_release(*a, bind); return err; } +static void tcf_gate_params_free_rcu(struct rcu_head *head) +{ + struct tcf_gate_params *p = container_of(head, struct tcf_gate_params, rcu); + + release_entry_list(&p->entries); + kfree(p); +} + static void tcf_gate_cleanup(struct tc_action *a) { struct tcf_gate *gact = to_gate(a); struct tcf_gate_params *p; - p = &gact->param; hrtimer_cancel(&gact->hitimer); - release_entry_list(&p->entries); + p = rcu_dereference_protected(gact->param, 1); + if (p) + call_rcu(&p->rcu, tcf_gate_params_free_rcu); } static int dumping_entry(struct sk_buff *skb, @@ -509,10 +617,9 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, struct nlattr *entry_list; struct tcf_t t; - spin_lock_bh(&gact->tcf_lock); - opt.action = gact->tcf_action; - - p = &gact->param; + rcu_read_lock(); + opt.action = READ_ONCE(gact->tcf_action); + p = rcu_dereference(gact->param); if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -552,12 +659,12 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &gact->tcf_tm); if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD)) goto nla_put_failure; - spin_unlock_bh(&gact->tcf_lock); + rcu_read_unlock(); return skb->len; nla_put_failure: - spin_unlock_bh(&gact->tcf_lock); + rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } From 76e954155b45294c502e3d3a9e15757c858ca55e Mon Sep 17 00:00:00 2001 From: Harishankar Vishwanathan Date: Fri, 27 Feb 2026 22:32:21 +0100 Subject: [PATCH 521/828] bpf: Introduce tnum_step to step through tnum's members This commit introduces tnum_step(), a function that, when given t, and a number z returns the smallest member of t larger than z. The number z must be greater or equal to the smallest member of t and less than the largest member of t. The first step is to compute j, a number that keeps all of t's known bits, and matches all unknown bits to z's bits. Since j is a member of the t, it is already a candidate for result. However, we want our result to be (minimally) greater than z. There are only two possible cases: (1) Case j <= z. In this case, we want to increase the value of j and make it > z. (2) Case j > z. In this case, we want to decrease the value of j while keeping it > z. (Case 1) j <= z t = xx11x0x0 z = 10111101 (189) j = 10111000 (184) ^ k (Case 1.1) Let's first consider the case where j < z. We will address j == z later. Since z > j, there had to be a bit position that was 1 in z and a 0 in j, beyond which all positions of higher significance are equal in j and z. Further, this position could not have been unknown in a, because the unknown positions of a match z. This position had to be a 1 in z and known 0 in t. Let k be position of the most significant 1-to-0 flip. In our example, k = 3 (starting the count at 1 at the least significant bit). Setting (to 1) the unknown bits of t in positions of significance smaller than k will not produce a result > z. Hence, we must set/unset the unknown bits at positions of significance higher than k. Specifically, we look for the next larger combination of 1s and 0s to place in those positions, relative to the combination that exists in z. We can achieve this by concatenating bits at unknown positions of t into an integer, adding 1, and writing the bits of that result back into the corresponding bit positions previously extracted from z. >From our example, considering only positions of significance greater than k: t = xx..x z = 10..1 + 1 ----- 11..0 This is the exact combination 1s and 0s we need at the unknown bits of t in positions of significance greater than k. Further, our result must only increase the value minimally above z. Hence, unknown bits in positions of significance smaller than k should remain 0. We finally have, result = 11110000 (240) (Case 1.2) Now consider the case when j = z, for example t = 1x1x0xxx z = 10110100 (180) j = 10110100 (180) Matching the unknown bits of the t to the bits of z yielded exactly z. To produce a number greater than z, we must set/unset the unknown bits in t, and *all* the unknown bits of t candidates for being set/unset. We can do this similar to Case 1.1, by adding 1 to the bits extracted from the masked bit positions of z. Essentially, this case is equivalent to Case 1.1, with k = 0. t = 1x1x0xxx z = .0.1.100 + 1 --------- .0.1.101 This is the exact combination of bits needed in the unknown positions of t. After recalling the known positions of t, we get result = 10110101 (181) (Case 2) j > z t = x00010x1 z = 10000010 (130) j = 10001011 (139) ^ k Since j > z, there had to be a bit position which was 0 in z, and a 1 in j, beyond which all positions of higher significance are equal in j and z. This position had to be a 0 in z and known 1 in t. Let k be the position of the most significant 0-to-1 flip. In our example, k = 4. Because of the 0-to-1 flip at position k, a member of t can become greater than z if the bits in positions greater than k are themselves >= to z. To make that member *minimally* greater than z, the bits in positions greater than k must be exactly = z. Hence, we simply match all of t's unknown bits in positions more significant than k to z's bits. In positions less significant than k, we set all t's unknown bits to 0 to retain minimality. In our example, in positions of greater significance than k (=4), t=x000. These positions are matched with z (1000) to produce 1000. In positions of lower significance than k, t=10x1. All unknown bits are set to 0 to produce 1001. The final result is: result = 10001001 (137) This concludes the computation for a result > z that is a member of t. The procedure for tnum_step() in this commit implements the idea described above. As a proof of correctness, we verified the algorithm against a logical specification of tnum_step. The specification asserts the following about the inputs t, z and output res that: 1. res is a member of t, and 2. res is strictly greater than z, and 3. there does not exist another value res2 such that 3a. res2 is also a member of t, and 3b. res2 is greater than z 3c. res2 is smaller than res We checked the implementation against this logical specification using an SMT solver. The verification formula in SMTLIB format is available at [1]. The verification returned an "unsat": indicating that no input assignment exists for which the implementation and the specification produce different outputs. In addition, we also automatically generated the logical encoding of the C implementation using Agni [2] and verified it against the same specification. This verification also returned an "unsat", confirming that the implementation is equivalent to the specification. The formula for this check is also available at [3]. Link: https://pastebin.com/raw/2eRWbiit [1] Link: https://github.com/bpfverif/agni [2] Link: https://pastebin.com/raw/EztVbBJ2 [3] Co-developed-by: Srinivas Narayana Signed-off-by: Srinivas Narayana Co-developed-by: Santosh Nagarakatte Signed-off-by: Santosh Nagarakatte Signed-off-by: Harishankar Vishwanathan Link: https://lore.kernel.org/r/93fdf71910411c0f19e282ba6d03b4c65f9c5d73.1772225741.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/tnum.h | 3 +++ kernel/bpf/tnum.c | 56 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/include/linux/tnum.h b/include/linux/tnum.h index fa4654ffb621..ca2cfec8de08 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -131,4 +131,7 @@ static inline bool tnum_subreg_is_const(struct tnum a) return !(tnum_subreg(a)).mask; } +/* Returns the smallest member of t larger than z */ +u64 tnum_step(struct tnum t, u64 z); + #endif /* _LINUX_TNUM_H */ diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 26fbfbb01700..4abc359b3db0 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -269,3 +269,59 @@ struct tnum tnum_bswap64(struct tnum a) { return TNUM(swab64(a.value), swab64(a.mask)); } + +/* Given tnum t, and a number z such that tmin <= z < tmax, where tmin + * is the smallest member of the t (= t.value) and tmax is the largest + * member of t (= t.value | t.mask), returns the smallest member of t + * larger than z. + * + * For example, + * t = x11100x0 + * z = 11110001 (241) + * result = 11110010 (242) + * + * Note: if this function is called with z >= tmax, it just returns + * early with tmax; if this function is called with z < tmin, the + * algorithm already returns tmin. + */ +u64 tnum_step(struct tnum t, u64 z) +{ + u64 tmax, j, p, q, r, s, v, u, w, res; + u8 k; + + tmax = t.value | t.mask; + + /* if z >= largest member of t, return largest member of t */ + if (z >= tmax) + return tmax; + + /* if z < smallest member of t, return smallest member of t */ + if (z < t.value) + return t.value; + + /* keep t's known bits, and match all unknown bits to z */ + j = t.value | (z & t.mask); + + if (j > z) { + p = ~z & t.value & ~t.mask; + k = fls64(p); /* k is the most-significant 0-to-1 flip */ + q = U64_MAX << k; + r = q & z; /* positions > k matched to z */ + s = ~q & t.value; /* positions <= k matched to t.value */ + v = r | s; + res = v; + } else { + p = z & ~t.value & ~t.mask; + k = fls64(p); /* k is the most-significant 1-to-0 flip */ + q = U64_MAX << k; + r = q & t.mask & z; /* unknown positions > k, matched to z */ + s = q & ~t.mask; /* known positions > k, set to 1 */ + v = r | s; + /* add 1 to unknown positions > k to make value greater than z */ + u = v + (1ULL << k); + /* extract bits in unknown positions > k from u, rest from t.value */ + w = (u & t.mask) | t.value; + res = w; + } + return res; +} From efc11a667878a1d655ff034a93a539debbfedb12 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 27 Feb 2026 22:35:02 +0100 Subject: [PATCH 522/828] bpf: Improve bounds when tnum has a single possible value We're hitting an invariant violation in Cilium that sometimes leads to BPF programs being rejected and Cilium failing to start [1]. The following extract from verifier logs shows what's happening: from 201 to 236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0 236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0 ; if (magic == MARK_MAGIC_HOST || magic == MARK_MAGIC_OVERLAY || magic == MARK_MAGIC_ENCRYPT) @ bpf_host.c:1337 236: (16) if w9 == 0xe00 goto pc+45 ; R9=scalar(smin=umin=smin32=umin32=3585,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) 237: (16) if w9 == 0xf00 goto pc+1 verifier bug: REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0xe01, 0xe00] s64=[0xe01, 0xe00] u32=[0xe01, 0xe00] s32=[0xe01, 0xe00] var_off=(0xe00, 0x0) We reach instruction 236 with two possible values for R9, 0xe00 and 0xf00. This is perfectly reflected in the tnum, but of course the ranges are less accurate and cover [0xe00; 0xf00]. Taking the fallthrough path at instruction 236 allows the verifier to reduce the range to [0xe01; 0xf00]. The tnum is however not updated. With these ranges, at instruction 237, the verifier is not able to deduce that R9 is always equal to 0xf00. Hence the fallthrough pass is explored first, the verifier refines the bounds using the assumption that R9 != 0xf00, and ends up with an invariant violation. This pattern of impossible branch + bounds refinement is common to all invariant violations seen so far. The long-term solution is likely to rely on the refinement + invariant violation check to detect dead branches, as started by Eduard. To fix the current issue, we need something with less refactoring that we can backport. This patch uses the tnum_step helper introduced in the previous patch to detect the above situation. In particular, three cases are now detected in the bounds refinement: 1. The u64 range and the tnum only overlap in umin. u64: ---[xxxxxx]----- tnum: --xx----------x- 2. The u64 range and the tnum only overlap in the maximum value represented by the tnum, called tmax. u64: ---[xxxxxx]----- tnum: xx-----x-------- 3. The u64 range and the tnum only overlap in between umin (excluded) and umax. u64: ---[xxxxxx]----- tnum: xx----x-------x- To detect these three cases, we call tnum_step(tnum, umin), which returns the smallest member of the tnum greater than umin, called tnum_next here. We're in case (1) if umin is part of the tnum and tnum_next is greater than umax. We're in case (2) if umin is not part of the tnum and tnum_next is equal to tmax. Finally, we're in case (3) if umin is not part of the tnum, tnum_next is inferior or equal to umax, and calling tnum_step a second time gives us a value past umax. This change implements these three cases. With it, the above bytecode looks as follows: 0: (85) call bpf_get_prandom_u32#7 ; R0=scalar() 1: (47) r0 |= 3584 ; R0=scalar(smin=0x8000000000000e00,umin=umin32=3584,smin32=0x80000e00,var_off=(0xe00; 0xfffffffffffff1ff)) 2: (57) r0 &= 3840 ; R0=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) 3: (15) if r0 == 0xe00 goto pc+2 ; R0=3840 4: (15) if r0 == 0xf00 goto pc+1 4: R0=3840 6: (95) exit In addition to the new selftests, this change was also verified with Agni [3]. For the record, the raw SMT is available at [4]. The property it verifies is that: If a concrete value x is contained in all input abstract values, after __update_reg_bounds, it will continue to be contained in all output abstract values. Link: https://github.com/cilium/cilium/issues/44216 [1] Link: https://pchaigno.github.io/test-verifier-complexity.html [2] Link: https://github.com/bpfverif/agni [3] Link: https://pastebin.com/raw/naCfaqNx [4] Fixes: 0df1a55afa83 ("bpf: Warn on internal verifier errors") Acked-by: Eduard Zingerman Tested-by: Marco Schirrmeister Co-developed-by: Harishankar Vishwanathan Signed-off-by: Harishankar Vishwanathan Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/ef254c4f68be19bd393d450188946821c588565d.1772225741.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb12ba020649..401d6c4960ec 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2379,6 +2379,9 @@ static void __update_reg32_bounds(struct bpf_reg_state *reg) static void __update_reg64_bounds(struct bpf_reg_state *reg) { + u64 tnum_next, tmax; + bool umin_in_tnum; + /* min signed is max(sign bit) | min(other bits) */ reg->smin_value = max_t(s64, reg->smin_value, reg->var_off.value | (reg->var_off.mask & S64_MIN)); @@ -2388,6 +2391,33 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg) reg->umin_value = max(reg->umin_value, reg->var_off.value); reg->umax_value = min(reg->umax_value, reg->var_off.value | reg->var_off.mask); + + /* Check if u64 and tnum overlap in a single value */ + tnum_next = tnum_step(reg->var_off, reg->umin_value); + umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value; + tmax = reg->var_off.value | reg->var_off.mask; + if (umin_in_tnum && tnum_next > reg->umax_value) { + /* The u64 range and the tnum only overlap in umin. + * u64: ---[xxxxxx]----- + * tnum: --xx----------x- + */ + ___mark_reg_known(reg, reg->umin_value); + } else if (!umin_in_tnum && tnum_next == tmax) { + /* The u64 range and the tnum only overlap in the maximum value + * represented by the tnum, called tmax. + * u64: ---[xxxxxx]----- + * tnum: xx-----x-------- + */ + ___mark_reg_known(reg, tmax); + } else if (!umin_in_tnum && tnum_next <= reg->umax_value && + tnum_step(reg->var_off, tnum_next) > reg->umax_value) { + /* The u64 range and the tnum only overlap in between umin + * (excluded) and umax. + * u64: ---[xxxxxx]----- + * tnum: xx----x-------x- + */ + ___mark_reg_known(reg, tnum_next); + } } static void __update_reg_bounds(struct bpf_reg_state *reg) From e6ad477d1bf8829973cddd9accbafa9d1a6cd15a Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 27 Feb 2026 22:36:30 +0100 Subject: [PATCH 523/828] selftests/bpf: Test refinement of single-value tnum This patch introduces selftests to cover the new bounds refinement logic introduced in the previous patch. Without the previous patch, the first two tests fail because of the invariant violation they trigger. The last test fails because the R10 access is not detected as dead code. In addition, all three tests fail because of R0 having a non-constant value in the verifier logs. In addition, the last two cases are covering the negative cases: when we shouldn't refine the bounds because the u64 and tnum overlap in at least two values. Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/90d880c8cf587b9f7dc715d8961cd1b8111d01a8.1772225741.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_bounds.c | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 560531404bce..97065a26cf70 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1863,4 +1863,141 @@ l1_%=: r0 = 1; \ : __clobber_all); } +/* This test covers the bounds deduction when the u64 range and the tnum + * overlap only at umax. After instruction 3, the ranges look as follows: + * + * 0 umin=0xe01 umax=0xf00 U64_MAX + * | [xxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * | x x | tnum values + * + * The verifier can therefore deduce that the R0=0xf0=240. + */ +SEC("socket") +__description("bounds refinement with single-value tnum on umax") +__msg("3: (15) if r0 == 0xe0 {{.*}} R0=240") +__success __log_level(2) +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void bounds_refinement_tnum_umax(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 |= 0xe0; \ + r0 &= 0xf0; \ + if r0 == 0xe0 goto +2; \ + if r0 == 0xf0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the bounds deduction when the u64 range and the tnum + * overlap only at umin. After instruction 3, the ranges look as follows: + * + * 0 umin=0xe00 umax=0xeff U64_MAX + * | [xxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * | x x | tnum values + * + * The verifier can therefore deduce that the R0=0xe0=224. + */ +SEC("socket") +__description("bounds refinement with single-value tnum on umin") +__msg("3: (15) if r0 == 0xf0 {{.*}} R0=224") +__success __log_level(2) +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void bounds_refinement_tnum_umin(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 |= 0xe0; \ + r0 &= 0xf0; \ + if r0 == 0xf0 goto +2; \ + if r0 == 0xe0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the bounds deduction when the only possible tnum value is + * in the middle of the u64 range. After instruction 3, the ranges look as + * follows: + * + * 0 umin=0x7cf umax=0x7df U64_MAX + * | [xxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * | x x x x x | tnum values + * | +--- 0x7e0 + * +--- 0x7d0 + * + * Since the lower four bits are zero, the tnum and the u64 range only overlap + * in R0=0x7d0=2000. Instruction 5 is therefore dead code. + */ +SEC("socket") +__description("bounds refinement with single-value tnum in middle of range") +__msg("3: (a5) if r0 < 0x7cf {{.*}} R0=2000") +__success __log_level(2) +__naked void bounds_refinement_tnum_middle(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + if r0 & 0x0f goto +4; \ + if r0 > 0x7df goto +3; \ + if r0 < 0x7cf goto +2; \ + if r0 == 0x7d0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test cover the negative case for the tnum/u64 overlap. Since + * they contain the same two values (i.e., {0, 1}), we can't deduce + * anything more. + */ +SEC("socket") +__description("bounds refinement: several overlaps between tnum and u64") +__msg("2: (25) if r0 > 0x1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=1,var_off=(0x0; 0x1))") +__failure __log_level(2) +__naked void bounds_refinement_several_overlaps(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + if r0 < 0 goto +3; \ + if r0 > 1 goto +2; \ + if r0 == 1 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test cover the negative case for the tnum/u64 overlap. Since + * they overlap in the two values contained by the u64 range (i.e., + * {0xf, 0x10}), we can't deduce anything more. + */ +SEC("socket") +__description("bounds refinement: multiple overlaps between tnum and u64") +__msg("2: (25) if r0 > 0x10 {{.*}} R0=scalar(smin=umin=smin32=umin32=15,smax=umax=smax32=umax32=16,var_off=(0x0; 0x1f))") +__failure __log_level(2) +__naked void bounds_refinement_multiple_overlaps(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + if r0 < 0xf goto +3; \ + if r0 > 0x10 goto +2; \ + if r0 == 0x10 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; From 024cea2d647ed8ab942f19544b892d324dba42b4 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 27 Feb 2026 22:42:45 +0100 Subject: [PATCH 524/828] selftests/bpf: Avoid simplification of crafted bounds test The reg_bounds_crafted tests validate the verifier's range analysis logic. They focus on the actual ranges and thus ignore the tnum. As a consequence, they carry the assumption that the tested cases can be reproduced in userspace without using the tnum information. Unfortunately, the previous change the refinement logic breaks that assumption for one test case: (u64)2147483648 (u32) [4294967294; 0x100000000] The tested bytecode is shown below. Without our previous improvement, on the false branch of the condition, R7 is only known to have u64 range [0xfffffffe; 0x100000000]. With our improvement, and using the tnum information, we can deduce that R7 equals 0x100000000. 19: (bc) w0 = w6 ; R6=0x80000000 20: (bc) w0 = w7 ; R7=scalar(smin=umin=0xfffffffe,smax=umax=0x100000000,smin32=-2,smax32=0,var_off=(0x0; 0x1ffffffff)) 21: (be) if w6 <= w7 goto pc+3 ; R6=0x80000000 R7=0x100000000 R7's tnum is (0; 0x1ffffffff). On the false branch, regs_refine_cond_op refines R7's u32 range to [0; 0x7fffffff]. Then, __reg32_deduce_bounds refines the s32 range to 0 using u32 and finally also sets u32=0. From this, __reg_bound_offset improves the tnum to (0; 0x100000000). Finally, our previous patch uses this new tnum to deduce that it only intersect with u64=[0xfffffffe; 0x100000000] in a single value: 0x100000000. Because the verifier uses the tnum to reach this constant value, the selftest is unable to reproduce it by only simulating ranges. The solution implemented in this patch is to change the test case such that there is more than one overlap value between u64 and the tnum. The max. u64 value is thus changed from 0x100000000 to 0x300000000. Acked-by: Eduard Zingerman Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/50641c6a7ef39520595dcafa605692427c1006ec.1772225741.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index d93a0c7b1786..0322f817d07b 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -2091,7 +2091,7 @@ static struct subtest_case crafted_cases[] = { {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, {U64, U32, {0, 0x100000000}, {0, 0}}, - {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + {U64, U32, {0xfffffffe, 0x300000000}, {0x80000000, 0x80000000}}, {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, /* these are tricky cases where lower 32 bits allow to tighten 64 From 29252397bcc1e0a1f85e5c3bee59c325f5c26341 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Feb 2026 20:35:45 +0000 Subject: [PATCH 525/828] inet: annotate data-races around isk->inet_num UDP/TCP lookups are using RCU, thus isk->inet_num accesses should use READ_ONCE() and WRITE_ONCE() where needed. Fixes: 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260225203545.1512417-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet6_hashtables.h | 2 +- include/net/inet_hashtables.h | 2 +- include/net/ip.h | 2 +- net/ipv4/inet_hashtables.c | 8 ++++---- net/ipv4/tcp_diag.c | 2 +- net/ipv6/inet6_hashtables.c | 3 ++- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 282e29237d93..c16de5b7963f 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk, { if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ac05a52d9e13..5a979dcab538 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -345,7 +345,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, int dif, int sdif) { if (!net_eq(sock_net(sk), net) || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || sk->sk_addrpair != cookie) return false; diff --git a/include/net/ip.h b/include/net/ip.h index 69d5cef46004..7f9abd457e01 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; - ipcm->protocol = inet->inet_num; + ipcm->protocol = READ_ONCE(inet->inet_num); } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fca980772c81..9bfccc283fa6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -200,7 +200,7 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port) { - inet_sk(sk)->inet_num = port; + WRITE_ONCE(inet_sk(sk)->inet_num, port); inet_csk(sk)->icsk_bind_hash = tb; inet_csk(sk)->icsk_bind2_hash = tb2; sk_add_bind_node(sk, &tb2->owners); @@ -224,7 +224,7 @@ static void __inet_put_port(struct sock *sk) spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; inet_csk(sk)->icsk_bind_hash = NULL; - inet_sk(sk)->inet_num = 0; + WRITE_ONCE(inet_sk(sk)->inet_num, 0); sk->sk_userlocks &= ~SOCK_CONNECT_BIND; spin_lock(&head2->lock); @@ -352,7 +352,7 @@ static inline int compute_score(struct sock *sk, const struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_num == hnum && + if (net_eq(sock_net(sk), net) && READ_ONCE(sk->sk_num) == hnum && !ipv6_only_sock(sk)) { if (sk->sk_rcv_saddr != daddr) return -1; @@ -1206,7 +1206,7 @@ error: sk->sk_hash = 0; inet_sk(sk)->inet_sport = 0; - inet_sk(sk)->inet_num = 0; + WRITE_ONCE(inet_sk(sk)->inet_num, 0); if (tw) inet_twsk_bind_unhash(tw, hinfo); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index d83efd91f461..7935702e394b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -509,7 +509,7 @@ next_chunk: if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next_normal; - if (r->id.idiag_sport != htons(sk->sk_num) && + if (r->id.idiag_sport != htons(READ_ONCE(sk->sk_num)) && r->id.idiag_sport) goto next_normal; if (r->id.idiag_dport != sk->sk_dport && diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 5e1da088d8e1..182d38e6d6d8 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -95,7 +95,8 @@ static inline int compute_score(struct sock *sk, const struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && + if (net_eq(sock_net(sk), net) && + READ_ONCE(inet_sk(sk)->inet_num) == hnum && sk->sk_family == PF_INET6) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; From 0b3cd139be565b85f4a3579e376152b9def6256a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20K=C3=B6ppeler?= Date: Thu, 26 Feb 2026 12:40:15 +0100 Subject: [PATCH 526/828] net/sched: sch_cake: avoid sync overhead when unlimited MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip inter-instance sync when no rate limit is configured, as it serves no purpose and only adds overhead. Fixes: 1bddd758bac2 ("net/sched: sch_cake: share shaper state across sub-instances of cake_mq") Signed-off-by: Jonas Köppeler Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20260226-cake-mq-skip-sync-bandwidth-unlimited-v1-1-01830bb4db87@tu-berlin.de Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a01f14b1c216..b23fc7d9fafe 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2013,7 +2013,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) u64 delay; u32 len; - if (q->config->is_shared && now - q->last_checked_active >= q->config->sync_time) { + if (q->config->is_shared && q->rate_ns && + now - q->last_checked_active >= q->config->sync_time) { struct net_device *dev = qdisc_dev(sch); struct cake_sched_data *other_priv; u64 new_rate = q->config->rate_bps; From 15c2715a52645fd8e6e18b7abc3449292d118c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20K=C3=B6ppeler?= Date: Thu, 26 Feb 2026 12:40:16 +0100 Subject: [PATCH 527/828] net/sched: sch_cake: fixup cake_mq rate adjustment for diffserv config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_mq's rate adjustment during the sync periods did not adjust the rates for every tin in a diffserv config. This lead to inconsistencies of rates between the tins. Fix this by setting the rates for all tins during synchronization. Fixes: 1bddd758bac2 ("net/sched: sch_cake: share shaper state across sub-instances of cake_mq") Signed-off-by: Jonas Köppeler Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20260226-cake-mq-skip-sync-bandwidth-unlimited-v1-2-01830bb4db87@tu-berlin.de Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 50 ++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index b23fc7d9fafe..9efe23f8371b 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -391,8 +391,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = { 1239850263, 1191209601, 1147878294, 1108955788 }; -static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, - u64 target_ns, u64 rtt_est_ns); +static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust); + /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * @@ -2040,12 +2040,9 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) if (num_active_qs > 1) new_rate = div64_u64(q->config->rate_bps, num_active_qs); - /* mtu = 0 is used to only update the rate and not mess with cobalt params */ - cake_set_rate(b, new_rate, 0, 0, 0); + cake_configure_rates(sch, new_rate, true); q->last_checked_active = now; q->active_queues = num_active_qs; - q->rate_ns = b->tin_rate_ns; - q->rate_shft = b->tin_rate_shft; } begin: @@ -2362,12 +2359,10 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, b->cparams.p_dec = 1 << 20; /* 1/4096 */ } -static int cake_config_besteffort(struct Qdisc *sch) +static int cake_config_besteffort(struct Qdisc *sch, u64 rate, u32 mtu) { struct cake_sched_data *q = qdisc_priv(sch); struct cake_tin_data *b = &q->tins[0]; - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; q->tin_cnt = 1; @@ -2381,12 +2376,10 @@ static int cake_config_besteffort(struct Qdisc *sch) return 0; } -static int cake_config_precedence(struct Qdisc *sch) +static int cake_config_precedence(struct Qdisc *sch, u64 rate, u32 mtu) { /* convert high-level (user visible) parameters into internal format */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 256; u32 i; @@ -2457,7 +2450,7 @@ static int cake_config_precedence(struct Qdisc *sch) * Total 12 traffic classes. */ -static int cake_config_diffserv8(struct Qdisc *sch) +static int cake_config_diffserv8(struct Qdisc *sch, u64 rate, u32 mtu) { /* Pruned list of traffic classes for typical applications: * @@ -2474,8 +2467,6 @@ static int cake_config_diffserv8(struct Qdisc *sch) */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 256; u32 i; @@ -2505,7 +2496,7 @@ static int cake_config_diffserv8(struct Qdisc *sch) return 0; } -static int cake_config_diffserv4(struct Qdisc *sch) +static int cake_config_diffserv4(struct Qdisc *sch, u64 rate, u32 mtu) { /* Further pruned list of traffic classes for four-class system: * @@ -2518,8 +2509,6 @@ static int cake_config_diffserv4(struct Qdisc *sch) */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 1024; q->tin_cnt = 4; @@ -2547,7 +2536,7 @@ static int cake_config_diffserv4(struct Qdisc *sch) return 0; } -static int cake_config_diffserv3(struct Qdisc *sch) +static int cake_config_diffserv3(struct Qdisc *sch, u64 rate, u32 mtu) { /* Simplified Diffserv structure with 3 tins. * Latency Sensitive (CS7, CS6, EF, VA, TOS4) @@ -2555,8 +2544,6 @@ static int cake_config_diffserv3(struct Qdisc *sch) * Low Priority (LE, CS1) */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 1024; q->tin_cnt = 3; @@ -2581,32 +2568,33 @@ static int cake_config_diffserv3(struct Qdisc *sch) return 0; } -static void cake_reconfigure(struct Qdisc *sch) +static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust) { + u32 mtu = likely(rate_adjust) ? 0 : psched_mtu(qdisc_dev(sch)); struct cake_sched_data *qd = qdisc_priv(sch); struct cake_sched_config *q = qd->config; int c, ft; switch (q->tin_mode) { case CAKE_DIFFSERV_BESTEFFORT: - ft = cake_config_besteffort(sch); + ft = cake_config_besteffort(sch, rate, mtu); break; case CAKE_DIFFSERV_PRECEDENCE: - ft = cake_config_precedence(sch); + ft = cake_config_precedence(sch, rate, mtu); break; case CAKE_DIFFSERV_DIFFSERV8: - ft = cake_config_diffserv8(sch); + ft = cake_config_diffserv8(sch, rate, mtu); break; case CAKE_DIFFSERV_DIFFSERV4: - ft = cake_config_diffserv4(sch); + ft = cake_config_diffserv4(sch, rate, mtu); break; case CAKE_DIFFSERV_DIFFSERV3: default: - ft = cake_config_diffserv3(sch); + ft = cake_config_diffserv3(sch, rate, mtu); break; } @@ -2617,6 +2605,14 @@ static void cake_reconfigure(struct Qdisc *sch) qd->rate_ns = qd->tins[ft].tin_rate_ns; qd->rate_shft = qd->tins[ft].tin_rate_shft; +} + +static void cake_reconfigure(struct Qdisc *sch) +{ + struct cake_sched_data *qd = qdisc_priv(sch); + struct cake_sched_config *q = qd->config; + + cake_configure_rates(sch, qd->config->rate_bps, false); if (q->buffer_config_limit) { qd->buffer_limit = q->buffer_config_limit; From ba14798653bb815b4dcd116c5265a9f748bc0c7f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Feb 2026 17:19:17 +0100 Subject: [PATCH 528/828] selftests: netfilter: nft_queue.sh: avoid flakes on debug kernels Jakub reports test flakes on debug kernels: FAIL: test_udp_gro_ct: Expected software segmentation to occur, had 23 and 17 This test assumes that the kernels nfnetlink_queue module sees N GSO packets, segments them into M skbs and queues them to userspace for reinjection. Hence, if M >= N, no segmentation occurred. However, its possible that this happens: - nfnetlink_queue gets GSO packet - segments that into n skbs - userspace buffer is full, kernel drops the segmented skbs -> "toqueue" counter incremented by 1, "fromqueue" is unchanged. If this happens often enough in a single run, M >= N check triggers incorrectly. To solve this, allow the nf_queue.c test program to set the FAIL_OPEN flag so that the segmented skbs bypass the queueing step in the kernel if the receive buffer is full. Also, reduce number of sending socat instances, decrease their priority and increase nice value for the nf_queue program itself to reduce the probability of overruns happening in the first place. Fixes: 59ecffa3995e ("selftests: netfilter: nft_queue.sh: add udp fraglist gro test case") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20260218184114.0b405b72@kernel.org/ Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260226161920.1205-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/netfilter/nf_queue.c | 10 ++++++++-- tools/testing/selftests/net/netfilter/nft_queue.sh | 13 +++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c index 9e56b9d47037..116c0ca0eabb 100644 --- a/tools/testing/selftests/net/netfilter/nf_queue.c +++ b/tools/testing/selftests/net/netfilter/nf_queue.c @@ -18,6 +18,7 @@ struct options { bool count_packets; bool gso_enabled; + bool failopen; int verbose; unsigned int queue_num; unsigned int timeout; @@ -30,7 +31,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-o] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) @@ -236,6 +237,8 @@ struct mnl_socket *open_queue(void) flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0; flags |= NFQA_CFG_F_UID_GID; + if (opts.failopen) + flags |= NFQA_CFG_F_FAIL_OPEN; mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); @@ -329,7 +332,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) { + while ((c = getopt(argc, argv, "chvot:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -366,6 +369,9 @@ static void parse_opts(int argc, char **argv) case 'G': opts.gso_enabled = false; break; + case 'o': + opts.failopen = true; + break; case 'v': opts.verbose++; break; diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 139bc1211878..ea766bdc5d04 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -591,6 +591,7 @@ EOF test_udp_gro_ct() { local errprefix="FAIL: test_udp_gro_ct:" + local timeout=5 ip netns exec "$nsrouter" conntrack -F 2>/dev/null @@ -630,10 +631,10 @@ table inet udpq { } } EOF - timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & + timeout "$timeout" ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & local rpid=$! - ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" & + ip netns exec "$nsrouter" nice -n -19 ./nf_queue -G -c -q 1 -o -t 2 > "$TMPFILE2" & local nfqpid=$! ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on @@ -643,8 +644,12 @@ EOF local bs=512 local count=$(((32 * 1024 * 1024) / bs)) - dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do - timeout 5 ip netns exec "$ns1" \ + + local nprocs=$(nproc) + [ $nprocs -gt 1 ] && nprocs=$((nprocs - 1)) + + dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 $nprocs); do + timeout "$timeout" nice -n 19 ip netns exec "$ns1" \ socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 & done From 11cb63b0d1a0685e0831ae3c77223e002ef18189 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 25 Feb 2026 10:43:48 -0300 Subject: [PATCH 529/828] net/sched: Only allow act_ct to bind to clsact/ingress qdiscs and shared blocks As Paolo said earlier [1]: "Since the blamed commit below, classify can return TC_ACT_CONSUMED while the current skb being held by the defragmentation engine. As reported by GangMin Kim, if such packet is that may cause a UaF when the defrag engine later on tries to tuch again such packet." act_ct was never meant to be used in the egress path, however some users are attaching it to egress today [2]. Attempting to reach a middle ground, we noticed that, while most qdiscs are not handling TC_ACT_CONSUMED, clsact/ingress qdiscs are. With that in mind, we address the issue by only allowing act_ct to bind to clsact/ingress qdiscs and shared blocks. That way it's still possible to attach act_ct to egress (albeit only with clsact). [1] https://lore.kernel.org/netdev/674b8cbfc385c6f37fb29a1de08d8fe5c2b0fbee.1771321118.git.pabeni@redhat.com/ [2] https://lore.kernel.org/netdev/cc6bfb4a-4a2b-42d8-b9ce-7ef6644fb22b@ovn.org/ Reported-by: GangMin Kim Fixes: 3f14b377d01d ("net/sched: act_ct: fix skb leak and crash on ooo frags") CC: stable@vger.kernel.org Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260225134349.1287037-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- include/net/act_api.h | 1 + net/sched/act_ct.c | 6 ++++++ net/sched/cls_api.c | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/include/net/act_api.h b/include/net/act_api.h index e1e8f0f7dacb..d11b79107930 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -70,6 +70,7 @@ struct tc_action { #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) #define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) +#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5)) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index bd51522c7953..7d5e50c921a0 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1360,6 +1360,12 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, return -EINVAL; } + if (bind && !(flags & TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT)) { + NL_SET_ERR_MSG_MOD(extack, + "Attaching ct to a non ingress/clsact qdisc is unsupported"); + return -EOPNOTSUPP; + } + err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack); if (err < 0) return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 343309e9009b..4829c27446e3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2228,6 +2228,11 @@ static bool is_qdisc_ingress(__u32 classid) return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS)); } +static bool is_ingress_or_clsact(struct tcf_block *block, struct Qdisc *q) +{ + return tcf_block_shared(block) || (q && !!(q->flags & TCQ_F_INGRESS)); +} + static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2420,6 +2425,8 @@ replay: flags |= TCA_ACT_FLAGS_NO_RTNL; if (is_qdisc_ingress(parent)) flags |= TCA_ACT_FLAGS_AT_INGRESS; + if (is_ingress_or_clsact(block, q)) + flags |= TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack); if (err == 0) { From b14e82abf78affa50f4cabe8493e17f9adcfcec7 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 25 Feb 2026 10:43:49 -0300 Subject: [PATCH 530/828] selftests/tc-testing: Create tests to exercise act_ct binding restrictions Add 4 test cases to exercise new act_ct binding restrictions: - Try to attach act_ct to an ets qdisc - Attach act_ct to an ingress qdisc - Attach act_ct to a clsact/egress qdisc - Attach act_ct to a shared block Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260225134349.1287037-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/actions/ct.json | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json index 7d07c55bb668..33bb8f3ff8ed 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json @@ -505,5 +505,164 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "8883", + "name": "Try to attach act_ct to an ets qdisc", + "category": [ + "actions", + "ct" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 root handle 1: ets bands 2" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: prio 1 protocol ip matchall action ct index 42", + "expExitCode": "2", + "verifyCmd": "$TC -j filter ls dev $DEV1 parent 1: prio 1 protocol ip", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DEV1 root" + ] + }, + { + "id": "3b10", + "name": "Attach act_ct to an ingress qdisc", + "category": [ + "actions", + "ct" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 1 protocol ip matchall action ct index 42", + "expExitCode": "0", + "verifyCmd": "$TC -j filter ls dev $DEV1 ingress prio 1 protocol ip", + "matchJSON": [ + { + "kind": "matchall" + }, + { + "options": { + "actions": [ + { + "order": 1, + "kind": "ct", + "index": 42, + "ref": 1, + "bind": 1 + } + ] + } + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "0337", + "name": "Attach act_ct to a clsact/egress qdisc", + "category": [ + "actions", + "ct" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 clsact" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 egress prio 1 protocol ip matchall action ct index 42", + "expExitCode": "0", + "verifyCmd": "$TC -j filter ls dev $DEV1 egress prio 1 protocol ip", + "matchJSON": [ + { + "kind": "matchall" + }, + { + "options": { + "actions": [ + { + "order": 1, + "kind": "ct", + "index": 42, + "ref": 1, + "bind": 1 + } + ] + } + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "4f60", + "name": "Attach act_ct to a shared block", + "category": [ + "actions", + "ct" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress_block 21 clsact" + ], + "cmdUnderTest": "$TC filter add block 21 prio 1 protocol ip matchall action ct index 42", + "expExitCode": "0", + "verifyCmd": "$TC -j filter ls block 21 prio 1 protocol ip", + "matchJSON": [ + { + "kind": "matchall" + }, + { + "options": { + "actions": [ + { + "order": 1, + "kind": "ct", + "index": 42, + "ref": 1, + "bind": 1 + } + ] + } + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact" + ] } ] From 15fba71533bcdfaa8eeba69a5a5a2927afdf664a Mon Sep 17 00:00:00 2001 From: Valentin Spreckels Date: Thu, 26 Feb 2026 20:54:09 +0100 Subject: [PATCH 531/828] net: usb: r8152: add TRENDnet TUC-ET2G The TRENDnet TUC-ET2G is a RTL8156 based usb ethernet adapter. Add its vendor and product IDs. Signed-off-by: Valentin Spreckels Link: https://patch.msgid.link/20260226195409.7891-2-valentin@spreckels.dev Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 1 + include/linux/usb/r8152.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4af85728ac4f..0c83bbbea2e7 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10054,6 +10054,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, + { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) }, {} }; diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 2ca60828f28b..1502b2a355f9 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -32,6 +32,7 @@ #define VENDOR_ID_DLINK 0x2001 #define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 +#define VENDOR_ID_TRENDNET 0x20f4 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); From dabffd08545ffa1d7183bc45e387860984025291 Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 26 Feb 2026 11:28:33 -0800 Subject: [PATCH 532/828] net: mana: Ring doorbell at 4 CQ wraparounds MANA hardware requires at least one doorbell ring every 8 wraparounds of the CQ. The driver rings the doorbell as a form of flow control to inform hardware that CQEs have been consumed. The NAPI poll functions mana_poll_tx_cq() and mana_poll_rx_cq() can poll up to CQE_POLLING_BUFFER (512) completions per call. If the CQ has fewer than 512 entries, a single poll call can process more than 4 wraparounds without ringing the doorbell. The doorbell threshold check also uses ">" instead of ">=", delaying the ring by one extra CQE beyond 4 wraparounds. Combined, these issues can cause the driver to exceed the 8-wraparound hardware limit, leading to missed completions and stalled queues. Fix this by capping the number of CQEs polled per call to 4 wraparounds of the CQ in both TX and RX paths. Also change the doorbell threshold from ">" to ">=" so the doorbell is rung as soon as 4 wraparounds are reached. Cc: stable@vger.kernel.org Fixes: 58a63729c957 ("net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings") Signed-off-by: Long Li Reviewed-by: Haiyang Zhang Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20260226192833.1050807-1-longli@microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 933e9d681ded..9017e806ecda 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1770,8 +1770,14 @@ static void mana_poll_tx_cq(struct mana_cq *cq) ndev = txq->ndev; apc = netdev_priv(ndev); + /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the + * doorbell can be rung in time for the hardware's requirement + * of at least one doorbell ring every 8 wraparounds. + */ comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, - CQE_POLLING_BUFFER); + min((cq->gdma_cq->queue_size / + COMP_ENTRY_SIZE) * 4, + CQE_POLLING_BUFFER)); if (comp_read < 1) return; @@ -2156,7 +2162,14 @@ static void mana_poll_rx_cq(struct mana_cq *cq) struct mana_rxq *rxq = cq->rxq; int comp_read, i; - comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the + * doorbell can be rung in time for the hardware's requirement + * of at least one doorbell ring every 8 wraparounds. + */ + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, + min((cq->gdma_cq->queue_size / + COMP_ENTRY_SIZE) * 4, + CQE_POLLING_BUFFER)); WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); rxq->xdp_flush = false; @@ -2201,11 +2214,11 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); cq->work_done_since_doorbell = 0; napi_complete_done(&cq->napi, w); - } else if (cq->work_done_since_doorbell > - cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + } else if (cq->work_done_since_doorbell >= + (cq->gdma_cq->queue_size / COMP_ENTRY_SIZE) * 4) { /* MANA hardware requires at least one doorbell ring every 8 * wraparounds of CQ even if there is no need to arm the CQ. - * This driver rings the doorbell as soon as we have exceeded + * This driver rings the doorbell as soon as it has processed * 4 wraparounds. */ mana_gd_ring_cq(gdma_queue, 0); From 8168a7b72bdee3790b126f63bd30306759206b15 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Fri, 6 Feb 2026 21:26:45 +0000 Subject: [PATCH 533/828] crypto: ccp - allow callers to use HV-Fixed page API when SEV is disabled When SEV is disabled, the HV-Fixed page allocation call fails, which in turn causes SFS initialization to fail. Fix the HV-Fixed API so callers (for example, SFS) can use it even when SEV is disabled by performing normal page allocation and freeing. Fixes: e09701dcdd9c ("crypto: ccp - Add new HV-Fixed page allocation/free API") Cc: stable@vger.kernel.org Signed-off-by: Ashish Kalra Reviewed-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 096f993974d1..8b2dfc11289b 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1105,15 +1105,12 @@ struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages) { struct psp_device *psp_master = psp_get_master_device(); struct snp_hv_fixed_pages_entry *entry; - struct sev_device *sev; unsigned int order; struct page *page; - if (!psp_master || !psp_master->sev_data) + if (!psp_master) return NULL; - sev = psp_master->sev_data; - order = get_order(PMD_SIZE * num_2mb_pages); /* @@ -1126,7 +1123,8 @@ struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages) * This API uses SNP_INIT_EX to transition allocated pages to HV_Fixed * page state, fail if SNP is already initialized. */ - if (sev->snp_initialized) + if (psp_master->sev_data && + ((struct sev_device *)psp_master->sev_data)->snp_initialized) return NULL; /* Re-use freed pages that match the request */ @@ -1162,7 +1160,7 @@ void snp_free_hv_fixed_pages(struct page *page) struct psp_device *psp_master = psp_get_master_device(); struct snp_hv_fixed_pages_entry *entry, *nentry; - if (!psp_master || !psp_master->sev_data) + if (!psp_master) return; /* From 889b0e2721e793eb46cf7d17b965aa3252af3ec8 Mon Sep 17 00:00:00 2001 From: Alper Ak Date: Mon, 9 Feb 2026 13:30:42 +0300 Subject: [PATCH 534/828] crypto: ccp - Fix use-after-free on error path In the error path of sev_tsm_init_locked(), the code dereferences 't' after it has been freed with kfree(). The pr_err() statement attempts to access t->tio_en and t->tio_init_done after the memory has been released. Move the pr_err() call before kfree(t) to access the fields while the memory is still valid. This issue reported by Smatch static analyser Fixes:4be423572da1 ("crypto/ccp: Implement SEV-TIO PCIe IDE (phase1)") Signed-off-by: Alper Ak Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev-tsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/sev-dev-tsm.c b/drivers/crypto/ccp/sev-dev-tsm.c index adc9542ae806..b07ae529b591 100644 --- a/drivers/crypto/ccp/sev-dev-tsm.c +++ b/drivers/crypto/ccp/sev-dev-tsm.c @@ -378,9 +378,9 @@ void sev_tsm_init_locked(struct sev_device *sev, void *tio_status_page) return; error_exit: - kfree(t); pr_err("Failed to enable SEV-TIO: ret=%d en=%d initdone=%d SEV=%d\n", ret, t->tio_en, t->tio_init_done, boot_cpu_has(X86_FEATURE_SEV)); + kfree(t); } void sev_tsm_uninit(struct sev_device *sev) From d240b079a37e90af03fd7dfec94930eb6c83936e Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 19 Feb 2026 00:54:00 +0100 Subject: [PATCH 535/828] crypto: atmel-sha204a - Fix OOM ->tfm_count leak If memory allocation fails, decrement ->tfm_count to avoid blocking future reads. Cc: stable@vger.kernel.org Fixes: da001fb651b0 ("crypto: atmel-i2c - add support for SHA204A random number generator") Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha204a.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 8adc7fe71c04..98d1023007e3 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -52,9 +52,10 @@ static int atmel_sha204a_rng_read_nonblocking(struct hwrng *rng, void *data, rng->priv = 0; } else { work_data = kmalloc_obj(*work_data, GFP_ATOMIC); - if (!work_data) + if (!work_data) { + atomic_dec(&i2c_priv->tfm_count); return -ENOMEM; - + } work_data->ctx = i2c_priv; work_data->client = i2c_priv->client; From 89ff45359abbf9d8d3c4aa3f5a57ed0be82b5a12 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 26 Feb 2026 16:48:41 -0800 Subject: [PATCH 536/828] accel/amdxdna: Fill invalid payload for failed command Newer userspace applications may read the payload of a failed command to obtain detailed error information. However, the driver and old firmware versions may not support returning advanced error information. In this case, initialize the command payload with an invalid value so userspace can detect that no detailed error information is available. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260227004841.3080241-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 23 ++++++++--------------- drivers/accel/amdxdna/amdxdna_ctx.c | 27 +++++++++++++++++++++++++++ drivers/accel/amdxdna/amdxdna_ctx.h | 3 +++ 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 25845bd5e507..afee5e667f77 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -186,13 +186,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) cmd_abo = job->cmd_bo; if (unlikely(job->job_timeout)) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT); ret = -EINVAL; goto out; } if (unlikely(!data) || unlikely(size != sizeof(u32))) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT); ret = -EINVAL; goto out; } @@ -202,7 +202,7 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) if (status == AIE2_STATUS_SUCCESS) amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); else - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR); out: aie2_sched_notify(job); @@ -244,13 +244,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) cmd_abo = job->cmd_bo; if (unlikely(job->job_timeout)) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT); ret = -EINVAL; goto out; } if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT); ret = -EINVAL; goto out; } @@ -270,19 +270,12 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) fail_cmd_idx, fail_cmd_status); if (fail_cmd_status == AIE2_STATUS_SUCCESS) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT); ret = -EINVAL; - goto out; + } else { + amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR); } - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); - if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { - struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); - - cc->error_index = fail_cmd_idx; - if (cc->error_index >= cc->command_count) - cc->error_index = 0; - } out: aie2_sched_notify(job); return ret; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 263d36072540..666dfd7b2a80 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -135,6 +135,33 @@ u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) return INVALID_CU_IDX; } +int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo, + struct amdxdna_sched_job *job, u32 cmd_idx, + enum ert_cmd_state error_state) +{ + struct amdxdna_client *client = job->hwctx->client; + struct amdxdna_cmd *cmd = abo->mem.kva; + struct amdxdna_cmd_chain *cc = NULL; + + cmd->header &= ~AMDXDNA_CMD_STATE; + cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, error_state); + + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) { + cc = amdxdna_cmd_get_payload(abo, NULL); + cc->error_index = (cmd_idx < cc->command_count) ? cmd_idx : 0; + abo = amdxdna_gem_get_obj(client, cc->data[0], AMDXDNA_BO_CMD); + if (!abo) + return -EINVAL; + cmd = abo->mem.kva; + } + + memset(cmd->data, 0xff, abo->mem.size - sizeof(*cmd)); + if (cc) + amdxdna_gem_put_obj(abo); + + return 0; +} + /* * This should be called in close() and remove(). DO NOT call in other syscalls. * This guarantee that when hwctx and resources will be released, if user diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index 16c85f08f03c..fbdf9d000871 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -167,6 +167,9 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); +int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo, + struct amdxdna_sched_job *job, u32 cmd_idx, + enum ert_cmd_state error_state); void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); void amdxdna_hwctx_remove_all(struct amdxdna_client *client); From 778031e1658d206a52bf9491c91ae5d4f4a2509d Mon Sep 17 00:00:00 2001 From: Matt DeVillier Date: Thu, 26 Feb 2026 10:30:55 -0600 Subject: [PATCH 537/828] ALSA: hda/ca0132: Set HP/Speaker auto-detect default from headphone pin verb HP/Speaker auto-detect (VNID_HP_ASEL) has been off by default for every CA0132 device since the driver was added in 2012. vnode_lswitch is always initialized to 0 in ca0132_init_chip(), and no quirk or other code path enables it. As a result, headphone jack detection works only after the user manually turns on "HP/Speaker Auto Detect" in alsamixer, which is not obvious on laptops with combo jacks (e.g. Google Link, Alienware). Change the default to follow the headphone pin config: if the pin verb has presence detect enabled (no AC_DEFCFG_MISC_NO_PRESENCE) and the codec supports it (AC_PINCAP_PRES_DETECT), enable HP_ASEL by default. This lets firmware (coreboot, UEFI, etc.) express whether the headphone jack supports insertion detection. Devices with combo jacks can default to auto-detect; devices with fixed/no jack leave it off. Signed-off-by: Matt DeVillier Link: https://patch.msgid.link/20260226163055.825167-1-matt.devillier@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/ca0132.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/hda/codecs/ca0132.c b/sound/hda/codecs/ca0132.c index bf342a76807c..a0677d7da8e2 100644 --- a/sound/hda/codecs/ca0132.c +++ b/sound/hda/codecs/ca0132.c @@ -9816,6 +9816,15 @@ static void ca0132_config(struct hda_codec *codec) spec->dig_in = 0x09; break; } + + /* Default HP/Speaker auto-detect from headphone pin verb: enable if the + * pin config indicates presence detect (not AC_DEFCFG_MISC_NO_PRESENCE). + */ + if (spec->unsol_tag_hp && + (snd_hda_query_pin_caps(codec, spec->unsol_tag_hp) & AC_PINCAP_PRES_DETECT) && + !(get_defcfg_misc(snd_hda_codec_get_pincfg(codec, spec->unsol_tag_hp)) & + AC_DEFCFG_MISC_NO_PRESENCE)) + spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID] = 1; } static int ca0132_prepare_verbs(struct hda_codec *codec) From bd72a37b219b9b424bc5ec28f660ebdc129de7cb Mon Sep 17 00:00:00 2001 From: Lianqin Hu Date: Fri, 27 Feb 2026 02:11:02 +0000 Subject: [PATCH 538/828] ALSA: usb-audio: Add iface reset and delay quirk for AB13X USB Audio Setting up the interface when suspended/resumeing fail on this card. Adding a reset and delay quirk will eliminate this problem. usb 1-1: New USB device found, idVendor=0624, idProduct=3d3f usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-1: Product: AB13X USB Audio usb 1-1: Manufacturer: Generic usb 1-1: SerialNumber: 20210726905926 Signed-off-by: Lianqin Hu Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/TYUPR06MB621795D087BF2D594027C235D273A@TYUPR06MB6217.apcprd06.prod.outlook.com --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c6a78efbcaa3..d54a1a44a69b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2219,6 +2219,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x05e1, 0x0480, /* Hauppauge Woodbury */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x0624, 0x3d3f, /* AB13X USB Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x0644, 0x8043, /* TEAC UD-501/UD-501V2/UD-503/NT-503 */ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_IFACE_DELAY), From 068641bc9dc3d680d1ec4f6ee9199d4812041dff Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Fri, 27 Feb 2026 20:13:27 +0800 Subject: [PATCH 539/828] ALSA: hda/realtek: Add quirk for HP Pavilion 15-eh1xxx to enable mute LED The HP Pavilion 15-eh1xxx series uses the HP mainboard 88D1 with ALC245 and needs the ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT quirk to make the mute led working. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215978 Cc: Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260227121327.3751341-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 86bb22d19629..4c49f1195e1b 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6904,6 +6904,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88b3, "HP ENVY x360 Convertible 15-es0xxx", ALC245_FIXUP_HP_ENVY_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x88d1, "HP Pavilion 15-eh1xxx (mainboard 88D1)", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), SND_PCI_QUIRK(0x103c, 0x88dd, "HP Pavilion 15z-ec200", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x88eb, "HP Victus 16-e0xxx", ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED), From 05ac3846ffe53fc63e454eb195ce8a6bab7a6a88 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 27 Feb 2026 22:46:40 +0800 Subject: [PATCH 540/828] ALSA: hda/tas2781: A workaround solution to lower-vol issue among lower calibrated-impedance micro-speaker on TAS2781 On TAS2781, if the Speaker calibrated impedance is lower than default value hard-coded inside the TAS2781, it will cuase vol lower than normal. In order to fix this issue, the parameter of SineGainI need updating. Signed-off-by: Shenghao Ding Tested-by: Matthew Schwartz Link: https://patch.msgid.link/20260227144641.1243-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2781.h | 1 + .../hda/codecs/side-codecs/tas2781_hda_i2c.c | 15 +-- sound/soc/codecs/tas2781-fmwlib.c | 94 +++++++++++++++++++ 3 files changed, 98 insertions(+), 12 deletions(-) diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 7c03bdc951bb..e847cf51878c 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -151,6 +151,7 @@ struct tasdevice { struct bulk_reg_val *cali_data_backup; struct bulk_reg_val alp_cali_bckp; struct tasdevice_fw *cali_data_fmw; + void *cali_specific; unsigned int dev_addr; unsigned int err_code; unsigned char cur_book; diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 74c3cf1e45e1..67240ce184e1 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -60,7 +60,6 @@ struct tas2781_hda_i2c_priv { int (*save_calibration)(struct tas2781_hda *h); int hda_chip_id; - bool skip_calibration; }; static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data) @@ -479,8 +478,7 @@ static void tasdevice_dspfw_init(void *context) /* If calibrated data occurs error, dsp will still works with default * calibrated data inside algo. */ - if (!hda_priv->skip_calibration) - hda_priv->save_calibration(tas_hda); + hda_priv->save_calibration(tas_hda); } static void tasdev_fw_ready(const struct firmware *fmw, void *context) @@ -535,7 +533,6 @@ static int tas2781_hda_bind(struct device *dev, struct device *master, void *master_data) { struct tas2781_hda *tas_hda = dev_get_drvdata(dev); - struct tas2781_hda_i2c_priv *hda_priv = tas_hda->hda_priv; struct hda_component_parent *parent = master_data; struct hda_component *comp; struct hda_codec *codec; @@ -564,14 +561,6 @@ static int tas2781_hda_bind(struct device *dev, struct device *master, break; } - /* - * Using ASUS ROG Xbox Ally X (RC73XA) UEFI calibration data - * causes audio dropouts during playback, use fallback data - * from DSP firmware as a workaround. - */ - if (codec->core.subsystem_id == 0x10431384) - hda_priv->skip_calibration = true; - guard(pm_runtime_active_auto)(dev); comp->dev = dev; @@ -643,6 +632,7 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) */ device_name = "TIAS2781"; hda_priv->hda_chip_id = HDA_TAS2781; + tas_hda->priv->chip_id = TAS2781; hda_priv->save_calibration = tas2781_save_calibration; tas_hda->priv->global_addr = TAS2781_GLOBAL_ADDR; } else if (strstarts(dev_name(&clt->dev), "i2c-TXNW2770")) { @@ -656,6 +646,7 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) "i2c-TXNW2781:00-tas2781-hda.0")) { device_name = "TXNW2781"; hda_priv->hda_chip_id = HDA_TAS2781; + tas_hda->priv->chip_id = TAS2781; hda_priv->save_calibration = tas2781_save_calibration; tas_hda->priv->global_addr = TAS2781_GLOBAL_ADDR; } else if (strstr(dev_name(&clt->dev), "INT8866")) { diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index c969eb38704e..5798d518d94c 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -32,6 +32,10 @@ #define TAS2781_YRAM1_PAGE 42 #define TAS2781_YRAM1_START_REG 88 +#define TAS2781_PG_REG TASDEVICE_REG(0x00, 0x00, 0x7c) +#define TAS2781_PG_1_0 0xA0 +#define TAS2781_PG_2_0 0xA8 + #define TAS2781_YRAM2_START_PAGE 43 #define TAS2781_YRAM2_END_PAGE 49 #define TAS2781_YRAM2_START_REG 8 @@ -98,6 +102,12 @@ struct blktyp_devidx_map { unsigned char dev_idx; }; +struct tas2781_cali_specific { + unsigned char sin_gni[4]; + int sin_gni_reg; + bool is_sin_gn_flush; +}; + static const char deviceNumber[TASDEVICE_DSP_TAS_MAX_DEVICE] = { 1, 2, 1, 2, 1, 1, 0, 2, 4, 3, 1, 2, 3, 4, 1, 2 }; @@ -2454,6 +2464,84 @@ static int tasdevice_load_data(struct tasdevice_priv *tas_priv, return ret; } +static int tas2781_cali_preproc(struct tasdevice_priv *priv, int i) +{ + struct tas2781_cali_specific *spec = priv->tasdevice[i].cali_specific; + struct calidata *cali_data = &priv->cali_data; + struct cali_reg *p = &cali_data->cali_reg_array; + unsigned char *data = cali_data->data; + int rc; + + /* + * On TAS2781, if the Speaker calibrated impedance is lower than + * default value hard-coded inside the TAS2781, it will cuase vol + * lower than normal. In order to fix this issue, the parameter of + * SineGainI need updating. + */ + if (spec == NULL) { + int k = i * (cali_data->cali_dat_sz_per_dev + 1); + int re_org, re_cal, corrected_sin_gn, pg_id; + unsigned char r0_deflt[4]; + + spec = devm_kzalloc(priv->dev, sizeof(*spec), GFP_KERNEL); + if (spec == NULL) + return -ENOMEM; + priv->tasdevice[i].cali_specific = spec; + rc = tasdevice_dev_bulk_read(priv, i, p->r0_reg, r0_deflt, 4); + if (rc < 0) { + dev_err(priv->dev, "invalid RE from %d = %d\n", i, rc); + return rc; + } + /* + * SineGainI need to be re-calculated, calculate the high 16 + * bits. + */ + re_org = r0_deflt[0] << 8 | r0_deflt[1]; + re_cal = data[k + 1] << 8 | data[k + 2]; + if (re_org > re_cal) { + rc = tasdevice_dev_read(priv, i, TAS2781_PG_REG, + &pg_id); + if (rc < 0) { + dev_err(priv->dev, "invalid PG id %d = %d\n", + i, rc); + return rc; + } + + spec->sin_gni_reg = (pg_id == TAS2781_PG_1_0) ? + TASDEVICE_REG(0, 0x1b, 0x34) : + TASDEVICE_REG(0, 0x18, 0x1c); + + rc = tasdevice_dev_bulk_read(priv, i, + spec->sin_gni_reg, + spec->sin_gni, 4); + if (rc < 0) { + dev_err(priv->dev, "wrong sinegaini %d = %d\n", + i, rc); + return rc; + } + corrected_sin_gn = re_org * ((spec->sin_gni[0] << 8) + + spec->sin_gni[1]); + corrected_sin_gn /= re_cal; + spec->sin_gni[0] = corrected_sin_gn >> 8; + spec->sin_gni[1] = corrected_sin_gn & 0xff; + + spec->is_sin_gn_flush = true; + } + } + + if (spec->is_sin_gn_flush) { + rc = tasdevice_dev_bulk_write(priv, i, spec->sin_gni_reg, + spec->sin_gni, 4); + if (rc < 0) { + dev_err(priv->dev, "update failed %d = %d\n", + i, rc); + return rc; + } + } + + return 0; +} + static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) { struct calidata *cali_data = &priv->cali_data; @@ -2469,6 +2557,12 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) } k++; + if (priv->chip_id == TAS2781) { + rc = tas2781_cali_preproc(priv, i); + if (rc < 0) + return; + } + rc = tasdevice_dev_bulk_write(priv, i, p->r0_reg, &(data[k]), 4); if (rc < 0) { dev_err(priv->dev, "chn %d r0_reg bulk_wr err = %d\n", i, rc); From d4d5633d8b19b0e745a7910aea49956b3b47900d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 27 Feb 2026 16:57:00 +0100 Subject: [PATCH 541/828] ALSA: firewire: dice: Fix printf warning with W=1 The use of snprintf() may cause a warning with W=1 due to the possibly truncated string. As the truncation doesn't really matter (and won't happen practically) in the case of dice driver, just shut it up by replacing with scnprintf(). Link: https://patch.msgid.link/20260227155705.1557224-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c index 85d265c7d544..f7a50bae4b55 100644 --- a/sound/firewire/dice/dice.c +++ b/sound/firewire/dice/dice.c @@ -122,7 +122,7 @@ static void dice_card_strings(struct snd_dice *dice) fw_csr_string(dev->config_rom + 5, CSR_VENDOR, vendor, sizeof(vendor)); strscpy(model, "?"); fw_csr_string(dice->unit->directory, CSR_MODEL, model, sizeof(model)); - snprintf(card->longname, sizeof(card->longname), + scnprintf(card->longname, sizeof(card->longname), "%s %s (serial %u) at %s, S%d", vendor, model, dev->config_rom[4] & 0x3fffff, dev_name(&dice->unit->device), 100 << dev->max_speed); From 0ed2e8bf61d6d5df1d78f4e24b682dff4c394e17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 28 Feb 2026 04:56:20 -0700 Subject: [PATCH 542/828] io_uring: correct comment for IORING_SETUP_TASKRUN_FLAG Sync with a recent liburing fix, which corrects the comment explaining when the IORING_SETUP_TASKRUN_FLAG setup flag is valid to use. May be use with COOP_TASKRUN or DEFER_TASKRUN, not useful without either of this task_work mechanisms being used. Link: https://github.com/axboe/liburing/pull/1543 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6750c383a2ab..1ff16141c8a5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -188,7 +188,8 @@ enum io_uring_sqe_flags_bit { /* * If COOP_TASKRUN is set, get notified if task work is available for * running and a kernel transition would be needed to run it. This sets - * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN + * or DEFER_TASKRUN. */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ From 407fd8b8d8cce03856aa67329715de48b254b529 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 11 Feb 2026 19:03:03 +0100 Subject: [PATCH 543/828] KVM: remove CONFIG_KVM_GENERIC_MMU_NOTIFIER All architectures now use MMU notifier for KVM page table management. Remove the Kconfig symbol and the code that is used when it is disabled. Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/Kconfig | 1 - arch/loongarch/kvm/Kconfig | 1 - arch/mips/kvm/Kconfig | 1 - arch/powerpc/kvm/Kconfig | 4 ---- arch/powerpc/kvm/powerpc.c | 1 - arch/riscv/kvm/Kconfig | 1 - arch/s390/kvm/Kconfig | 2 -- arch/x86/kvm/Kconfig | 1 - include/linux/kvm_host.h | 7 +------ virt/kvm/Kconfig | 9 +-------- virt/kvm/kvm_main.c | 16 ---------------- 11 files changed, 2 insertions(+), 42 deletions(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 4f803fd1c99a..7d1f22fd490b 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,7 +21,6 @@ menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" select KVM_COMMON select KVM_GENERIC_HARDWARE_ENABLING - select KVM_GENERIC_MMU_NOTIFIER select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index ed4f724db774..8e5213609975 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -28,7 +28,6 @@ config KVM select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING - select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO select VIRT_XFER_TO_GUEST_WORK select SCHED_INFO diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index cc13cc35f208..b1b9a1d67758 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -23,7 +23,6 @@ config KVM select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_MMIO - select KVM_GENERIC_MMU_NOTIFIER select KVM_GENERIC_HARDWARE_ENABLING select HAVE_KVM_READONLY_MEM help diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index c9a2d50ff1b0..9a0d1c1aca6c 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -38,7 +38,6 @@ config KVM_BOOK3S_64_HANDLER config KVM_BOOK3S_PR_POSSIBLE bool select KVM_MMIO - select KVM_GENERIC_MMU_NOTIFIER config KVM_BOOK3S_HV_POSSIBLE bool @@ -81,7 +80,6 @@ config KVM_BOOK3S_64_HV tristate "KVM for POWER7 and later using hypervisor mode in host" depends on KVM_BOOK3S_64 && PPC_POWERNV select KVM_BOOK3S_HV_POSSIBLE - select KVM_GENERIC_MMU_NOTIFIER select KVM_BOOK3S_HV_PMU select CMA help @@ -203,7 +201,6 @@ config KVM_E500V2 depends on !CONTEXT_TRACKING_USER select KVM select KVM_MMIO - select KVM_GENERIC_MMU_NOTIFIER help Support running unmodified E500 guest kernels in virtual machines on E500v2 host processors. @@ -220,7 +217,6 @@ config KVM_E500MC select KVM select KVM_MMIO select KVM_BOOKE_HV - select KVM_GENERIC_MMU_NOTIFIER help Support running unmodified E500MC/E5500/E6500 guest kernels in virtual machines on E500MC/E5500/E6500 host processors. diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9a89a6d98f97..3da40ea8c562 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -625,7 +625,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif case KVM_CAP_SYNC_MMU: - BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER)); r = 1; break; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 77379f77840a..ec2cee0a39e0 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -30,7 +30,6 @@ config KVM select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO select VIRT_XFER_TO_GUEST_WORK - select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS help diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 917ac740513e..5b835bc6a194 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -28,9 +28,7 @@ config KVM select HAVE_KVM_INVALID_WAKEUPS select HAVE_KVM_NO_POLL select KVM_VFIO - select MMU_NOTIFIER select VIRT_XFER_TO_GUEST_WORK - select KVM_GENERIC_MMU_NOTIFIER select KVM_MMU_LOCKLESS_AGING help Support hosting paravirtualized guest machines using the SIE diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index d916bd766c94..801bf9e520db 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -20,7 +20,6 @@ if VIRTUALIZATION config KVM_X86 def_tristate KVM if (KVM_INTEL != n || KVM_AMD != n) select KVM_COMMON - select KVM_GENERIC_MMU_NOTIFIER select KVM_ELIDE_TLB_FLUSH_IF_YOUNG select KVM_MMU_LOCKLESS_AGING select HAVE_KVM_IRQCHIP diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dde605cb894e..34759a262b28 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -253,7 +253,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { unsigned long attributes; }; @@ -275,7 +274,6 @@ struct kvm_gfn_range { bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -#endif enum { OUTSIDE_GUEST_MODE, @@ -849,13 +847,12 @@ struct kvm { struct hlist_head irq_ack_notifier_list; #endif -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; gfn_t mmu_invalidate_range_start; gfn_t mmu_invalidate_range_end; -#endif + struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; @@ -2118,7 +2115,6 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) @@ -2196,7 +2192,6 @@ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; } -#endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 267c7369c765..794976b88c6f 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -5,6 +5,7 @@ config KVM_COMMON bool select EVENTFD select INTERVAL_TREE + select MMU_NOTIFIER select PREEMPT_NOTIFIERS config HAVE_KVM_PFNCACHE @@ -93,24 +94,16 @@ config HAVE_KVM_PM_NOTIFIER config KVM_GENERIC_HARDWARE_ENABLING bool -config KVM_GENERIC_MMU_NOTIFIER - select MMU_NOTIFIER - bool - config KVM_ELIDE_TLB_FLUSH_IF_YOUNG - depends on KVM_GENERIC_MMU_NOTIFIER bool config KVM_MMU_LOCKLESS_AGING - depends on KVM_GENERIC_MMU_NOTIFIER bool config KVM_GENERIC_MEMORY_ATTRIBUTES - depends on KVM_GENERIC_MMU_NOTIFIER bool config KVM_GUEST_MEMFD - depends on KVM_GENERIC_MMU_NOTIFIER select XARRAY_MULTI bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 22f8a672e1fd..29ee01747347 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -502,7 +502,6 @@ void kvm_destroy_vcpus(struct kvm *kvm) } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_destroy_vcpus); -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) { return container_of(mn, struct kvm, mmu_notifier); @@ -901,15 +900,6 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) return mmu_notifier_register(&kvm->mmu_notifier, current->mm); } -#else /* !CONFIG_KVM_GENERIC_MMU_NOTIFIER */ - -static int kvm_init_mmu_notifier(struct kvm *kvm) -{ - return 0; -} - -#endif /* CONFIG_KVM_GENERIC_MMU_NOTIFIER */ - #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER static int kvm_pm_notifier_call(struct notifier_block *bl, unsigned long state, @@ -1226,10 +1216,8 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) out_err_no_debugfs: kvm_coalesced_mmio_free(kvm); out_no_coalesced_mmio: -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER if (kvm->mmu_notifier.ops) mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); -#endif out_err_no_mmu_notifier: kvm_disable_virtualization(); out_err_no_disable: @@ -1292,7 +1280,6 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); /* * At this point, pending calls to invalidate_range_start() @@ -1311,9 +1298,6 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm->mn_active_invalidate_count = 0; else WARN_ON(kvm->mmu_invalidate_in_progress); -#else - kvm_flush_shadow_all(kvm); -#endif kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { From 70295a479da684905c18d96656d781823f418ec2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 11 Feb 2026 19:06:31 +0100 Subject: [PATCH 544/828] KVM: always define KVM_CAP_SYNC_MMU KVM_CAP_SYNC_MMU is provided by KVM's MMU notifiers, which are now always available. Move the definition from individual architectures to common code. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 10 ++++------ arch/arm64/kvm/arm.c | 1 - arch/loongarch/kvm/vm.c | 1 - arch/mips/kvm/mips.c | 1 - arch/powerpc/kvm/powerpc.c | 5 ----- arch/riscv/kvm/vm.c | 1 - arch/s390/kvm/kvm-s390.c | 1 - arch/x86/kvm/x86.c | 1 - virt/kvm/kvm_main.c | 1 + 9 files changed, 5 insertions(+), 17 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fc5736839edd..6f85e1b321dd 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1396,7 +1396,10 @@ or its flags may be modified, but it may not be resized. Memory for the region is taken starting at the address denoted by the field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including -anonymous memory, ordinary files, and hugetlbfs. +anonymous memory, ordinary files, and hugetlbfs. Changes in the backing +of the memory region are automatically reflected into the guest. +For example, an mmap() that affects the region will be made visible +immediately. Another example is madvise(MADV_DROP). On architectures that support a form of address tagging, userspace_addr must be an untagged address. @@ -1412,11 +1415,6 @@ use it. The latter can be set, if KVM_CAP_READONLY_MEM capability allows it, to make a new slot read-only. In this case, writes to this memory will be posted to userspace as KVM_EXIT_MMIO exits. -When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of -the memory region are automatically reflected into the guest. For example, an -mmap() that affects the region will be made visible immediately. Another -example is madvise(MADV_DROP). - For TDX guest, deleting/moving memory region loses guest memory contents. Read only region isn't supported. Only as-id 0 is supported. diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 29f0326f7e00..410ffd41fd73 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -358,7 +358,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_IOEVENTFD: case KVM_CAP_USER_MEMORY: - case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 63fd40530aa9..5282158b8122 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -118,7 +118,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: - case KVM_CAP_SYNC_MMU: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_IOEVENTFD: case KVM_CAP_MP_STATE: diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b0fb92fda4d4..29d9f630edfb 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1035,7 +1035,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: - case KVM_CAP_SYNC_MMU: case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3da40ea8c562..00302399fc37 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -623,11 +623,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !!(hv_enabled && kvmppc_hv_ops->enable_nested && !kvmppc_hv_ops->enable_nested(NULL)); break; -#endif - case KVM_CAP_SYNC_MMU: - r = 1; - break; -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_HTAB_FD: r = hv_enabled; break; diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 7cbd2340c190..58bce57dc55b 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -181,7 +181,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_IOEVENTFD: case KVM_CAP_USER_MEMORY: - case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_READONLY_MEM: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7a175d86cef0..bc7d6fa66eaf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -601,7 +601,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: - case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_S390_UCONTROL case KVM_CAP_S390_UCONTROL: #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3fb64905d190..a03530795707 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4805,7 +4805,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: - case KVM_CAP_SYNC_MMU: case KVM_CAP_USER_NMI: case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_IOEVENTFD: diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 29ee01747347..1bc1da66b4b0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4870,6 +4870,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { + case KVM_CAP_SYNC_MMU: case KVM_CAP_USER_MEMORY: case KVM_CAP_USER_MEMORY2: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: From 6996a2d2d0a64808c19c98002aeb5d9d1b2df6a4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 27 Feb 2026 03:55:35 +0000 Subject: [PATCH 545/828] udp: Unhash auto-bound connected sk from 4-tuple hash table when disconnected. Let's say we bind() an UDP socket to the wildcard address with a non-zero port, connect() it to an address, and disconnect it from the address. bind() sets SOCK_BINDPORT_LOCK on sk->sk_userlocks (but not SOCK_BINDADDR_LOCK), and connect() calls udp_lib_hash4() to put the socket into the 4-tuple hash table. Then, __udp_disconnect() calls sk->sk_prot->rehash(sk). It computes a new hash based on the wildcard address and moves the socket to a new slot in the 4-tuple hash table, leaving a garbage in the chain that no packet hits. Let's remove such a socket from 4-tuple hash table when disconnected. Note that udp_sk(sk)->udp_portaddr_hash needs to be udpated after udp_hash4_dec(hslot2) in udp_unhash4(). Fixes: 78c91ae2c6de ("ipv4/udp: Add 4-tuple hash for connected socket") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260227035547.3321327-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 014fdfdd331b..b60fad393e18 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2287,7 +2287,6 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); - udp_sk(sk)->udp_portaddr_hash = newhash; if (hslot2 != nhslot2 || rcu_access_pointer(sk->sk_reuseport_cb)) { @@ -2321,19 +2320,25 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) if (udp_hashed4(sk)) { spin_lock_bh(&hslot->lock); - udp_rehash4(udptable, sk, newhash4); - if (hslot2 != nhslot2) { - spin_lock(&hslot2->lock); - udp_hash4_dec(hslot2); - spin_unlock(&hslot2->lock); + if (inet_rcv_saddr_any(sk)) { + udp_unhash4(udptable, sk); + } else { + udp_rehash4(udptable, sk, newhash4); + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + udp_hash4_dec(hslot2); + spin_unlock(&hslot2->lock); - spin_lock(&nhslot2->lock); - udp_hash4_inc(nhslot2); - spin_unlock(&nhslot2->lock); + spin_lock(&nhslot2->lock); + udp_hash4_inc(nhslot2); + spin_unlock(&nhslot2->lock); + } } spin_unlock_bh(&hslot->lock); } + + udp_sk(sk)->udp_portaddr_hash = newhash; } } EXPORT_IPV6_MOD(udp_lib_rehash); From 026dfef287c07f37d4d4eef7a0b5a4bfdb29b32d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 26 Feb 2026 16:33:59 -0800 Subject: [PATCH 546/828] tcp: give up on stronger sk_rcvbuf checks (for now) We hit another corner case which leads to TcpExtTCPRcvQDrop Connections which send RPCs in the 20-80kB range over loopback experience spurious drops. The exact conditions for most of the drops I investigated are that: - socket exchanged >1MB of data so its not completely fresh - rcvbuf is around 128kB (default, hasn't grown) - there is ~60kB of data in rcvq - skb > 64kB arrives The sum of skb->len (!) of both of the skbs (the one already in rcvq and the arriving one) is larger than rwnd. My suspicion is that this happens because __tcp_select_window() rounds the rwnd up to (1 << wscale) if less than half of the rwnd has been consumed. Eric suggests that given the number of Fixes we already have pointing to 1d2fbaad7cd8 it's probably time to give up on it, until a bigger revamp of rmem management. Also while we could risk tweaking the rwnd math, there are other drops on workloads I investigated, after the commit in question, not explained by this phenomenon. Suggested-by: Eric Dumazet Link: https://lore.kernel.org/20260225122355.585fd57b@kernel.org Fixes: 1d2fbaad7cd8 ("tcp: stronger sk_rcvbuf checks") Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260227003359.2391017-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6404e53382ca..7b03f2460751 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5374,25 +5374,11 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb); -/* Check if this incoming skb can be added to socket receive queues - * while satisfying sk->sk_rcvbuf limit. - * - * In theory we should use skb->truesize, but this can cause problems - * when applications use too small SO_RCVBUF values. - * When LRO / hw gro is used, the socket might have a high tp->scaling_ratio, - * allowing RWIN to be close to available space. - * Whenever the receive queue gets full, we can receive a small packet - * filling RWIN, but with a high skb->truesize, because most NIC use 4K page - * plus sk_buff metadata even when receiving less than 1500 bytes of payload. - * - * Note that we use skb->len to decide to accept or drop this packet, - * but sk->sk_rmem_alloc is the sum of all skb->truesize. - */ static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb) { unsigned int rmem = atomic_read(&sk->sk_rmem_alloc); - return rmem + skb->len <= sk->sk_rcvbuf; + return rmem <= sk->sk_rcvbuf; } static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb, From 1cc93c48b5d7add5ea038860539893ce1310d72d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 26 Feb 2026 19:34:46 -0800 Subject: [PATCH 547/828] selftests/net: packetdrill: remove tests for tcp_rcv_*big Since commit 1d2fbaad7cd8 ("tcp: stronger sk_rcvbuf checks") has been reverted we need to remove the corresponding tests. Link: https://lore.kernel.org/20260227003359.2391017-1-kuba@kernel.org Link: https://patch.msgid.link/20260227033446.2596457-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/packetdrill/tcp_rcv_big_endseq.pkt | 44 ------------------- .../net/packetdrill/tcp_rcv_toobig.pkt | 33 -------------- 2 files changed, 77 deletions(-) delete mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt delete mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt deleted file mode 100644 index 3848b419e68c..000000000000 --- a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - ---mss=1000 - -`./defaults.sh` - - 0 `nstat -n` - -// Establish a connection. - +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 - +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 - +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 - +0 bind(3, ..., ...) = 0 - +0 listen(3, 1) = 0 - - +0 < S 0:0(0) win 32792 - +0 > S. 0:0(0) ack 1 - +.1 < . 1:1(0) ack 1 win 257 - - +0 accept(3, ..., ...) = 4 - - +0 < P. 1:4001(4000) ack 1 win 257 - +0 > . 1:1(0) ack 4001 win 5000 - -// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW - +0 < P. 4001:54001(50000) ack 1 win 257 - +0 > . 1:1(0) ack 4001 win 5000 - -// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW - +1 < P. 5001:55001(50000) ack 1 win 257 - +0 > . 1:1(0) ack 4001 win 5000 - -// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW - +0 < P. 70001:80001(10000) ack 1 win 257 - +0 > . 1:1(0) ack 4001 win 5000 - - +0 read(4, ..., 100000) = 4000 - -// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd - +0 < P. 4001:54001(50000) ack 1 win 257 - +0 > . 1:1(0) ack 54001 win 0 - -// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. -+0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt deleted file mode 100644 index f575c0ff89da..000000000000 --- a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - ---mss=1000 - -`./defaults.sh` - - 0 `nstat -n` - -// Establish a connection. - +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 - +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 - +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 - +0 bind(3, ..., ...) = 0 - +0 listen(3, 1) = 0 - - +0 < S 0:0(0) win 32792 - +0 > S. 0:0(0) ack 1 win 18980 - +.1 < . 1:1(0) ack 1 win 257 - - +0 accept(3, ..., ...) = 4 - - +0 < P. 1:20001(20000) ack 1 win 257 - +.04 > . 1:1(0) ack 20001 win 18000 - - +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 - +0 < P. 20001:80001(60000) ack 1 win 257 - +0 > . 1:1(0) ack 20001 win 18000 - - +0 read(4, ..., 20000) = 20000 -// A too big packet is accepted if the receive queue is empty - +0 < P. 20001:80001(60000) ack 1 win 257 - +0 > . 1:1(0) ack 80001 win 0 - From 60abb0ac11dccd6b98fd9182bc5f85b621688861 Mon Sep 17 00:00:00 2001 From: "Nikhil P. Rao" Date: Wed, 25 Feb 2026 00:00:26 +0000 Subject: [PATCH 548/828] xsk: Fix fragment node deletion to prevent buffer leak After commit b692bf9a7543 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node"), the list_node field is reused for both the xskb pool list and the buffer free list, this causes a buffer leak as described below. xp_free() checks if a buffer is already on the free list using list_empty(&xskb->list_node). When list_del() is used to remove a node from the xskb pool list, it doesn't reinitialize the node pointers. This means list_empty() will return false even after the node has been removed, causing xp_free() to incorrectly skip adding the buffer to the free list. Fix this by using list_del_init() instead of list_del() in all fragment handling paths, this ensures the list node is reinitialized after removal, allowing the list_empty() to work correctly. Fixes: b692bf9a7543 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node") Acked-by: Maciej Fijalkowski Signed-off-by: Nikhil P. Rao Link: https://patch.msgid.link/20260225000456.107806-2-nikhil.rao@amd.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock_drv.h | 6 +++--- net/xdp/xsk.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 242e34f771cc..aefc368449d5 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -122,7 +122,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) goto out; list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { - list_del(&pos->list_node); + list_del_init(&pos->list_node); xp_free(pos); } @@ -157,7 +157,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) frag = list_first_entry_or_null(&xskb->pool->xskb_list, struct xdp_buff_xsk, list_node); if (frag) { - list_del(&frag->list_node); + list_del_init(&frag->list_node); ret = &frag->xdp; } @@ -168,7 +168,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); - list_del(&xskb->list_node); + list_del_init(&xskb->list_node); } static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3b46bc635c43..be882a8d473c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -186,7 +186,7 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) err = __xsk_rcv_zc(xs, pos, len, contd); if (err) goto err; - list_del(&pos->list_node); + list_del_init(&pos->list_node); } return 0; From f7387d6579d65efd490a864254101cb665f2e7a7 Mon Sep 17 00:00:00 2001 From: "Nikhil P. Rao" Date: Wed, 25 Feb 2026 00:00:27 +0000 Subject: [PATCH 549/828] xsk: Fix zero-copy AF_XDP fragment drop AF_XDP should ensure that only a complete packet is sent to application. In the zero-copy case, if the Rx queue gets full as fragments are being enqueued, the remaining fragments are dropped. For the multi-buffer case, add a check to ensure that the Rx queue has enough space for all fragments of a packet before starting to enqueue them. Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Signed-off-by: Nikhil P. Rao Link: https://patch.msgid.link/20260225000456.107806-3-nikhil.rao@amd.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index be882a8d473c..6149f6a79897 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -167,25 +167,31 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) struct xdp_buff_xsk *pos, *tmp; struct list_head *xskb_list; u32 contd = 0; + u32 num_desc; int err; - if (frags) - contd = XDP_PKT_CONTD; - - err = __xsk_rcv_zc(xs, xskb, len, contd); - if (err) - goto err; - if (likely(!frags)) + if (likely(!frags)) { + err = __xsk_rcv_zc(xs, xskb, len, contd); + if (err) + goto err; return 0; + } + contd = XDP_PKT_CONTD; + num_desc = xdp_get_shared_info_from_buff(xdp)->nr_frags + 1; + if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) { + xs->rx_queue_full++; + err = -ENOBUFS; + goto err; + } + + __xsk_rcv_zc(xs, xskb, len, contd); xskb_list = &xskb->pool->xskb_list; list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { if (list_is_singular(xskb_list)) contd = 0; len = pos->xdp.data_end - pos->xdp.data; - err = __xsk_rcv_zc(xs, pos, len, contd); - if (err) - goto err; + __xsk_rcv_zc(xs, pos, len, contd); list_del_init(&pos->list_node); } From 74badb9c20b1a9c02a95c735c6d3cd6121679c93 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 26 Feb 2026 21:58:12 -0800 Subject: [PATCH 550/828] dpaa2-switch: Fix interrupt storm after receiving bad if_id in IRQ handler Commit 31a7a0bbeb00 ("dpaa2-switch: add bounds check for if_id in IRQ handler") introduces a range check for if_id to avoid an out-of-bounds access. If an out-of-bounds if_id is detected, the interrupt status is not cleared. This may result in an interrupt storm. Clear the interrupt status after detecting an out-of-bounds if_id to avoid the problem. Found by an experimental AI code review agent at Google. Fixes: 31a7a0bbeb00 ("dpaa2-switch: add bounds check for if_id in IRQ handler") Cc: Junrui Luo Signed-off-by: Guenter Roeck Reviewed-by: Ioana Ciornei Link: https://patch.msgid.link/20260227055812.1777915-1-linux@roeck-us.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index fc0e0f36186e..52c1cb9cb7e0 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -1533,7 +1533,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) if_id = (status & 0xFFFF0000) >> 16; if (if_id >= ethsw->sw_attr.num_ifs) { dev_err(dev, "Invalid if_id %d in IRQ status\n", if_id); - goto out; + goto out_clear; } port_priv = ethsw->ports[if_id]; @@ -1553,6 +1553,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) dpaa2_switch_port_connect_mac(port_priv); } +out_clear: err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, status); if (err) From 101bacb303e89dc2e0640ae6a5e0fb97c4eb45bb Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 25 Feb 2026 20:32:40 +0800 Subject: [PATCH 551/828] atm: lec: fix null-ptr-deref in lec_arp_clear_vccs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzkaller reported a null-ptr-deref in lec_arp_clear_vccs(). This issue can be easily reproduced using the syzkaller reproducer. In the ATM LANE (LAN Emulation) module, the same atm_vcc can be shared by multiple lec_arp_table entries (e.g., via entry->vcc or entry->recv_vcc). When the underlying VCC is closed, lec_vcc_close() iterates over all ARP entries and calls lec_arp_clear_vccs() for each matched entry. For example, when lec_vcc_close() iterates through the hlists in priv->lec_arp_empty_ones or other ARP tables: 1. In the first iteration, for the first matched ARP entry sharing the VCC, lec_arp_clear_vccs() frees the associated vpriv (which is vcc->user_back) and sets vcc->user_back to NULL. 2. In the second iteration, for the next matched ARP entry sharing the same VCC, lec_arp_clear_vccs() is called again. It obtains a NULL vpriv from vcc->user_back (via LEC_VCC_PRIV(vcc)) and then attempts to dereference it via `vcc->pop = vpriv->old_pop`, leading to a null-ptr-deref crash. Fix this by adding a null check for vpriv before dereferencing it. If vpriv is already NULL, it means the VCC has been cleared by a previous call, so we can safely skip the cleanup and just clear the entry's vcc/recv_vcc pointers. The entire cleanup block (including vcc_release_async()) is placed inside the vpriv guard because a NULL vpriv indicates the VCC has already been fully released by a prior iteration — repeating the teardown would redundantly set flags and trigger callbacks on an already-closing socket. The Fixes tag points to the initial commit because the entry->vcc path has been vulnerable since the original code. The entry->recv_vcc path was later added by commit 8d9f73c0ad2f ("atm: fix a memory leak of vcc->user_back") with the same pattern, and both paths are fixed here. Reported-by: syzbot+72e3ea390c305de0e259@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68c95a83.050a0220.3c6139.0e5c.GAE@google.com/T/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Dan Carpenter Reviewed-by: Simon Horman Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260225123250.189289-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/atm/lec.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index a107375c0629..fb93c6e1c329 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1260,24 +1260,28 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry) struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); struct net_device *dev = (struct net_device *)vcc->proto_data; - vcc->pop = vpriv->old_pop; - if (vpriv->xoff) - netif_wake_queue(dev); - kfree(vpriv); - vcc->user_back = NULL; - vcc->push = entry->old_push; - vcc_release_async(vcc, -EPIPE); + if (vpriv) { + vcc->pop = vpriv->old_pop; + if (vpriv->xoff) + netif_wake_queue(dev); + kfree(vpriv); + vcc->user_back = NULL; + vcc->push = entry->old_push; + vcc_release_async(vcc, -EPIPE); + } entry->vcc = NULL; } if (entry->recv_vcc) { struct atm_vcc *vcc = entry->recv_vcc; struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - kfree(vpriv); - vcc->user_back = NULL; + if (vpriv) { + kfree(vpriv); + vcc->user_back = NULL; - entry->recv_vcc->push = entry->old_recv_push; - vcc_release_async(entry->recv_vcc, -EPIPE); + entry->recv_vcc->push = entry->old_recv_push; + vcc_release_async(entry->recv_vcc, -EPIPE); + } entry->recv_vcc = NULL; } } From 9197e5949a41cfb5d44a6b8a860766266340d558 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 28 Feb 2026 11:56:03 +0900 Subject: [PATCH 552/828] firewire: ohci: initialize page array to use alloc_pages_bulk() correctly The call of alloc_pages_bulk() skips to fill entries of page array when the entries already have values. While, 1394 OHCI PCI driver passes the page array without initializing. It could cause invalid state at PFN validation in vmap(). Fixes: f2ae92780ab9 ("firewire: ohci: split page allocation from dma mapping") Reported-by: John Ogness Reported-and-tested-by: Harald Arnesen Reported-and-tested-by: David Gow Closes: https://lore.kernel.org/lkml/87tsv1vig5.fsf@jogness.linutronix.de/ Signed-off-by: Takashi Sakamoto Signed-off-by: Linus Torvalds --- drivers/firewire/ohci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 1c868c1e4a49..8153d62c58f0 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -848,7 +848,7 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, { struct device *dev = ohci->card.device; unsigned int i; - struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES]; + struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES] = { NULL }; dma_addr_t dma_addrs[AR_BUFFERS]; void *vaddr; struct descriptor *d; From 147792c395db870756a0dc87ce656c75ae7ab7e8 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 26 Feb 2026 15:53:56 +0530 Subject: [PATCH 553/828] net: ti: icssg-prueth: Fix ping failure after offload mode setup when link speed is not 1G When both eth interfaces with links up are added to a bridge or hsr interface, ping fails if the link speed is not 1Gbps (e.g., 100Mbps). The issue is seen because when switching to offload (bridge/hsr) mode, prueth_emac_restart() restarts the firmware and clears DRAM with memset_io(), setting all memory to 0. This includes PORT_LINK_SPEED_OFFSET which firmware reads for link speed. The value 0 corresponds to FW_LINK_SPEED_1G (0x00), so for 1Gbps links the default value is correct and ping works. For 100Mbps links, the firmware needs FW_LINK_SPEED_100M (0x01) but gets 0 instead, causing ping to fail. The function emac_adjust_link() is called to reconfigure, but it detects no state change (emac->link is still 1, speed/duplex match PHY) so new_state remains false and icssg_config_set_speed() is never called to correct the firmware speed value. The fix resets emac->link to 0 before calling emac_adjust_link() in prueth_emac_common_start(). This forces new_state=true, ensuring icssg_config_set_speed() is called to write the correct speed value to firmware memory. Fixes: 06feac15406f ("net: ti: icssg-prueth: Fix emac link speed handling") Signed-off-by: MD Danish Anwar Link: https://patch.msgid.link/20260226102356.2141871-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 0939994c932f..42a881bee109 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -273,6 +273,14 @@ static int prueth_emac_common_start(struct prueth *prueth) if (ret) goto disable_class; + /* Reset link state to force reconfiguration in + * emac_adjust_link(). Without this, if the link was already up + * before restart, emac_adjust_link() won't detect any state + * change and will skip critical configuration like writing + * speed to firmware. + */ + emac->link = 0; + mutex_lock(&emac->ndev->phydev->lock); emac_adjust_link(emac->ndev); mutex_unlock(&emac->ndev->phydev->lock); From 9439a661c2e80485406ce2c90b107ca17858382d Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 26 Feb 2026 22:37:53 +0530 Subject: [PATCH 554/828] amd-xgbe: fix MAC_TCR_SS register width for 2.5G and 10M speeds Extend the MAC_TCR_SS (Speed Select) register field width from 2 bits to 3 bits to properly support all speed settings. The MAC_TCR register's SS field encoding requires 3 bits to represent all supported speeds: - 0x00: 10Gbps (XGMII) - 0x02: 2.5Gbps (GMII) / 100Mbps - 0x03: 1Gbps / 10Mbps - 0x06: 2.5Gbps (XGMII) - P100a only With only 2 bits, values 0x04-0x07 cannot be represented, which breaks 2.5G XGMII mode on newer platforms and causes incorrect speed select values to be programmed. Fixes: 07445f3c7ca1 ("amd-xgbe: Add support for 10 Mbps speed") Co-developed-by: Guruvendra Punugupati Signed-off-by: Guruvendra Punugupati Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260226170753.250312-1-Raju.Rangoju@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 711f295eb777..80c2c27ac9dc 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -431,7 +431,7 @@ #define MAC_SSIR_SSINC_INDEX 16 #define MAC_SSIR_SSINC_WIDTH 8 #define MAC_TCR_SS_INDEX 29 -#define MAC_TCR_SS_WIDTH 2 +#define MAC_TCR_SS_WIDTH 3 #define MAC_TCR_TE_INDEX 0 #define MAC_TCR_TE_WIDTH 1 #define MAC_TCR_VNE_INDEX 24 From 1ac22c8eae81366101597d48360718dff9b9d980 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 23 Feb 2026 15:27:28 -0800 Subject: [PATCH 555/828] scsi: core: Fix refcount leak for tagset_refcnt This leak will cause a hang when tearing down the SCSI host. For example, iscsid hangs with the following call trace: [130120.652718] scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured PID: 2528 TASK: ffff9d0408974e00 CPU: 3 COMMAND: "iscsid" #0 [ffffb5b9c134b9e0] __schedule at ffffffff860657d4 #1 [ffffb5b9c134ba28] schedule at ffffffff86065c6f #2 [ffffb5b9c134ba40] schedule_timeout at ffffffff86069fb0 #3 [ffffb5b9c134bab0] __wait_for_common at ffffffff8606674f #4 [ffffb5b9c134bb10] scsi_remove_host at ffffffff85bfe84b #5 [ffffb5b9c134bb30] iscsi_sw_tcp_session_destroy at ffffffffc03031c4 [iscsi_tcp] #6 [ffffb5b9c134bb48] iscsi_if_recv_msg at ffffffffc0292692 [scsi_transport_iscsi] #7 [ffffb5b9c134bb98] iscsi_if_rx at ffffffffc02929c2 [scsi_transport_iscsi] #8 [ffffb5b9c134bbf0] netlink_unicast at ffffffff85e551d6 #9 [ffffb5b9c134bc38] netlink_sendmsg at ffffffff85e554ef Fixes: 8fe4ce5836e9 ("scsi: core: Fix a use-after-free") Cc: stable@vger.kernel.org Signed-off-by: Junxiao Bi Reviewed-by: Mike Christie Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260223232728.93350-1-junxiao.bi@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 60c06fa4ec32..2cfcf1f5d6a4 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -361,6 +361,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, * since we use this queue depth most of times. */ if (scsi_realloc_sdev_budget_map(sdev, depth)) { + kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags); put_device(&starget->dev); kfree(sdev); goto out; From dbd53975ed4132d161b6a97ebe785a262380182d Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Wed, 25 Feb 2026 13:56:22 +0530 Subject: [PATCH 556/828] scsi: mpi3mr: Clear reset history on ready and recheck state after timeout The driver retains reset history even after the IOC has successfully reached the READY state. That leaves stale reset information active during normal operation and can mislead recovery and diagnostics. In addition, if the IOC becomes READY just as the ready timeout loop exits, the driver still follows the failure path and may retry or report failure incorrectly. Clear reset history once READY is confirmed so driver state matches actual IOC status. After the timeout loop, recheck the IOC state and treat READY as success instead of failing. Signed-off-by: Ranjan Kumar Link: https://patch.msgid.link/20260225082622.82588-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 5875065e2849..c744210cc901 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1618,6 +1618,7 @@ retry_bring_ioc_ready: ioc_info(mrioc, "successfully transitioned to %s state\n", mpi3mr_iocstate_name(ioc_state)); + mpi3mr_clear_reset_history(mrioc); return 0; } ioc_status = readl(&mrioc->sysif_regs->ioc_status); @@ -1637,6 +1638,15 @@ retry_bring_ioc_ready: elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000; } while (elapsed_time_sec < mrioc->ready_timeout); + ioc_state = mpi3mr_get_iocstate(mrioc); + if (ioc_state == MRIOC_STATE_READY) { + ioc_info(mrioc, + "successfully transitioned to %s state after %llu seconds\n", + mpi3mr_iocstate_name(ioc_state), elapsed_time_sec); + mpi3mr_clear_reset_history(mrioc); + return 0; + } + out_failed: elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000; if ((retry < 2) && (elapsed_time_sec < (mrioc->ready_timeout - 60))) { From 80bf3b28d32b431f84f244a8469488eb6d96afbb Mon Sep 17 00:00:00 2001 From: Florian Fuchs Date: Fri, 27 Feb 2026 19:18:23 +0100 Subject: [PATCH 557/828] scsi: devinfo: Add BLIST_SKIP_IO_HINTS for Iomega ZIP The Iomega ZIP 100 (Z100P2) can't process IO Advice Hints Grouping mode page query. It immediately switches to the status phase 0xb8 after receiving the subpage code 0x05 of MODE_SENSE_10 command, which fails imm_out() and turns into DID_ERROR of this command, which leads to unusable device. This was tested with an Iomega ZIP 100 (Z100P2) connected with a StarTech PEX1P2 AX99100 PCIe parallel port card. Prior to this fix, Test Unit Ready fails and the drive can't be used: IMM: returned SCSI status b8 sd 7:0:6:0: [sdh] Test Unit Ready failed: Result: hostbyte=0x01 driverbyte=DRIVER_OK Signed-off-by: Florian Fuchs Link: https://patch.msgid.link/20260227181823.892932-1-fuchsfl@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 0dada89d8d99..68a992494b12 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -190,7 +190,7 @@ static struct { {"IBM", "2076", NULL, BLIST_NO_VPD_SIZE}, {"IBM", "2105", NULL, BLIST_RETRY_HWERROR}, {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN}, - {"IOMEGA", "ZIP", NULL, BLIST_NOTQ | BLIST_NOLUN}, + {"IOMEGA", "ZIP", NULL, BLIST_NOTQ | BLIST_NOLUN | BLIST_SKIP_IO_HINTS}, {"IOMEGA", "Io20S *F", NULL, BLIST_KEY}, {"INSITE", "Floptical F*8I", NULL, BLIST_KEY}, {"INSITE", "I325VM", NULL, BLIST_KEY}, From 14d4ac19d1895397532eec407433c5d74d9da53b Mon Sep 17 00:00:00 2001 From: Prithvi Tambewagh Date: Mon, 16 Feb 2026 11:50:02 +0530 Subject: [PATCH 558/828] scsi: target: Fix recursive locking in __configfs_open_file() In flush_write_buffer, &p->frag_sem is acquired and then the loaded store function is called, which, here, is target_core_item_dbroot_store(). This function called filp_open(), following which these functions were called (in reverse order), according to the call trace: down_read __configfs_open_file do_dentry_open vfs_open do_open path_openat do_filp_open file_open_name filp_open target_core_item_dbroot_store flush_write_buffer configfs_write_iter target_core_item_dbroot_store() tries to validate the new file path by trying to open the file path provided to it; however, in this case, the bug report shows: db_root: not a directory: /sys/kernel/config/target/dbroot indicating that the same configfs file was tried to be opened, on which it is currently working on. Thus, it is trying to acquire frag_sem semaphore of the same file of which it already holds the semaphore obtained in flush_write_buffer(), leading to acquiring the semaphore in a nested manner and a possibility of recursive locking. Fix this by modifying target_core_item_dbroot_store() to use kern_path() instead of filp_open() to avoid opening the file using filesystem-specific function __configfs_open_file(), and further modifying it to make this fix compatible. Reported-by: syzbot+f6e8174215573a84b797@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f6e8174215573a84b797 Tested-by: syzbot+f6e8174215573a84b797@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Prithvi Tambewagh Reviewed-by: Dmitry Bogdanov Link: https://patch.msgid.link/20260216062002.61937-1-activprithvi@gmail.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 17608ea39d5a..a1c91d4515bc 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -108,8 +108,8 @@ static ssize_t target_core_item_dbroot_store(struct config_item *item, const char *page, size_t count) { ssize_t read_bytes; - struct file *fp; ssize_t r = -EINVAL; + struct path path = {}; mutex_lock(&target_devices_lock); if (target_devices) { @@ -131,17 +131,14 @@ static ssize_t target_core_item_dbroot_store(struct config_item *item, db_root_stage[read_bytes - 1] = '\0'; /* validate new db root before accepting it */ - fp = filp_open(db_root_stage, O_RDONLY, 0); - if (IS_ERR(fp)) { + r = kern_path(db_root_stage, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); + if (r) { pr_err("db_root: cannot open: %s\n", db_root_stage); + if (r == -ENOTDIR) + pr_err("db_root: not a directory: %s\n", db_root_stage); goto unlock; } - if (!S_ISDIR(file_inode(fp)->i_mode)) { - filp_close(fp, NULL); - pr_err("db_root: not a directory: %s\n", db_root_stage); - goto unlock; - } - filp_close(fp, NULL); + path_put(&path); strscpy(db_root, db_root_stage); pr_debug("Target_Core_ConfigFS: db_root set to %s\n", db_root); From 4478e8eeb87120c11e90041864c2233238b2155a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 26 Feb 2026 11:17:49 -0800 Subject: [PATCH 559/828] lib/crypto: tests: Depend on library options rather than selecting them The convention for KUnit tests is to have the test kconfig options visible only when the code they depend on is already enabled. This way only the tests that are relevant to the particular kernel build can be enabled, either manually or via KUNIT_ALL_TESTS. Update lib/crypto/tests/Kconfig to follow that convention, i.e. depend on the corresponding library options rather than selecting them. This fixes an issue where enabling KUNIT_ALL_TESTS enabled non-test code. This does mean that it becomes a bit more difficult to enable *all* the crypto library tests (which is what I do as a maintainer of the code), since doing so will now require enabling other options that select the libraries. Regardless, we should follow the standard KUnit convention. I'll also add a .kunitconfig file that does enable all these options. Note: currently most of the crypto library options are selected by visible options in crypto/Kconfig, which can be used to enable them without too much trouble. If in the future we end up with more cases like CRYPTO_LIB_CURVE25519 which is selected only by WIREGUARD (thus making CRYPTO_LIB_CURVE25519_KUNIT_TEST effectively depend on WIREGUARD after this commit), we could consider adding a new kconfig option that enables all the library code specifically for testing. Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/r/CAMuHMdULzMdxuTVfg8_4jdgzbzjfx-PHkcgbGSthcUx_sHRNMg@mail.gmail.com Fixes: 4dcf6caddaa0 ("lib/crypto: tests: Add KUnit tests for SHA-224 and SHA-256") Fixes: 571eaeddb67d ("lib/crypto: tests: Add KUnit tests for SHA-384 and SHA-512") Fixes: 6dd4d9f7919e ("lib/crypto: tests: Add KUnit tests for Poly1305") Fixes: 66b130607908 ("lib/crypto: tests: Add KUnit tests for SHA-1 and HMAC-SHA1") Fixes: d6b6aac0cdb4 ("lib/crypto: tests: Add KUnit tests for MD5 and HMAC-MD5") Fixes: afc4e4a5f122 ("lib/crypto: tests: Migrate Curve25519 self-test to KUnit") Fixes: 6401fd334ddf ("lib/crypto: tests: Add KUnit tests for BLAKE2b") Fixes: 15c64c47e484 ("lib/crypto: tests: Add SHA3 kunit tests") Fixes: b3aed551b3fc ("lib/crypto: tests: Add KUnit tests for POLYVAL") Fixes: ed894faccb8d ("lib/crypto: tests: Add KUnit tests for ML-DSA verification") Fixes: 7246fe6cd644 ("lib/crypto: tests: Add KUnit tests for NH") Cc: stable@vger.kernel.org Reviewed-by: David Gow Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20260226191749.39397-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index 4970463ea0aa..0de289b429a9 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -2,10 +2,9 @@ config CRYPTO_LIB_BLAKE2B_KUNIT_TEST tristate "KUnit tests for BLAKE2b" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_BLAKE2B default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_BLAKE2B help KUnit tests for the BLAKE2b cryptographic hash function. @@ -14,71 +13,64 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST depends on KUNIT default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - # No need to select CRYPTO_LIB_BLAKE2S here, as that option doesn't + # No need to depend on CRYPTO_LIB_BLAKE2S here, as that option doesn't # exist; the BLAKE2s code is always built-in for the /dev/random driver. help KUnit tests for the BLAKE2s cryptographic hash function. config CRYPTO_LIB_CURVE25519_KUNIT_TEST tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_CURVE25519 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_CURVE25519 help KUnit tests for the Curve25519 Diffie-Hellman function. config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_MD5 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_MD5 help KUnit tests for the MD5 cryptographic hash function and its corresponding HMAC. config CRYPTO_LIB_MLDSA_KUNIT_TEST tristate "KUnit tests for ML-DSA" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_MLDSA default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_MLDSA help KUnit tests for the ML-DSA digital signature algorithm. config CRYPTO_LIB_NH_KUNIT_TEST tristate "KUnit tests for NH" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_NH default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS - select CRYPTO_LIB_NH help KUnit tests for the NH almost-universal hash function. config CRYPTO_LIB_POLY1305_KUNIT_TEST tristate "KUnit tests for Poly1305" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_POLY1305 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_POLY1305 help KUnit tests for the Poly1305 library functions. config CRYPTO_LIB_POLYVAL_KUNIT_TEST tristate "KUnit tests for POLYVAL" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_POLYVAL default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_POLYVAL help KUnit tests for the POLYVAL library functions. config CRYPTO_LIB_SHA1_KUNIT_TEST tristate "KUnit tests for SHA-1" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA1 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA1 help KUnit tests for the SHA-1 cryptographic hash function and its corresponding HMAC. @@ -87,10 +79,9 @@ config CRYPTO_LIB_SHA1_KUNIT_TEST # included, for consistency with the naming used elsewhere (e.g. CRYPTO_SHA256). config CRYPTO_LIB_SHA256_KUNIT_TEST tristate "KUnit tests for SHA-224 and SHA-256" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA256 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA256 help KUnit tests for the SHA-224 and SHA-256 cryptographic hash functions and their corresponding HMACs. @@ -99,20 +90,18 @@ config CRYPTO_LIB_SHA256_KUNIT_TEST # included, for consistency with the naming used elsewhere (e.g. CRYPTO_SHA512). config CRYPTO_LIB_SHA512_KUNIT_TEST tristate "KUnit tests for SHA-384 and SHA-512" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA512 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA512 help KUnit tests for the SHA-384 and SHA-512 cryptographic hash functions and their corresponding HMACs. config CRYPTO_LIB_SHA3_KUNIT_TEST tristate "KUnit tests for SHA-3" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA3 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA3 help KUnit tests for the SHA3 cryptographic hash and XOF functions, including SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128 and From 11439c4635edd669ae435eec308f4ab8a0804808 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Mar 2026 15:39:31 -0800 Subject: [PATCH 560/828] Linux 7.0-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e944c6e71e81..2446085983f7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* From 54a86cf48eaa6d1ab5130d756b718775e81e1748 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 Feb 2026 00:25:37 +0000 Subject: [PATCH 561/828] ASoC: fsl_easrc: Fix event generation in fsl_easrc_iec958_put_bits() ALSA controls should return 1 if the value in the control changed but the control put operation fsl_easrc_iec958_put_bits() unconditionally returns 0, causing ALSA to not generate any change events. This is detected by mixer-test with large numbers of messages in the form: No event generated for Context 3 IEC958 CS5 Context 3 IEC958 CS5.0 orig 5224 read 5225, is_volatile 0 Add a suitable check. Signed-off-by: Mark Brown Link: https://patch.msgid.link/20260205-asoc-fsl-easrc-fix-events-v1-1-39d4c766918b@kernel.org Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_easrc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c index e64a0d97afd0..5b6204923e8a 100644 --- a/sound/soc/fsl/fsl_easrc.c +++ b/sound/soc/fsl/fsl_easrc.c @@ -52,10 +52,13 @@ static int fsl_easrc_iec958_put_bits(struct snd_kcontrol *kcontrol, struct soc_mreg_control *mc = (struct soc_mreg_control *)kcontrol->private_value; unsigned int regval = ucontrol->value.integer.value[0]; + int ret; + + ret = (easrc_priv->bps_iec958[mc->regbase] != regval); easrc_priv->bps_iec958[mc->regbase] = regval; - return 0; + return ret; } static int fsl_easrc_iec958_get_bits(struct snd_kcontrol *kcontrol, From 31ddc62c1cd92e51b9db61d7954b85ae2ec224da Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 Feb 2026 00:25:38 +0000 Subject: [PATCH 562/828] ASoC: fsl_easrc: Fix event generation in fsl_easrc_iec958_set_reg() ALSA controls should return 1 if the value in the control changed but the control put operation fsl_easrc_set_reg() only returns 0 or a negative error code, causing ALSA to not generate any change events. Add a suitable check by using regmap_update_bits_check() with the underlying regmap, this is more clearly and simply correct than trying to verify that one of the generic ops is exactly equivalent to this one. Signed-off-by: Mark Brown Link: https://patch.msgid.link/20260205-asoc-fsl-easrc-fix-events-v1-2-39d4c766918b@kernel.org Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_easrc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c index 5b6204923e8a..6c56134c60cc 100644 --- a/sound/soc/fsl/fsl_easrc.c +++ b/sound/soc/fsl/fsl_easrc.c @@ -96,14 +96,17 @@ static int fsl_easrc_set_reg(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); struct soc_mreg_control *mc = (struct soc_mreg_control *)kcontrol->private_value; + struct fsl_asrc *easrc = snd_soc_component_get_drvdata(component); unsigned int regval = ucontrol->value.integer.value[0]; + bool changed; int ret; - ret = snd_soc_component_write(component, mc->regbase, regval); - if (ret < 0) + ret = regmap_update_bits_check(easrc->regmap, mc->regbase, + GENMASK(31, 0), regval, &changed); + if (ret != 0) return ret; - return 0; + return changed; } #define SOC_SINGLE_REG_RW(xname, xreg) \ From 9351cf3fd92dc1349bb75f2f7f7324607dcf596f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 26 Feb 2026 11:01:37 +0000 Subject: [PATCH 563/828] ASoC: cs35l56: Only patch ASP registers if the DAI is part of a DAIlink Move the ASP register patches to a separate struct and apply this from the ASP DAI probe() function so that the registers are only patched if the DAI is part of a DAI link. Some systems use the ASP as a special-purpose interconnect and on these systems the ASP registers are configured by a third party (the firmware, the BIOS, or another device using the amp's secondary host control interface). If the machine driver does not hook up the ASP DAI then the ASP registers must be omitted from the patch to prevent overwriting the third party configuration. If the machine driver includes the ASP DAI in a DAI link, this implies that the machine driver and higher components (such as alsa-ucm) are taking ownership of the ASP. In this case the ASP registers are patched to known defaults and the machine driver should configure the ASP. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260226110137.1664562-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + sound/soc/codecs/cs35l56-shared.c | 16 +++++++++++++++- sound/soc/codecs/cs35l56.c | 8 ++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index ae1e1489b671..28f9f5940ab6 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -406,6 +406,7 @@ extern const char * const cs35l56_cal_set_status_text[3]; extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; +int cs35l56_set_asp_patch(struct cs35l56_base *cs35l56_base); int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 4707f28bfca2..af87ebae98cb 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -26,7 +26,7 @@ #include "cs35l56.h" -static const struct reg_sequence cs35l56_patch[] = { +static const struct reg_sequence cs35l56_asp_patch[] = { /* * Firmware can change these to non-defaults to satisfy SDCA. * Ensure that they are at known defaults. @@ -43,6 +43,20 @@ static const struct reg_sequence cs35l56_patch[] = { { CS35L56_ASP1TX2_INPUT, 0x00000000 }, { CS35L56_ASP1TX3_INPUT, 0x00000000 }, { CS35L56_ASP1TX4_INPUT, 0x00000000 }, +}; + +int cs35l56_set_asp_patch(struct cs35l56_base *cs35l56_base) +{ + return regmap_register_patch(cs35l56_base->regmap, cs35l56_asp_patch, + ARRAY_SIZE(cs35l56_asp_patch)); +} +EXPORT_SYMBOL_NS_GPL(cs35l56_set_asp_patch, "SND_SOC_CS35L56_SHARED"); + +static const struct reg_sequence cs35l56_patch[] = { + /* + * Firmware can change these to non-defaults to satisfy SDCA. + * Ensure that they are at known defaults. + */ { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 2ff8b172b76e..3bf9e8fc34a2 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -348,6 +348,13 @@ static int cs35l56_dsp_event(struct snd_soc_dapm_widget *w, return wm_adsp_event(w, kcontrol, event); } +static int cs35l56_asp_dai_probe(struct snd_soc_dai *codec_dai) +{ + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component); + + return cs35l56_set_asp_patch(&cs35l56->base); +} + static int cs35l56_asp_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component); @@ -552,6 +559,7 @@ static int cs35l56_asp_dai_set_sysclk(struct snd_soc_dai *dai, } static const struct snd_soc_dai_ops cs35l56_ops = { + .probe = cs35l56_asp_dai_probe, .set_fmt = cs35l56_asp_dai_set_fmt, .set_tdm_slot = cs35l56_asp_dai_set_tdm_slot, .hw_params = cs35l56_asp_dai_hw_params, From 986841dcad257615a6e3f89231bb38e1f3506b77 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 25 Feb 2026 17:12:10 +0800 Subject: [PATCH 564/828] ASoC: rt1321: fix DMIC ch2/3 mask issue This patch fixed the DMIC ch2/3 mask missing problem. Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20260225091210.3648905-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1320-sdw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c index 50f65662e143..8bb7e8497c72 100644 --- a/sound/soc/codecs/rt1320-sdw.c +++ b/sound/soc/codecs/rt1320-sdw.c @@ -2629,7 +2629,7 @@ static int rt1320_sdw_hw_params(struct snd_pcm_substream *substream, struct sdw_port_config port_config; struct sdw_port_config dmic_port_config[2]; struct sdw_stream_runtime *sdw_stream; - int retval; + int retval, num_channels; unsigned int sampling_rate; dev_dbg(dai->dev, "%s %s", __func__, dai->name); @@ -2661,7 +2661,8 @@ static int rt1320_sdw_hw_params(struct snd_pcm_substream *substream, dmic_port_config[1].num = 10; break; case RT1321_DEV_ID: - dmic_port_config[0].ch_mask = BIT(0) | BIT(1); + num_channels = params_channels(params); + dmic_port_config[0].ch_mask = GENMASK(num_channels - 1, 0); dmic_port_config[0].num = 8; break; default: From 70eddf6a0a3fc6d3ab6f77251676da97cc7f12ae Mon Sep 17 00:00:00 2001 From: Oliver Freyermuth Date: Tue, 24 Feb 2026 20:02:24 +0100 Subject: [PATCH 565/828] ASoC: Intel: sof_sdw: Add quirk for Alienware Area 51 (2025) 0CCD SKU This adds the necessary quirk for the Alienware 18 Area 51 (2025). Complements commit 1b03391d073d ("ASoC: Intel: sof_sdw: Add quirk for Alienware Area 51 (2025) 0CCC SKU"). Signed-off-by: Oliver Freyermuth Tested-by: Oliver Freyermuth Link: https://patch.msgid.link/20260224190224.30630-1-o.freyermuth@googlemail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index f230991f5f8e..c18ec607e029 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -763,6 +763,14 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOC_SDW_CODEC_SPKR), }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0CCD") + }, + .driver_data = (void *)(SOC_SDW_CODEC_SPKR), + }, /* Pantherlake devices*/ { .callback = sof_sdw_quirk_cb, From fd13fc700e3e239826a46448bf7f01847dd26f5a Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Tue, 24 Feb 2026 13:03:07 +0000 Subject: [PATCH 566/828] ASoC: amd: acp: Add ACP6.3 match entries for Cirrus Logic parts This adds some match entries for a few system configurations: cs42l43 link 0 UID 0 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 cs42l45 link 1 UID 0 cs35l63 link 0 UID 0 cs35l63 link 0 UID 2 cs35l63 link 0 UID 4 cs35l63 link 0 UID 6 cs42l45 link 0 UID 0 cs35l63 link 1 UID 0 cs35l63 link 1 UID 1 cs42l45 link 0 UID 0 cs35l63 link 1 UID 1 cs35l63 link 1 UID 3 cs42l45 link 1 UID 0 cs35l63 link 0 UID 0 cs35l63 link 0 UID 1 cs42l43 link 1 UID 0 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 cs35l63 link 0 UID 0 cs35l63 link 0 UID 2 cs35l63 link 0 UID 4 cs35l63 link 0 UID 6 cs42l43 link 0 UID 1 cs42l43b link 0 UID 1 cs42l45 link 0 UID 0 cs42l45 link 1 UID 0 Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20260224130307.526626-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/amd-acp63-acpi-match.c | 413 +++++++++++++++++++++++ 1 file changed, 413 insertions(+) diff --git a/sound/soc/amd/acp/amd-acp63-acpi-match.c b/sound/soc/amd/acp/amd-acp63-acpi-match.c index 9b6a49c051cd..1dbbaba3c75b 100644 --- a/sound/soc/amd/acp/amd-acp63-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp63-acpi-match.c @@ -30,6 +30,20 @@ static const struct snd_soc_acpi_endpoint spk_r_endpoint = { .group_id = 1 }; +static const struct snd_soc_acpi_endpoint spk_2_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 2, + .group_id = 1 +}; + +static const struct snd_soc_acpi_endpoint spk_3_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 3, + .group_id = 1 +}; + static const struct snd_soc_acpi_adr_device rt711_rt1316_group_adr[] = { { .adr = 0x000030025D071101ull, @@ -103,6 +117,345 @@ static const struct snd_soc_acpi_adr_device rt722_0_single_adr[] = { } }; +static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { + { /* Jack Playback Endpoint */ + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* DMIC Capture Endpoint */ + .num = 1, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Jack Capture Endpoint */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Speaker Playback Endpoint */ + .num = 3, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l56x4_l1u3210_adr[] = { + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x2_l0u01_adr[] = { + { + .adr = 0x00003001FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00003101FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x2_l1u01_adr[] = { + { + .adr = 0x00013001FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013101FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x2_l1u13_adr[] = { + { + .adr = 0x00013101FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013301FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x4_l0u0246_adr[] = { + { + .adr = 0x00003001FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00003201FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00003401FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00003601FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs42l43_l0u0_adr[] = { + { + .adr = 0x00003001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43_l0u1_adr[] = { + { + .adr = 0x00003101FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43b_l0u1_adr[] = { + { + .adr = 0x00003101FA2A3B01ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43_l1u0_cs35l56x4_l1u0123_adr[] = { + { + .adr = 0x00013001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs42l45_l0u0_adr[] = { + { + .adr = 0x00003001FA424501ull, + /* Re-use endpoints, but cs42l45 has no speaker */ + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints) - 1, + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l45" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l45_l1u0_adr[] = { + { + .adr = 0x00013001FA424501ull, + /* Re-use endpoints, but cs42l45 has no speaker */ + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints) - 1, + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l45" + } +}; + +static const struct snd_soc_acpi_link_adr acp63_cs35l56x4_l1u3210[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_l1u3210_adr), + .adr_d = cs35l56x4_l1u3210_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs35l63x4_l0u0246[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs35l63x4_l0u0246_adr), + .adr_d = cs35l63x4_l0u0246_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43_l0u1[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43_l0u1_adr), + .adr_d = cs42l43_l0u1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43b_l0u1[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43b_l0u1_adr), + .adr_d = cs42l43b_l0u1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43_l0u0_cs35l56x4_l1u3210[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43_l0u0_adr), + .adr_d = cs42l43_l0u0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_l1u3210_adr), + .adr_d = cs35l56x4_l1u3210_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43_l1u0_cs35l56x4_l1u0123[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l43_l1u0_cs35l56x4_l1u0123_adr), + .adr_d = cs42l43_l1u0_cs35l56x4_l1u0123_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l0u0[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l45_l0u0_adr), + .adr_d = cs42l45_l0u0_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l0u0_cs35l63x2_l1u01[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l45_l0u0_adr), + .adr_d = cs42l45_l0u0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l63x2_l1u01_adr), + .adr_d = cs35l63x2_l1u01_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l0u0_cs35l63x2_l1u13[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l45_l0u0_adr), + .adr_d = cs42l45_l0u0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l63x2_l1u13_adr), + .adr_d = cs35l63x2_l1u13_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l1u0[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l45_l1u0_adr), + .adr_d = cs42l45_l1u0_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l1u0_cs35l63x2_l0u01[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l45_l1u0_adr), + .adr_d = cs42l45_l1u0_adr, + }, + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs35l63x2_l0u01_adr), + .adr_d = cs35l63x2_l0u01_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l1u0_cs35l63x4_l0u0246[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l45_l1u0_adr), + .adr_d = cs42l45_l1u0_adr, + }, + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs35l63x4_l0u0246_adr), + .adr_d = cs35l63x4_l0u0246_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr acp63_rt722_only[] = { { .mask = BIT(0), @@ -135,6 +488,66 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp63_sdw_machines[] = { .links = acp63_4_in_1_sdca, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l43_l0u0_cs35l56x4_l1u3210, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l1u0_cs35l63x4_l0u0246, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l0u0_cs35l63x2_l1u01, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l0u0_cs35l63x2_l1u13, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l1u0_cs35l63x2_l0u01, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp63_cs42l43_l1u0_cs35l56x4_l1u0123, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp63_cs35l56x4_l1u3210, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs35l63x4_l0u0246, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs42l43_l0u1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs42l43b_l0u1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs42l45_l0u0, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp63_cs42l45_l1u0, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp63_sdw_machines); From ca5056f5a78ce62588878d138e8141f01d70e61b Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 26 Feb 2026 11:35:11 +0000 Subject: [PATCH 567/828] ASoC: cs35l56: Suppress pointless warning about number of GPIO pulls In cs35l56_process_xu_onchip_speaker_id() the warning that the number of pulls != number of GPIOs should only be printed if pulls are defined. Pull settings are optional because there would normally be an external resistor providing the pull. The warning would still be true if pulls are not defined, but in that case is just log noise. While we're changing that block of code, also fix the indenting of the arguments to the dev_warn(). Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260226113511.1768838-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 3bf9e8fc34a2..37909a319f88 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1625,9 +1625,9 @@ static int cs35l56_process_xu_onchip_speaker_id(struct cs35l56_private *cs35l56, if (num_pulls < 0) return num_pulls; - if (num_pulls != num_gpios) { + if (num_pulls && (num_pulls != num_gpios)) { dev_warn(cs35l56->base.dev, "%s count(%d) != %s count(%d)\n", - pull_name, num_pulls, gpio_name, num_gpios); + pull_name, num_pulls, gpio_name, num_gpios); } ret = cs35l56_check_and_save_onchip_spkid_gpios(&cs35l56->base, From 23942b71f07cc99e39d9216a5b370df494759d8c Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 23 Feb 2026 02:24:34 +0800 Subject: [PATCH 568/828] regulator: mt6363: Fix incorrect and redundant IRQ disposal in probe In mt6363_regulator_probe(), devm_add_action_or_reset() is used to automatically dispose of the IRQ mapping if the probe fails or the device is removed. The manual call to irq_dispose_mapping() in the error path was redundant as the reset action already triggers mt6363_irq_remove(). Furthermore, the manual call incorrectly passed the hardware IRQ number (info->hwirq) instead of the virtual IRQ mapping (info->virq). Remove the redundant and incorrect manual disposal. Fixes: 3c36965df808 ("regulator: Add support for MediaTek MT6363 SPMI PMIC Regulators") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260223-mt6363-v1-1-c99a2e8ac621@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/mt6363-regulator.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/regulator/mt6363-regulator.c b/drivers/regulator/mt6363-regulator.c index 03af5fa53600..0aebcbda0a19 100644 --- a/drivers/regulator/mt6363-regulator.c +++ b/drivers/regulator/mt6363-regulator.c @@ -899,10 +899,8 @@ static int mt6363_regulator_probe(struct platform_device *pdev) "Failed to map IRQ%d\n", info->hwirq); ret = devm_add_action_or_reset(dev, mt6363_irq_remove, &info->virq); - if (ret) { - irq_dispose_mapping(info->hwirq); + if (ret) return ret; - } config.driver_data = info; INIT_DELAYED_WORK(&info->oc_work, mt6363_oc_irq_enable_work); From 7d0bf050a58747bb8f977a6281e63660a66d1c81 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Sat, 21 Feb 2026 08:07:12 +0000 Subject: [PATCH 569/828] smb/client: make SMB2 maperror KUnit tests a separate module Build the SMB2 maperror KUnit tests as `smb2maperror_test.ko`. Link: https://lore.kernel.org/linux-cifs/20260210081040.4156383-1-geert@linux-m68k.org/ Suggested-by: Geert Uytterhoeven Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/Makefile | 2 ++ fs/smb/client/smb2glob.h | 12 ++++++++++++ fs/smb/client/smb2maperror.c | 28 +++++++++++++++------------- fs/smb/client/smb2maperror_test.c | 12 +++++++++--- 4 files changed, 38 insertions(+), 16 deletions(-) diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile index 3abd357d6df6..26b6105f04d1 100644 --- a/fs/smb/client/Makefile +++ b/fs/smb/client/Makefile @@ -56,4 +56,6 @@ $(obj)/smb2maperror.o: $(obj)/smb2_mapping_table.c quiet_cmd_gen_smb2_mapping = GEN $@ cmd_gen_smb2_mapping = perl $(src)/gen_smb2_mapping $< $@ +obj-$(CONFIG_SMB_KUNIT_TESTS) += smb2maperror_test.o + clean-files += smb2_mapping_table.c diff --git a/fs/smb/client/smb2glob.h b/fs/smb/client/smb2glob.h index e56e4d402f13..19da74b1edab 100644 --- a/fs/smb/client/smb2glob.h +++ b/fs/smb/client/smb2glob.h @@ -46,4 +46,16 @@ enum smb2_compound_ops { #define END_OF_CHAIN 4 #define RELATED_REQUEST 8 +/* + ***************************************************************** + * Struct definitions go here + ***************************************************************** + */ + +struct status_to_posix_error { + __u32 smb2_status; + int posix_error; + char *status_string; +}; + #endif /* _SMB2_GLOB_H */ diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c index cd036365201f..f4cff44e2796 100644 --- a/fs/smb/client/smb2maperror.c +++ b/fs/smb/client/smb2maperror.c @@ -8,7 +8,6 @@ * */ #include -#include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" #include "smb2proto.h" @@ -16,12 +15,6 @@ #include "../common/smb2status.h" #include "trace.h" -struct status_to_posix_error { - __u32 smb2_status; - int posix_error; - char *status_string; -}; - static const struct status_to_posix_error smb2_error_map_table[] = { /* * Automatically generated by the `gen_smb2_mapping` script, @@ -115,10 +108,19 @@ int __init smb2_init_maperror(void) return 0; } -#define SMB_CLIENT_KUNIT_AVAILABLE \ - ((IS_MODULE(CONFIG_CIFS) && IS_ENABLED(CONFIG_KUNIT)) || \ - (IS_BUILTIN(CONFIG_CIFS) && IS_BUILTIN(CONFIG_KUNIT))) +#if IS_ENABLED(CONFIG_SMB_KUNIT_TESTS) +/* Previous prototype for eliminating the build warning. */ +const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status); -#if SMB_CLIENT_KUNIT_AVAILABLE && IS_ENABLED(CONFIG_SMB_KUNIT_TESTS) -#include "smb2maperror_test.c" -#endif /* CONFIG_SMB_KUNIT_TESTS */ +const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status) +{ + return smb2_get_err_map(smb2_status); +} +EXPORT_SYMBOL_GPL(smb2_get_err_map_test); + +const struct status_to_posix_error *smb2_error_map_table_test = smb2_error_map_table; +EXPORT_SYMBOL_GPL(smb2_error_map_table_test); + +unsigned int smb2_error_map_num = ARRAY_SIZE(smb2_error_map_table); +EXPORT_SYMBOL_GPL(smb2_error_map_num); +#endif diff --git a/fs/smb/client/smb2maperror_test.c b/fs/smb/client/smb2maperror_test.c index 38ea6b846a99..8c47dea7a2c1 100644 --- a/fs/smb/client/smb2maperror_test.c +++ b/fs/smb/client/smb2maperror_test.c @@ -9,13 +9,18 @@ */ #include +#include "smb2glob.h" + +const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status); +extern const struct status_to_posix_error *smb2_error_map_table_test; +extern unsigned int smb2_error_map_num; static void test_cmp_map(struct kunit *test, const struct status_to_posix_error *expect) { const struct status_to_posix_error *result; - result = smb2_get_err_map(expect->smb2_status); + result = smb2_get_err_map_test(expect->smb2_status); KUNIT_EXPECT_PTR_NE(test, NULL, result); KUNIT_EXPECT_EQ(test, expect->smb2_status, result->smb2_status); KUNIT_EXPECT_EQ(test, expect->posix_error, result->posix_error); @@ -26,8 +31,8 @@ static void maperror_test_check_search(struct kunit *test) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(smb2_error_map_table); i++) - test_cmp_map(test, &smb2_error_map_table[i]); + for (i = 0; i < smb2_error_map_num; i++) + test_cmp_map(test, &smb2_error_map_table_test[i]); } static struct kunit_case maperror_test_cases[] = { @@ -43,3 +48,4 @@ static struct kunit_suite maperror_suite = { kunit_test_suite(maperror_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests of SMB2 maperror"); From 6f0402539b7da2b941a5a43754c15e53ac3cc276 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Wed, 25 Feb 2026 04:10:59 +0000 Subject: [PATCH 570/828] smb: update some doc references To make it easier to locate the documentation during development. Signed-off-by: ZhangGuoDong Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/smb2pdu.h | 7 +++++-- fs/smb/server/smb2pdu.h | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 78bb99f29d38..30d70097fe2f 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -224,7 +224,7 @@ struct smb2_file_reparse_point_info { __le32 Tag; } __packed; -/* See MS-FSCC 2.4.21 */ +/* See MS-FSCC 2.4.26 */ struct smb2_file_id_information { __le64 VolumeSerialNumber; __u64 PersistentFileId; /* opaque endianness */ @@ -251,7 +251,10 @@ struct smb2_file_id_extd_directory_info { extern char smb2_padding[7]; -/* equivalent of the contents of SMB3.1.1 POSIX open context response */ +/* + * See POSIX-SMB2 2.2.14.2.16 + * Link: https://gitlab.com/samba-team/smb3-posix-spec/-/blob/master/smb3_posix_extensions.md + */ struct create_posix_rsp { u32 nlink; u32 reparse_tag; diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 257c6d26df26..8b6eafb70dca 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -83,7 +83,10 @@ struct create_durable_rsp { } Data; } __packed; -/* equivalent of the contents of SMB3.1.1 POSIX open context response */ +/* + * See POSIX-SMB2 2.2.14.2.16 + * Link: https://gitlab.com/samba-team/smb3-posix-spec/-/blob/master/smb3_posix_extensions.md + */ struct create_posix_rsp { struct create_context_hdr ccontext; __u8 Name[16]; From 48647d3f9a644d1e81af6558102d43cdb260597b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2026 10:42:30 +0100 Subject: [PATCH 571/828] slab: distinguish lock and trylock for sheaf_flush_main() sheaf_flush_main() can be called from __pcs_replace_full_main() where it's fine if the trylock fails, and pcs_flush_all() where it's not expected to and for some flush callers (when destroying the cache or memory hotremove) it would be actually a problem if it failed and left the main sheaf not flushed. The flush callers can however safely use local_lock() instead of trylock. The trylock failure should not happen in practice on !PREEMPT_RT, but can happen on PREEMPT_RT. The impact is limited in practice because when a trylock fails in the kmem_cache_destroy() path, it means someone is using the cache while destroying it, which is a bug on its own. The memory hotremove path is unlikely to be employed in a production RT config, but it's possible. To fix this, split the function into sheaf_flush_main() (using local_lock()) and sheaf_try_flush_main() (using local_trylock()) where both call __sheaf_flush_main_batch() to flush a single batch of objects. This will also allow lockdep to verify our context assumptions. The problem was raised in an off-list question by Marcelo. Fixes: 2d517aa09bbc ("slab: add opt-in caching layer of percpu sheaves") Cc: stable@vger.kernel.org Reported-by: Marcelo Tosatti Signed-off-by: Vlastimil Babka Reviewed-by: Harry Yoo Reviewed-by: Hao Li Link: https://patch.msgid.link/20260211-b4-sheaf-flush-v1-1-4e7f492f0055@suse.cz Signed-off-by: Vlastimil Babka (SUSE) --- mm/slub.c | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 0c906fefc31b..b1e9f16ba435 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2858,19 +2858,19 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); * object pointers are moved to a on-stack array under the lock. To bound the * stack usage, limit each batch to PCS_BATCH_MAX. * - * returns true if at least partially flushed + * Must be called with s->cpu_sheaves->lock locked, returns with the lock + * unlocked. + * + * Returns how many objects are remaining to be flushed */ -static bool sheaf_flush_main(struct kmem_cache *s) +static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s) { struct slub_percpu_sheaves *pcs; unsigned int batch, remaining; void *objects[PCS_BATCH_MAX]; struct slab_sheaf *sheaf; - bool ret = false; -next_batch: - if (!local_trylock(&s->cpu_sheaves->lock)) - return ret; + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); pcs = this_cpu_ptr(s->cpu_sheaves); sheaf = pcs->main; @@ -2888,10 +2888,37 @@ next_batch: stat_add(s, SHEAF_FLUSH, batch); - ret = true; + return remaining; +} - if (remaining) - goto next_batch; +static void sheaf_flush_main(struct kmem_cache *s) +{ + unsigned int remaining; + + do { + local_lock(&s->cpu_sheaves->lock); + + remaining = __sheaf_flush_main_batch(s); + + } while (remaining); +} + +/* + * Returns true if the main sheaf was at least partially flushed. + */ +static bool sheaf_try_flush_main(struct kmem_cache *s) +{ + unsigned int remaining; + bool ret = false; + + do { + if (!local_trylock(&s->cpu_sheaves->lock)) + return ret; + + ret = true; + remaining = __sheaf_flush_main_batch(s); + + } while (remaining); return ret; } @@ -5704,7 +5731,7 @@ alloc_empty: if (put_fail) stat(s, BARN_PUT_FAIL); - if (!sheaf_flush_main(s)) + if (!sheaf_try_flush_main(s)) return NULL; if (!local_trylock(&s->cpu_sheaves->lock)) From c35636e91e392e1540949bbc67932167cb48bc3a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 18 Feb 2026 11:58:06 +0100 Subject: [PATCH 572/828] can: bcm: fix locking for bcm_op runtime updates Commit c2aba69d0c36 ("can: bcm: add locking for bcm_op runtime updates") added a locking for some variables that can be modified at runtime when updating the sending bcm_op with a new TX_SETUP command in bcm_tx_setup(). Usually the RX_SETUP only handles and filters incoming traffic with one exception: When the RX_RTR_FRAME flag is set a predefined CAN frame is sent when a specific RTR frame is received. Therefore the rx bcm_op uses bcm_can_tx() which uses the bcm_tx_lock that was only initialized in bcm_tx_setup(). Add the missing spin_lock_init() when allocating the bcm_op in bcm_rx_setup() to handle the RTR case properly. Fixes: c2aba69d0c36 ("can: bcm: add locking for bcm_op runtime updates") Reported-by: syzbot+5b11eccc403dd1cea9f8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-can/699466e4.a70a0220.2c38d7.00ff.GAE@google.com/ Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20260218-bcm_spin_lock_init-v1-1-592634c8a5b5@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/bcm.c b/net/can/bcm.c index b7324e9c955b..fd9fa072881e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1176,6 +1176,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (!op) return -ENOMEM; + spin_lock_init(&op->bcm_tx_lock); op->can_id = msg_head->can_id; op->nframes = msg_head->nframes; op->cfsiz = CFSIZ(msg_head->flags); From c77bfbdd6aac31b152ee81522cd90ad1de18738f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 26 Jan 2026 11:45:40 +0100 Subject: [PATCH 573/828] can: dummy_can: dummy_can_init(): fix packet statistics The former implementation was only counting the tx_packets value but not the tx_bytes as the skb was dropped on driver layer. Enable CAN echo support (IFF_ECHO) in dummy_can_init(), which activates the code for setting and retrieving the echo SKB and counts the tx_bytes correctly. Fixes: 816cf430e84b ("can: add dummy_can driver") Cc: Vincent Mailhol Signed-off-by: Oliver Hartkopp Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20260126104540.21024-1-socketcan@hartkopp.net [mkl: make commit message imperative] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dummy_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/dummy_can.c b/drivers/net/can/dummy_can.c index 41953655e3d3..cd23de488edc 100644 --- a/drivers/net/can/dummy_can.c +++ b/drivers/net/can/dummy_can.c @@ -241,6 +241,7 @@ static int __init dummy_can_init(void) dev->netdev_ops = &dummy_can_netdev_ops; dev->ethtool_ops = &dummy_can_ethtool_ops; + dev->flags |= IFF_ECHO; /* enable echo handling */ priv = netdev_priv(dev); priv->can.bittiming_const = &dummy_can_bittiming_const; priv->can.bitrate_max = 20 * MEGA /* BPS */; From ab3f894de216f4a62adc3b57e9191888cbf26885 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Mon, 9 Feb 2026 15:47:05 +0100 Subject: [PATCH 574/828] can: mcp251x: fix deadlock in error path of mcp251x_open The mcp251x_open() function call free_irq() in its error path with the mpc_lock mutex held. But if an interrupt already occurred the interrupt handler will be waiting for the mpc_lock and free_irq() will deadlock waiting for the handler to finish. This issue is similar to the one fixed in commit 7dd9c26bd6cf ("can: mcp251x: fix deadlock if an interrupt occurs during mcp251x_open") but for the error path. To solve this issue move the call to free_irq() after the lock is released. Setting `priv->force_quit = 1` beforehand ensure that the IRQ handler will exit right away once it acquired the lock. Signed-off-by: Alban Bedel Link: https://patch.msgid.link/20260209144706.2261954-1-alban.bedel@lht.dlh.de Fixes: bf66f3736a94 ("can: mcp251x: Move to threaded interrupts instead of workqueues.") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251x.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index fa97adf25b73..bb7782582f40 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1214,6 +1214,7 @@ static int mcp251x_open(struct net_device *net) { struct mcp251x_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; + bool release_irq = false; unsigned long flags = 0; int ret; @@ -1257,12 +1258,24 @@ static int mcp251x_open(struct net_device *net) return 0; out_free_irq: - free_irq(spi->irq, priv); + /* The IRQ handler might be running, and if so it will be waiting + * for the lock. But free_irq() must wait for the handler to finish + * so calling it here would deadlock. + * + * Setting priv->force_quit will let the handler exit right away + * without any access to the hardware. This make it safe to call + * free_irq() after the lock is released. + */ + priv->force_quit = 1; + release_irq = true; + mcp251x_hw_sleep(spi); out_close: mcp251x_power_enable(priv->transceiver, 0); close_candev(net); mutex_unlock(&priv->mcp_lock); + if (release_irq) + free_irq(spi->irq, priv); return ret; } From 5f4338e5633dc034a81000b2516a78cfb51c601d Mon Sep 17 00:00:00 2001 From: Sheetal Date: Mon, 2 Mar 2026 14:12:17 +0530 Subject: [PATCH 575/828] ALSA: hda/hdmi: Add Tegra238 HDA codec device ID Add Tegra238 HDA codec device in hda_device_id list. Signed-off-by: Sheetal Link: https://patch.msgid.link/20260302084217.3135982-1-sheetal@nvidia.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/hdmi/tegrahdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/hdmi/tegrahdmi.c b/sound/hda/codecs/hdmi/tegrahdmi.c index 5f6fe31aa202..ebb6410a4831 100644 --- a/sound/hda/codecs/hdmi/tegrahdmi.c +++ b/sound/hda/codecs/hdmi/tegrahdmi.c @@ -299,6 +299,7 @@ static const struct hda_device_id snd_hda_id_tegrahdmi[] = { HDA_CODEC_ID_MODEL(0x10de002f, "Tegra194 HDMI/DP2", MODEL_TEGRA), HDA_CODEC_ID_MODEL(0x10de0030, "Tegra194 HDMI/DP3", MODEL_TEGRA), HDA_CODEC_ID_MODEL(0x10de0031, "Tegra234 HDMI/DP", MODEL_TEGRA234), + HDA_CODEC_ID_MODEL(0x10de0032, "Tegra238 HDMI/DP", MODEL_TEGRA234), HDA_CODEC_ID_MODEL(0x10de0033, "SoC 33 HDMI/DP", MODEL_TEGRA234), HDA_CODEC_ID_MODEL(0x10de0034, "Tegra264 HDMI/DP", MODEL_TEGRA234), HDA_CODEC_ID_MODEL(0x10de0035, "SoC 35 HDMI/DP", MODEL_TEGRA234), From 968b098220e393a10488b6a5dddb302b2eaedf66 Mon Sep 17 00:00:00 2001 From: Ziyi Guo Date: Fri, 13 Feb 2026 20:39:27 +0000 Subject: [PATCH 576/828] can: esd_usb: add endpoint type validation esd_usb_probe() constructs bulk pipes for two endpoints without verifying their transfer types: - usb_rcvbulkpipe(dev->udev, 1) for RX (version reply, async RX data) - usb_sndbulkpipe(dev->udev, 2) for TX (version query, CAN frames) A malformed USB device can present these endpoints with transfer types that differ from what the driver assumes, triggering the WARNING in usb_submit_urb(). Use usb_find_common_endpoints() to discover and validate the first bulk IN and bulk OUT endpoints at probe time, before any allocation. Found pipes are saved to struct esd_usb and code uses them directly instead of making pipes in place. Similar to - commit 136bed0bfd3b ("can: mcba_usb: properly check endpoint type") which established the usb_find_common_endpoints() + stored pipes pattern for CAN USB drivers. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Suggested-by: Vincent Mailhol Signed-off-by: Ziyi Guo Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20260213203927.599163-1-n7l8m4@u.northwestern.edu Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 2892a68f510a..d257440fa01f 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -272,6 +272,9 @@ struct esd_usb { struct usb_anchor rx_submitted; + unsigned int rx_pipe; + unsigned int tx_pipe; + int net_count; u32 version; int rxinitdone; @@ -537,7 +540,7 @@ static void esd_usb_read_bulk_callback(struct urb *urb) } resubmit_urb: - usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), + usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe, urb->transfer_buffer, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); @@ -626,9 +629,7 @@ static int esd_usb_send_msg(struct esd_usb *dev, union esd_usb_msg *msg) { int actual_length; - return usb_bulk_msg(dev->udev, - usb_sndbulkpipe(dev->udev, 2), - msg, + return usb_bulk_msg(dev->udev, dev->tx_pipe, msg, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ &actual_length, 1000); @@ -639,12 +640,8 @@ static int esd_usb_wait_msg(struct esd_usb *dev, { int actual_length; - return usb_bulk_msg(dev->udev, - usb_rcvbulkpipe(dev->udev, 1), - msg, - sizeof(*msg), - &actual_length, - 1000); + return usb_bulk_msg(dev->udev, dev->rx_pipe, msg, + sizeof(*msg), &actual_length, 1000); } static int esd_usb_setup_rx_urbs(struct esd_usb *dev) @@ -677,8 +674,7 @@ static int esd_usb_setup_rx_urbs(struct esd_usb *dev) urb->transfer_dma = buf_dma; - usb_fill_bulk_urb(urb, dev->udev, - usb_rcvbulkpipe(dev->udev, 1), + usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe, buf, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; @@ -903,7 +899,7 @@ static netdev_tx_t esd_usb_start_xmit(struct sk_buff *skb, /* hnd must not be 0 - MSB is stripped in txdone handling */ msg->tx.hnd = BIT(31) | i; /* returned in TX done message */ - usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf, + usb_fill_bulk_urb(urb, dev->udev, dev->tx_pipe, buf, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ esd_usb_write_bulk_callback, context); @@ -1298,10 +1294,16 @@ done: static int esd_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep_in, *ep_out; struct esd_usb *dev; union esd_usb_msg *msg; int i, err; + err = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, + NULL, NULL); + if (err) + return err; + dev = kzalloc_obj(*dev); if (!dev) { err = -ENOMEM; @@ -1309,6 +1311,8 @@ static int esd_usb_probe(struct usb_interface *intf, } dev->udev = interface_to_usbdev(intf); + dev->rx_pipe = usb_rcvbulkpipe(dev->udev, ep_in->bEndpointAddress); + dev->tx_pipe = usb_sndbulkpipe(dev->udev, ep_out->bEndpointAddress); init_usb_anchor(&dev->rx_submitted); From 38a01c9700b0dcafe97dfa9dc7531bf4a245deff Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 17:51:17 +0100 Subject: [PATCH 577/828] can: ems_usb: ems_usb_read_bulk_callback(): check the proper length of a message When looking at the data in a USB urb, the actual_length is the size of the buffer passed to the driver, not the transfer_buffer_length which is set by the driver as the max size of the buffer. When parsing the messages in ems_usb_read_bulk_callback() properly check the size both at the beginning of parsing the message to make sure it is big enough for the expected structure, and at the end of the message to make sure we don't overflow past the end of the buffer for the next message. Cc: Vincent Mailhol Cc: Marc Kleine-Budde Cc: stable@kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026022316-answering-strainer-a5db@gregkh Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 4c219a5b139b..9b25dda7c183 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -445,6 +445,11 @@ static void ems_usb_read_bulk_callback(struct urb *urb) start = CPC_HEADER_SIZE; while (msg_count) { + if (start + CPC_MSG_HEADER_LEN > urb->actual_length) { + netdev_err(netdev, "format error\n"); + break; + } + msg = (struct ems_cpc_msg *)&ibuf[start]; switch (msg->type) { @@ -474,7 +479,7 @@ static void ems_usb_read_bulk_callback(struct urb *urb) start += CPC_MSG_HEADER_LEN + msg->length; msg_count--; - if (start > urb->transfer_buffer_length) { + if (start > urb->actual_length) { netdev_err(netdev, "format error\n"); break; } From 1e446fd0582ad8be9f6dafb115fc2e7245f9bea7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 17:30:20 +0100 Subject: [PATCH 578/828] can: ucan: Fix infinite loop from zero-length messages If a broken ucan device gets a message with the message length field set to 0, then the driver will loop for forever in ucan_read_bulk_callback(), hanging the system. If the length is 0, just skip the message and go on to the next one. This has been fixed in the kvaser_usb driver in the past in commit 0c73772cd2b8 ("can: kvaser_usb: leaf: Fix potential infinite loop in command parsers"), so there must be some broken devices out there like this somewhere. Cc: Marc Kleine-Budde Cc: Vincent Mailhol Cc: stable@kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026022319-huff-absurd-6a18@gregkh Fixes: 9f2d3eae88d2 ("can: ucan: add driver for Theobroma Systems UCAN devices") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ucan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index c79508b1c43e..0ea0ac75e42f 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -748,7 +748,7 @@ static void ucan_read_bulk_callback(struct urb *urb) len = le16_to_cpu(m->len); /* check sanity (length of content) */ - if (urb->actual_length - pos < len) { + if ((len == 0) || (urb->actual_length - pos < len)) { netdev_warn(up->netdev, "invalid message (short; no data; l:%d)\n", urb->actual_length); From 5eaad4f768266f1f17e01232ffe2ef009f8129b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 17:39:20 +0100 Subject: [PATCH 579/828] can: usb: etas_es58x: correctly anchor the urb in the read bulk callback When submitting an urb, that is using the anchor pattern, it needs to be anchored before submitting it otherwise it could be leaked if usb_kill_anchored_urbs() is called. This logic is correctly done elsewhere in the driver, except in the read bulk callback so do that here also. Cc: Vincent Mailhol Cc: Marc Kleine-Budde Cc: stable@kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Reviewed-by: Vincent Mailhol Tested-by: Vincent Mailhol Link: https://patch.msgid.link/2026022320-poser-stiffly-9d84@gregkh Fixes: 8537257874e9 ("can: etas_es58x: add core support for ETAS ES58X CAN USB interfaces") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/etas_es58x/es58x_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 2d248deb69dc..b259f6109808 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -1461,12 +1461,18 @@ static void es58x_read_bulk_callback(struct urb *urb) } resubmit_urb: + usb_anchor_urb(urb, &es58x_dev->rx_urbs); ret = usb_submit_urb(urb, GFP_ATOMIC); + if (!ret) + return; + + usb_unanchor_urb(urb); + if (ret == -ENODEV) { for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) netif_device_detach(es58x_dev->netdev[i]); - } else if (ret) + } else dev_err_ratelimited(dev, "Failed resubmitting read bulk urb: %pe\n", ERR_PTR(ret)); From 7299b1b39a255f6092ce4ec0b65f66e9d6a357af Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 13:10:30 +0100 Subject: [PATCH 580/828] can: usb: f81604: handle short interrupt urb messages properly If an interrupt urb is received that is not the correct length, properly detect it and don't attempt to treat the data as valid. Cc: Ji-Ze Hong (Peter Hong) Cc: Marc Kleine-Budde Cc: Vincent Mailhol Cc: stable@kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026022331-opal-evaluator-a928@gregkh Fixes: 88da17436973 ("can: usb: f81604: add Fintek F81604 support") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/f81604.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c index 76578063ac82..c61bd30d1765 100644 --- a/drivers/net/can/usb/f81604.c +++ b/drivers/net/can/usb/f81604.c @@ -620,6 +620,12 @@ static void f81604_read_int_callback(struct urb *urb) netdev_info(netdev, "%s: Int URB aborted: %pe\n", __func__, ERR_PTR(urb->status)); + if (urb->actual_length < sizeof(*data)) { + netdev_warn(netdev, "%s: short int URB: %u < %zu\n", + __func__, urb->actual_length, sizeof(*data)); + goto resubmit_urb; + } + switch (urb->status) { case 0: /* success */ break; From 51f94780720fa90c424f67e3e9784cb8ef8190e5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 13:10:31 +0100 Subject: [PATCH 581/828] can: usb: f81604: handle bulk write errors properly If a write urb fails then more needs to be done other than just logging the message, otherwise the transmission could be stalled. Properly increment the error counters and wake up the queues so that data will continue to flow. Cc: Ji-Ze Hong (Peter Hong) Cc: Marc Kleine-Budde Cc: Vincent Mailhol Cc: stable@kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026022334-slackness-dynamic-9195@gregkh Fixes: 88da17436973 ("can: usb: f81604: add Fintek F81604 support") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/f81604.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c index c61bd30d1765..1cc927d79b6a 100644 --- a/drivers/net/can/usb/f81604.c +++ b/drivers/net/can/usb/f81604.c @@ -880,9 +880,27 @@ static void f81604_write_bulk_callback(struct urb *urb) if (!netif_device_present(netdev)) return; - if (urb->status) - netdev_info(netdev, "%s: Tx URB error: %pe\n", __func__, - ERR_PTR(urb->status)); + if (!urb->status) + return; + + switch (urb->status) { + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: + return; + default: + break; + } + + if (net_ratelimit()) + netdev_err(netdev, "%s: Tx URB error: %pe\n", __func__, + ERR_PTR(urb->status)); + + can_free_echo_skb(netdev, 0, NULL); + netdev->stats.tx_dropped++; + netdev->stats.tx_errors++; + + netif_wake_queue(netdev); } static void f81604_clear_reg_work(struct work_struct *work) From 952caa5da10bed22be09612433964f6877ba0dde Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 13:10:32 +0100 Subject: [PATCH 582/828] can: usb: f81604: correctly anchor the urb in the read bulk callback When submitting an urb, that is using the anchor pattern, it needs to be anchored before submitting it otherwise it could be leaked if usb_kill_anchored_urbs() is called. This logic is correctly done elsewhere in the driver, except in the read bulk callback so do that here also. Cc: Ji-Ze Hong (Peter Hong) Cc: Marc Kleine-Budde Cc: Vincent Mailhol Cc: stable@kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026022334-starlight-scaling-2cea@gregkh Fixes: 88da17436973 ("can: usb: f81604: add Fintek F81604 support") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/f81604.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c index 1cc927d79b6a..f12318268e46 100644 --- a/drivers/net/can/usb/f81604.c +++ b/drivers/net/can/usb/f81604.c @@ -413,6 +413,7 @@ static void f81604_read_bulk_callback(struct urb *urb) { struct f81604_can_frame *frame = urb->transfer_buffer; struct net_device *netdev = urb->context; + struct f81604_port_priv *priv = netdev_priv(netdev); int ret; if (!netif_device_present(netdev)) @@ -445,10 +446,15 @@ static void f81604_read_bulk_callback(struct urb *urb) f81604_process_rx_packet(netdev, frame); resubmit_urb: + usb_anchor_urb(urb, &priv->urbs_anchor); ret = usb_submit_urb(urb, GFP_ATOMIC); + if (!ret) + return; + usb_unanchor_urb(urb); + if (ret == -ENODEV) netif_device_detach(netdev); - else if (ret) + else netdev_err(netdev, "%s: failed to resubmit read bulk urb: %pe\n", __func__, ERR_PTR(ret)); @@ -652,10 +658,15 @@ static void f81604_read_int_callback(struct urb *urb) f81604_handle_tx(priv, data); resubmit_urb: + usb_anchor_urb(urb, &priv->urbs_anchor); ret = usb_submit_urb(urb, GFP_ATOMIC); + if (!ret) + return; + usb_unanchor_urb(urb); + if (ret == -ENODEV) netif_device_detach(netdev); - else if (ret) + else netdev_err(netdev, "%s: failed to resubmit int urb: %pe\n", __func__, ERR_PTR(ret)); } From 4ca191cec17a997d0e3b2cd312f3a884288acc27 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 4 Feb 2026 09:01:00 -0600 Subject: [PATCH 583/828] x86/boot/sev: Move SEV decompressor variables into the .data section As part of the work to remove the dependency on calling into the decompressor code (startup_64()) for a UEFI boot, a call to rmpadjust() was removed from sev_enable() in favor of checking the value of the snp_vmpl variable. When booting through a non-UEFI path and calling startup_64(), the call to sev_enable() is performed before the BSS section is zeroed. With the removal of the rmpadjust() call and the corresponding check of the return code, the snp_vmpl variable is checked. Since the kernel is running at VMPL0, the snp_vmpl variable will not have been set and should be the default value of 0. However, since the call occurs before the BSS is zeroed, the snp_vmpl variable may not actually be zero, which will cause the guest boot to fail. Since the decompressor relocates itself, the BSS would need to be cleared both before and after the relocation, but this would, in effect, cause all of the changes to BSS variables before relocation to be lost after relocation. Instead, move the snp_vmpl variable into the .data section so that it is initialized and the value made safe during relocation. As a pre-caution against future changes, move other SEV-related decompressor variables into the .data section, too. Fixes: 68a501d7fd82 ("x86/boot: Drop redundant RMPADJUST in SEV SVSM presence check") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ard Biesheuvel Reviewed-by: Changyuan Lyu Tested-by: Kevin Hui Tested-by: Changyuan Lyu Cc: stable@vger.kernel.org Link: https://patch.msgid.link/5648b7de5b0a5d0dfef3785f9582b718678c6448.1770217260.git.thomas.lendacky@amd.com --- arch/x86/boot/compressed/sev.c | 8 ++++---- arch/x86/boot/startup/sev-shared.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index c8c1464b3a56..46b54720d91d 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -28,17 +28,17 @@ #include "sev.h" static struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); -struct ghcb *boot_ghcb; +struct ghcb *boot_ghcb __section(".data"); #undef __init #define __init #define __BOOT_COMPRESSED -u8 snp_vmpl; -u16 ghcb_version; +u8 snp_vmpl __section(".data"); +u16 ghcb_version __section(".data"); -u64 boot_svsm_caa_pa; +u64 boot_svsm_caa_pa __section(".data"); /* Include code for early handlers */ #include "../../boot/startup/sev-shared.c" diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index a0fa8bb2b945..d9ac3a929d33 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -31,7 +31,7 @@ static u32 cpuid_std_range_max __ro_after_init; static u32 cpuid_hyp_range_max __ro_after_init; static u32 cpuid_ext_range_max __ro_after_init; -bool sev_snp_needs_sfw; +bool sev_snp_needs_sfw __section(".data"); void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) From 9073428bb204d921ae15326bb7d4558d9d269aab Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 3 Feb 2026 16:24:03 -0600 Subject: [PATCH 584/828] x86/sev: Allow IBPB-on-Entry feature for SNP guests The SEV-SNP IBPB-on-Entry feature does not require a guest-side implementation. It was added in Zen5 h/w, after the first SNP Zen implementation, and thus was not accounted for when the initial set of SNP features were added to the kernel. In its abundant precaution, commit 8c29f0165405 ("x86/sev: Add SEV-SNP guest feature negotiation support") included SEV_STATUS' IBPB-on-Entry bit as a reserved bit, thereby masking guests from using the feature. Allow guests to make use of IBPB-on-Entry when supported by the hypervisor, as the bit is now architecturally defined and safe to expose. Fixes: 8c29f0165405 ("x86/sev: Add SEV-SNP guest feature negotiation support") Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikunj A Dadhania Reviewed-by: Tom Lendacky Cc: stable@kernel.org Link: https://patch.msgid.link/20260203222405.4065706-2-kim.phillips@amd.com --- arch/x86/boot/compressed/sev.c | 1 + arch/x86/coco/sev/core.c | 1 + arch/x86/include/asm/msr-index.h | 5 ++++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 46b54720d91d..e468476e9e4a 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -188,6 +188,7 @@ bool sev_es_check_ghcb_fault(unsigned long address) MSR_AMD64_SNP_RESERVED_BIT13 | \ MSR_AMD64_SNP_RESERVED_BIT15 | \ MSR_AMD64_SNP_SECURE_AVIC | \ + MSR_AMD64_SNP_RESERVED_BITS19_22 | \ MSR_AMD64_SNP_RESERVED_MASK) #ifdef CONFIG_AMD_SECURE_AVIC diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 907981b94c40..7ed3da998489 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -89,6 +89,7 @@ static const char * const sev_status_feat_names[] = { [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt", [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC", + [MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT] = "IBPBOnEntry", }; /* diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index da5275d8eda6..6673601246b3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -740,7 +740,10 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 #define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) -#define MSR_AMD64_SNP_RESV_BIT 19 +#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19) +#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23 +#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT) +#define MSR_AMD64_SNP_RESV_BIT 24 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_SAVIC_CONTROL 0xc0010138 #define MSR_AMD64_SAVIC_EN_BIT 0 From 2df6162785f31f1bbb598cfc3b08e4efc88f80b6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 19 Feb 2026 13:57:34 +0100 Subject: [PATCH 585/828] can: gs_usb: gs_can_open(): always configure bitrates before starting device So far the driver populated the struct can_priv::do_set_bittiming() and struct can_priv::fd::do_set_data_bittiming() callbacks. Before bringing up the interface, user space has to configure the bitrates. With these callbacks the configuration is directly forwarded into the CAN hardware. Then the interface can be brought up. An ifdown-ifup cycle (without changing the bit rates) doesn't re-configure the bitrates in the CAN hardware. This leads to a problem with the CANable-2.5 [1] firmware, which resets the configured bit rates during ifdown. To fix the problem remove both bit timing callbacks and always configure the bitrates in the struct net_device_ops::ndo_open() callback. [1] https://github.com/Elmue/CANable-2.5-firmware-Slcan-and-Candlelight Cc: stable@vger.kernel.org Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Link: https://patch.msgid.link/20260219-gs_usb-always-configure-bitrates-v2-1-671f8ba5b0a5@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 9d27d6f0c0b5..ec9a7cbbbc69 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -772,9 +772,8 @@ device_detach: } } -static int gs_usb_set_bittiming(struct net_device *netdev) +static int gs_usb_set_bittiming(struct gs_can *dev) { - struct gs_can *dev = netdev_priv(netdev); struct can_bittiming *bt = &dev->can.bittiming; struct gs_device_bittiming dbt = { .prop_seg = cpu_to_le32(bt->prop_seg), @@ -791,9 +790,8 @@ static int gs_usb_set_bittiming(struct net_device *netdev) GFP_KERNEL); } -static int gs_usb_set_data_bittiming(struct net_device *netdev) +static int gs_usb_set_data_bittiming(struct gs_can *dev) { - struct gs_can *dev = netdev_priv(netdev); struct can_bittiming *bt = &dev->can.fd.data_bittiming; struct gs_device_bittiming dbt = { .prop_seg = cpu_to_le32(bt->prop_seg), @@ -1057,6 +1055,20 @@ static int gs_can_open(struct net_device *netdev) if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) flags |= GS_CAN_MODE_HW_TIMESTAMP; + rc = gs_usb_set_bittiming(dev); + if (rc) { + netdev_err(netdev, "failed to set bittiming: %pe\n", ERR_PTR(rc)); + goto out_usb_kill_anchored_urbs; + } + + if (ctrlmode & CAN_CTRLMODE_FD) { + rc = gs_usb_set_data_bittiming(dev); + if (rc) { + netdev_err(netdev, "failed to set data bittiming: %pe\n", ERR_PTR(rc)); + goto out_usb_kill_anchored_urbs; + } + } + /* finally start device */ dev->can.state = CAN_STATE_ERROR_ACTIVE; dm.flags = cpu_to_le32(flags); @@ -1370,7 +1382,6 @@ static struct gs_can *gs_make_candev(unsigned int channel, dev->can.state = CAN_STATE_STOPPED; dev->can.clock.freq = le32_to_cpu(bt_const.fclk_can); dev->can.bittiming_const = &dev->bt_const; - dev->can.do_set_bittiming = gs_usb_set_bittiming; dev->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC; @@ -1394,7 +1405,6 @@ static struct gs_can *gs_make_candev(unsigned int channel, * GS_CAN_FEATURE_BT_CONST_EXT is set. */ dev->can.fd.data_bittiming_const = &dev->bt_const; - dev->can.fd.do_set_data_bittiming = gs_usb_set_data_bittiming; } if (feature & GS_CAN_FEATURE_TERMINATION) { From 7e1e6d6845329adb2da75110a061557e9c26d9b7 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 12 Feb 2026 11:30:00 -0500 Subject: [PATCH 586/828] dt-bindings: net: can: nxp,sja1000: add reference to mc-peripheral-props.yaml Add a reference to mc-peripheral-props.yaml to allow vendor-specific properties for memory access timings. Fix below CHECK_DTBS warings: arch/arm/boot/dts/nxp/imx/imx27-phytec-phycore-rdk.dtb: can@4,0 (nxp,sja1000): Unevaluated properties are not allowed ('fsl,weim-cs-timing' was unexpected) from schema $id: http://devicetree.org/schemas/net/can/nxp,sja1000.yaml Signed-off-by: Frank Li Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260212163000.1195586-1-Frank.Li@nxp.com Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml b/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml index ec0c2168e4b9..6bcfff970117 100644 --- a/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml +++ b/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml @@ -87,6 +87,7 @@ required: allOf: - $ref: can-controller.yaml# + - $ref: /schemas/memory-controllers/mc-peripheral-props.yaml - if: properties: compatible: From fb797a70108f3fda83fde6dea30bee4be7d5df8b Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Thu, 26 Feb 2026 20:52:16 -0500 Subject: [PATCH 587/828] drm: renesas: rz-du: mipi_dsi: Set DSI divider Before the MIPI DSI clock source can be configured, the target divide ratio needs to be set. Signed-off-by: Chris Brandt Reviewed-by: Biju Das Tested-by: Biju Das Fixes: 5a4326f2e3b1 ("clk: renesas: rzg2l: Remove DSI clock rate restrictions") Link: https://patch.msgid.link/20260227015216.2721504-1-chris.brandt@renesas.com Signed-off-by: Biju Das --- drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c index f74a0aa85ba8..29f2b7d24fe5 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c @@ -1122,6 +1122,7 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct rzg2l_mipi_dsi *dsi = host_to_rzg2l_mipi_dsi(host); + int bpp; int ret; if (device->lanes > dsi->num_data_lanes) { @@ -1131,7 +1132,8 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host, return -EINVAL; } - switch (mipi_dsi_pixel_format_to_bpp(device->format)) { + bpp = mipi_dsi_pixel_format_to_bpp(device->format); + switch (bpp) { case 24: break; case 18: @@ -1162,6 +1164,18 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host, drm_bridge_add(&dsi->bridge); + /* + * Report the required division ratio setting for the MIPI clock dividers. + * + * vclk * bpp = hsclk * 8 * num_lanes + * + * vclk * DSI_AB_divider = hsclk * 16 + * + * which simplifies to... + * DSI_AB_divider = bpp * 2 / num_lanes + */ + rzg2l_cpg_dsi_div_set_divider(bpp * 2 / dsi->lanes, PLL5_TARGET_DSI); + return 0; } From d973b1039ccde6b241b438d53297edce4de45b5c Mon Sep 17 00:00:00 2001 From: Sebastian Krzyszkowiak Date: Sat, 21 Feb 2026 17:28:04 +0100 Subject: [PATCH 588/828] wifi: rsi: Don't default to -EOPNOTSUPP in rsi_mac80211_config This triggers a WARN_ON in ieee80211_hw_conf_init and isn't the expected behavior from the driver - other drivers default to 0 too. Fixes: 0a44dfc07074 ("wifi: mac80211: simplify non-chanctx drivers") Signed-off-by: Sebastian Krzyszkowiak Link: https://patch.msgid.link/20260221-rsi-config-ret-v1-1-9a8f805e2f31@puri.sm Signed-off-by: Johannes Berg --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 8c8e074a3a70..c7ae8031436a 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -668,7 +668,7 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; struct ieee80211_conf *conf = &hw->conf; - int status = -EOPNOTSUPP; + int status = 0; mutex_lock(&common->mutex); From a8df7892a9f42b2e2d5851f8835c734bd7fe8ad4 Mon Sep 17 00:00:00 2001 From: Sheetal Date: Mon, 2 Mar 2026 14:23:22 +0530 Subject: [PATCH 589/828] ASoC: dt-bindings: tegra: Add compatible for Tegra238 sound card Tegra238 requires different PLLA and PLLA_OUT0 clock rates compared to other Tegra platforms. Add Tegra238 compatible string to the APE tegra-audio-graph-card bindings. Signed-off-by: Sheetal Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260302085323.3139571-2-sheetal@nvidia.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml index da89523ccf5f..92bc3ef56f2c 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml @@ -23,6 +23,7 @@ properties: enum: - nvidia,tegra210-audio-graph-card - nvidia,tegra186-audio-graph-card + - nvidia,tegra238-audio-graph-card - nvidia,tegra264-audio-graph-card clocks: From 2d85ecd6fb0eb2fee0ffa040ec1ddea57b09bc38 Mon Sep 17 00:00:00 2001 From: Franz Schnyder Date: Wed, 18 Feb 2026 11:25:14 +0100 Subject: [PATCH 590/828] regulator: pf9453: Respect IRQ trigger settings from firmware The datasheet specifies, that the IRQ_B pin is pulled low when any unmasked interrupt bit status is changed, and it is released high once the application processor reads the INT1 register. As it specifies a level-low behavior, it should not force a falling-edge interrupt. Remove the IRQF_TRIGGER_FALLING to not force the falling-edge interrupt and instead rely on the flag from the device tree. Fixes: 0959b6706325 ("regulator: pf9453: add PMIC PF9453 support") Cc: stable@vger.kernel.org Signed-off-by: Franz Schnyder Link: https://patch.msgid.link/20260218102518.238943-2-fra.schnyder@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/pf9453-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/pf9453-regulator.c b/drivers/regulator/pf9453-regulator.c index 779a6fdb0574..eed3055d1c1c 100644 --- a/drivers/regulator/pf9453-regulator.c +++ b/drivers/regulator/pf9453-regulator.c @@ -809,7 +809,7 @@ static int pf9453_i2c_probe(struct i2c_client *i2c) } ret = devm_request_threaded_irq(pf9453->dev, pf9453->irq, NULL, pf9453_irq_handler, - (IRQF_TRIGGER_FALLING | IRQF_ONESHOT), + IRQF_ONESHOT, "pf9453-irq", pf9453); if (ret) return dev_err_probe(pf9453->dev, ret, "Failed to request IRQ: %d\n", pf9453->irq); From e176ad7b57a1a15ece213251b7f3103bd929e26c Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 2 Mar 2026 13:24:52 +0100 Subject: [PATCH 591/828] dt-bindings: hwmon: sl28cpld: Drop sa67mcu compatible I was just informed that this product is discontinued (without being ever released to the market). Pull the plug and let's not waste any more maintainers time and revert commit 0f6eae86e626 ("dt-bindings: hwmon: sl28cpld: add sa67mcu compatible"). Acked-by: Conor Dooley Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20260302122540.1377444-8-mwalle@kernel.org Signed-off-by: Guenter Roeck --- .../devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml b/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml index 966b221b6caa..5803a1770cad 100644 --- a/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml +++ b/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml @@ -16,7 +16,6 @@ description: | properties: compatible: enum: - - kontron,sa67mcu-hwmon - kontron,sl28cpld-fan reg: From 2e3649e237237258a08d75afef96648dd2b379f7 Mon Sep 17 00:00:00 2001 From: Julian Orth Date: Sun, 1 Mar 2026 13:34:42 +0100 Subject: [PATCH 592/828] drm/syncobj: Fix handle <-> fd ioctls with dirty stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider the following application: #include #include #include #include int main(void) { int fd = open("/dev/dri/renderD128", O_RDWR); struct drm_syncobj_create arg1; ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &arg1); struct drm_syncobj_handle arg2; memset(&arg2, 1, sizeof(arg2)); // simulate dirty stack arg2.handle = arg1.handle; arg2.flags = 0; arg2.fd = 0; arg2.pad = 0; // arg2.point = 0; // userspace is required to set point to 0 ioctl(fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &arg2); } The last ioctl returns EINVAL because args->point is not 0. However, userspace developed against older kernel versions is not aware of the new point field and might therefore not initialize it. The correct check would be if (args->flags & DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE) return -EINVAL; However, there might already be userspace that relies on this not returning an error as long as point == 0. Therefore use the more lenient check. Fixes: c2d3a7300695 ("drm/syncobj: Extend EXPORT_SYNC_FILE for timeline syncobjs") Signed-off-by: Julian Orth Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20260301-point-v1-1-21fc5fd98614@gmail.com --- drivers/gpu/drm/drm_syncobj.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 250734dee928..49eccb43ce63 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -875,7 +875,7 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, return drm_syncobj_export_sync_file(file_private, args->handle, point, &args->fd); - if (args->point) + if (point) return -EINVAL; return drm_syncobj_handle_to_fd(file_private, args->handle, @@ -909,7 +909,7 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, args->handle, point); - if (args->point) + if (point) return -EINVAL; return drm_syncobj_fd_to_handle(file_private, args->fd, From cdc8a1e11f4d5b480ec750e28010c357185b95a6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 14 Jan 2026 16:45:46 -0800 Subject: [PATCH 593/828] drm/xe: Do not preempt fence signaling CS instructions If a batch buffer is complete, it makes little sense to preempt the fence signaling instructions in the ring, as the largest portion of the work (the batch buffer) is already done and fence signaling consists of only a few instructions. If these instructions are preempted, the GuC would need to perform a context switch just to signal the fence, which is costly and delays fence signaling. Avoid this scenario by disabling preemption immediately after the BB start instruction and re-enabling it after executing the fence signaling instructions. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Daniele Ceraolo Spurio Cc: Carlos Santa Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260115004546.58060-1-matthew.brost@intel.com (cherry picked from commit 2bcbf2dcde0c839a73af664a3c77d4e77d58a3eb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 248620b0901d..53d420d72164 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -280,6 +280,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -345,6 +348,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -397,6 +403,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + i = emit_render_cache_flush(job, dw, i); if (job->user_fence.used) From e377182f0266f46f02d01838e6bde67b9dac0d66 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 25 Feb 2026 01:34:49 +0000 Subject: [PATCH 594/828] drm/xe/configfs: Free ctx_restore_mid_bb in release ctx_restore_mid_bb memory is allocated in wa_bb_store(), but xe_config_device_release() only frees ctx_restore_post_bb. Free ctx_restore_mid_bb[0].cs as well to avoid leaking the allocation when the configfs device is removed. Fixes: b30d5de3d40c ("drm/xe/configfs: Add mid context restore bb") Signed-off-by: Shuicheng Lin Reviewed-by: Nitin Gote Link: https://patch.msgid.link/20260225013448.3547687-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper (cherry picked from commit a235e7d0098337c3f2d1e8f3610c719a589e115f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index c59b1414df22..7fd07d1280bb 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -830,6 +830,7 @@ static void xe_config_device_release(struct config_item *item) mutex_destroy(&dev->lock); + kfree(dev->config.ctx_restore_mid_bb[0].cs); kfree(dev->config.ctx_restore_post_bb[0].cs); kfree(dev); } From 99f9b5343cae80eb0dfe050baf6c86d722b3ba2e Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Thu, 26 Feb 2026 22:26:58 +0100 Subject: [PATCH 595/828] drm/xe/queue: Call fini on exec queue creation fail Every call to queue init should have a corresponding fini call. Skipping this would mean skipping removal of the queue from GuC list (which is part of guc_id allocation). A damaged queue stored in exec_queue_lookup list would lead to invalid memory reference, sooner or later. Call fini to free guc_id. This must be done before any internal LRCs are freed. Since the finalization with this extra call became very similar to __xe_exec_queue_fini(), reuse that. To make this reuse possible, alter xe_lrc_put() so it can survive NULL parameters, like other similar functions. v2: Reuse _xe_exec_queue_fini(). Make xe_lrc_put() aware of NULLs. Fixes: 3c1fa4aa60b1 ("drm/xe: Move queue init before LRC creation") Signed-off-by: Tomasz Lis Reviewed-by: Matthew Brost (v1) Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260226212701.2937065-2-tomasz.lis@intel.com (cherry picked from commit 393e5fea6f7d7054abc2c3d97a4cfe8306cd6079) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 23 +++++++++++------------ drivers/gpu/drm/xe/xe_lrc.h | 3 ++- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ddae7fcfc97..8ecdf949f9e4 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -266,6 +266,16 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) { int i, err; @@ -320,21 +330,10 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) return 0; err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); + __xe_exec_queue_fini(q); return err; } -static void __xe_exec_queue_fini(struct xe_exec_queue *q) -{ - int i; - - q->ops->fini(q); - - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); -} - struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index c307a3fd9ea2..c1c615447c85 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -75,7 +75,8 @@ static inline struct xe_lrc *xe_lrc_get(struct xe_lrc *lrc) */ static inline void xe_lrc_put(struct xe_lrc *lrc) { - kref_put(&lrc->refcount, xe_lrc_destroy); + if (lrc) + kref_put(&lrc->refcount, xe_lrc_destroy); } /** From da46b5dfef48658d03347cda21532bcdbb521e67 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 1 Mar 2026 16:22:07 -0800 Subject: [PATCH 596/828] blktrace: fix __this_cpu_read/write in preemptible context tracing_record_cmdline() internally uses __this_cpu_read() and __this_cpu_write() on the per-CPU variable trace_cmdline_save, and trace_save_cmdline() explicitly asserts preemption is disabled via lockdep_assert_preemption_disabled(). These operations are only safe when preemption is off, as they were designed to be called from the scheduler context (probe_wakeup_sched_switch() / probe_wakeup()). __blk_add_trace() was calling tracing_record_cmdline(current) early in the blk_tracer path, before ring buffer reservation, from process context where preemption is fully enabled. This triggers the following using blktests/blktrace/002: blktrace/002 (blktrace ftrace corruption with sysfs trace) [failed] runtime 0.367s ... 0.437s something found in dmesg: [ 81.211018] run blktests blktrace/002 at 2026-02-25 22:24:33 [ 81.239580] null_blk: disk nullb1 created [ 81.357294] BUG: using __this_cpu_read() in preemptible [00000000] code: dd/2516 [ 81.362842] caller is tracing_record_cmdline+0x10/0x40 [ 81.362872] CPU: 16 UID: 0 PID: 2516 Comm: dd Tainted: G N 7.0.0-rc1lblk+ #84 PREEMPT(full) [ 81.362877] Tainted: [N]=TEST [ 81.362878] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 [ 81.362881] Call Trace: [ 81.362884] [ 81.362886] dump_stack_lvl+0x8d/0xb0 ... (See '/mnt/sda/blktests/results/nodev/blktrace/002.dmesg' for the entire message) [ 81.211018] run blktests blktrace/002 at 2026-02-25 22:24:33 [ 81.239580] null_blk: disk nullb1 created [ 81.357294] BUG: using __this_cpu_read() in preemptible [00000000] code: dd/2516 [ 81.362842] caller is tracing_record_cmdline+0x10/0x40 [ 81.362872] CPU: 16 UID: 0 PID: 2516 Comm: dd Tainted: G N 7.0.0-rc1lblk+ #84 PREEMPT(full) [ 81.362877] Tainted: [N]=TEST [ 81.362878] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 [ 81.362881] Call Trace: [ 81.362884] [ 81.362886] dump_stack_lvl+0x8d/0xb0 [ 81.362895] check_preemption_disabled+0xce/0xe0 [ 81.362902] tracing_record_cmdline+0x10/0x40 [ 81.362923] __blk_add_trace+0x307/0x5d0 [ 81.362934] ? lock_acquire+0xe0/0x300 [ 81.362940] ? iov_iter_extract_pages+0x101/0xa30 [ 81.362959] blk_add_trace_bio+0x106/0x1e0 [ 81.362968] submit_bio_noacct_nocheck+0x24b/0x3a0 [ 81.362979] ? lockdep_init_map_type+0x58/0x260 [ 81.362988] submit_bio_wait+0x56/0x90 [ 81.363009] __blkdev_direct_IO_simple+0x16c/0x250 [ 81.363026] ? __pfx_submit_bio_wait_endio+0x10/0x10 [ 81.363038] ? rcu_read_lock_any_held+0x73/0xa0 [ 81.363051] blkdev_read_iter+0xc1/0x140 [ 81.363059] vfs_read+0x20b/0x330 [ 81.363083] ksys_read+0x67/0xe0 [ 81.363090] do_syscall_64+0xbf/0xf00 [ 81.363102] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 81.363106] RIP: 0033:0x7f281906029d [ 81.363111] Code: 31 c0 e9 c6 fe ff ff 50 48 8d 3d 66 63 0a 00 e8 59 ff 01 00 66 0f 1f 84 00 00 00 00 00 80 3d 41 33 0e 00 00 74 17 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 5b c3 66 2e 0f 1f 84 00 00 00 00 00 48 83 ec [ 81.363113] RSP: 002b:00007ffca127dd48 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 81.363120] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f281906029d [ 81.363122] RDX: 0000000000001000 RSI: 0000559f8bfae000 RDI: 0000000000000000 [ 81.363123] RBP: 0000000000001000 R08: 0000002863a10a81 R09: 00007f281915f000 [ 81.363124] R10: 00007f2818f77b60 R11: 0000000000000246 R12: 0000559f8bfae000 [ 81.363126] R13: 0000000000000000 R14: 0000000000000000 R15: 000000000000000a [ 81.363142] The same BUG fires from blk_add_trace_plug(), blk_add_trace_unplug(), and blk_add_trace_rq() paths as well. The purpose of tracing_record_cmdline() is to cache the task->comm for a given PID so that the trace can later resolve it. It is only meaningful when a trace event is actually being recorded. Ring buffer reservation via ring_buffer_lock_reserve() disables preemption, and preemption remains disabled until the event is committed :- __blk_add_trace() __trace_buffer_lock_reserve() __trace_buffer_lock_reserve() ring_buffer_lock_reserve() preempt_disable_notrace(); <--- With this fix blktests for blktrace pass: blktests (master) # ./check blktrace blktrace/001 (blktrace zone management command tracing) [passed] runtime 3.650s ... 3.647s blktrace/002 (blktrace ftrace corruption with sysfs trace) [passed] runtime 0.411s ... 0.384s Fixes: 7ffbd48d5cab ("tracing: Cache comms only after an event occurred") Reported-by: Shinichiro Kawasaki Suggested-by: Steven Rostedt Signed-off-by: Chaitanya Kulkarni Reviewed-by: Steven Rostedt (Google) Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e6988929ead2..18a9b84cc768 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -383,8 +383,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, cpu = raw_smp_processor_id(); if (blk_tracer) { - tracing_record_cmdline(current); - buffer = blk_tr->array_buffer.buffer; trace_ctx = tracing_gen_ctx_flags(0); switch (bt->version) { @@ -419,6 +417,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, if (!event) return; + tracing_record_cmdline(current); switch (bt->version) { case 1: record_blktrace_event(ring_buffer_event_data(event), From c36e28becd0586ac98318fd335e5e91d19cd2623 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 2 Mar 2026 14:32:04 +0000 Subject: [PATCH 597/828] io_uring/net: reject SEND_VECTORIZED when unsupported IORING_SEND_VECTORIZED with registered buffers is not implemented but could be. Don't silently ignore the flag in this case but reject it with an error. It only affects sendzc as normal sends don't support registered buffers. Fixes: 6f02527729bd3 ("io_uring/net: Allow to do vectorized send") Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index 8576c6cb2236..d27adbe3f20b 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -375,6 +375,8 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) kmsg->msg.msg_namelen = addr_len; } if (sr->flags & IORING_RECVSEND_FIXED_BUF) { + if (sr->flags & IORING_SEND_VECTORIZED) + return -EINVAL; req->flags |= REQ_F_IMPORT_BUFFER; return 0; } From 539d1b47e935e8384977dd7e5cec370c08b7a644 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 1 Mar 2026 18:29:43 +0530 Subject: [PATCH 598/828] block: break pcpu_alloc_mutex dependency on freeze_lock While nr_hw_update allocates tagset tags it acquires ->pcpu_alloc_mutex after ->freeze_lock is acquired or queue is frozen. This potentially creates a circular dependency involving ->fs_reclaim if reclaim is triggered simultaneously in a code path which first acquires ->pcpu_ alloc_mutex. As the queue is already frozen while nr_hw_queue update allocates tagsets, the reclaim can't forward progress and thus it could cause a potential deadlock as reported in lockdep splat[1]. Fix this by pre-allocating tagset tags before we freeze queue during nr_hw_queue update. Later the allocated tagset tags could be safely installed and used after queue is frozen. Reported-by: Yi Zhang Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1] Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Tested-by: Yi Zhang Reviewed-by: Yu Kuai [axboe: fix brace style issue] Signed-off-by: Jens Axboe --- block/blk-mq.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index d5602ff62c7c..0b182013016a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4793,38 +4793,45 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set) } } -static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, - int new_nr_hw_queues) +static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags( + struct blk_mq_tag_set *set, + int new_nr_hw_queues) { struct blk_mq_tags **new_tags; int i; if (set->nr_hw_queues >= new_nr_hw_queues) - goto done; + return NULL; new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!new_tags) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (set->tags) memcpy(new_tags, set->tags, set->nr_hw_queues * sizeof(*set->tags)); - kfree(set->tags); - set->tags = new_tags; for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { - if (!__blk_mq_alloc_map_and_rqs(set, i)) { - while (--i >= set->nr_hw_queues) - __blk_mq_free_map_and_rqs(set, i); - return -ENOMEM; + if (blk_mq_is_shared_tags(set->flags)) { + new_tags[i] = set->shared_tags; + } else { + new_tags[i] = blk_mq_alloc_map_and_rqs(set, i, + set->queue_depth); + if (!new_tags[i]) + goto out_unwind; } cond_resched(); } -done: - set->nr_hw_queues = new_nr_hw_queues; - return 0; + return new_tags; +out_unwind: + while (--i >= set->nr_hw_queues) { + if (!blk_mq_is_shared_tags(set->flags)) + blk_mq_free_map_and_rqs(set, new_tags[i], i); + } + kfree(new_tags); + return ERR_PTR(-ENOMEM); } /* @@ -5113,6 +5120,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, unsigned int memflags; int i; struct xarray elv_tbl; + struct blk_mq_tags **new_tags; bool queues_frozen = false; lockdep_assert_held(&set->tag_list_lock); @@ -5147,11 +5155,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (blk_mq_elv_switch_none(q, &elv_tbl)) goto switch_back; + new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues); + if (IS_ERR(new_tags)) + goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue_nomemsave(q); queues_frozen = true; - if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) - goto switch_back; + if (new_tags) { + kfree(set->tags); + set->tags = new_tags; + } + set->nr_hw_queues = nr_hw_queues; fallback: blk_mq_update_queue_map(set); From b570f37a2ce480be26c665345c5514686a8a0274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 10 Feb 2026 12:56:53 +0100 Subject: [PATCH 599/828] mm: Fix a hmm_range_fault() livelock / starvation problem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If hmm_range_fault() fails a folio_trylock() in do_swap_page, trying to acquire the lock of a device-private folio for migration, to ram, the function will spin until it succeeds grabbing the lock. However, if the process holding the lock is depending on a work item to be completed, which is scheduled on the same CPU as the spinning hmm_range_fault(), that work item might be starved and we end up in a livelock / starvation situation which is never resolved. This can happen, for example if the process holding the device-private folio lock is stuck in migrate_device_unmap()->lru_add_drain_all() sinc lru_add_drain_all() requires a short work-item to be run on all online cpus to complete. A prerequisite for this to happen is: a) Both zone device and system memory folios are considered in migrate_device_unmap(), so that there is a reason to call lru_add_drain_all() for a system memory folio while a folio lock is held on a zone device folio. b) The zone device folio has an initial mapcount > 1 which causes at least one migration PTE entry insertion to be deferred to try_to_migrate(), which can happen after the call to lru_add_drain_all(). c) No or voluntary only preemption. This all seems pretty unlikely to happen, but indeed is hit by the "xe_exec_system_allocator" igt test. Resolve this by waiting for the folio to be unlocked if the folio_trylock() fails in do_swap_page(). Rename migration_entry_wait_on_locked() to softleaf_entry_wait_unlock() and update its documentation to indicate the new use-case. Future code improvements might consider moving the lru_add_drain_all() call in migrate_device_unmap() to be called *after* all pages have migration entries inserted. That would eliminate also b) above. v2: - Instead of a cond_resched() in hmm_range_fault(), eliminate the problem by waiting for the folio to be unlocked in do_swap_page() (Alistair Popple, Andrew Morton) v3: - Add a stub migration_entry_wait_on_locked() for the !CONFIG_MIGRATION case. (Kernel Test Robot) v4: - Rename migrate_entry_wait_on_locked() to softleaf_entry_wait_on_locked() and update docs (Alistair Popple) v5: - Add a WARN_ON_ONCE() for the !CONFIG_MIGRATION version of softleaf_entry_wait_on_locked(). - Modify wording around function names in the commit message (Andrew Morton) Suggested-by: Alistair Popple Fixes: 1afaeb8293c9 ("mm/migrate: Trylock device page in do_swap_page") Cc: Ralph Campbell Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Andrew Morton Cc: Matthew Brost Cc: John Hubbard Cc: Alistair Popple Cc: linux-mm@kvack.org Cc: Signed-off-by: Thomas Hellström Cc: # v6.15+ Reviewed-by: John Hubbard #v3 Reviewed-by: Alistair Popple Link: https://patch.msgid.link/20260210115653.92413-1-thomas.hellstrom@linux.intel.com (cherry picked from commit a69d1ab971a624c6f112cea61536569d579c3215) Signed-off-by: Rodrigo Vivi --- include/linux/migrate.h | 10 +++++++++- mm/filemap.c | 15 ++++++++++----- mm/memory.c | 3 ++- mm/migrate.c | 8 ++++---- mm/migrate_device.c | 2 +- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 26ca00c325d9..d5af2b7f577b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, @@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag return -ENOSYS; } +static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) + __releases(ptl) +{ + WARN_ON_ONCE(1); + + spin_unlock(ptl); +} + #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_NUMA_BALANCING diff --git a/mm/filemap.c b/mm/filemap.c index 6cd7974d4ada..406cef06b684 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1379,14 +1379,16 @@ repeat: #ifdef CONFIG_MIGRATION /** - * migration_entry_wait_on_locked - Wait for a migration entry to be removed - * @entry: migration swap entry. + * softleaf_entry_wait_on_locked - Wait for a migration entry or + * device_private entry to be removed. + * @entry: migration or device_private swap entry. * @ptl: already locked ptl. This function will drop the lock. * - * Wait for a migration entry referencing the given page to be removed. This is + * Wait for a migration entry referencing the given page, or device_private + * entry referencing a dvice_private page to be unlocked. This is * equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except * this can be called without taking a reference on the page. Instead this - * should be called while holding the ptl for the migration entry referencing + * should be called while holding the ptl for @entry referencing * the page. * * Returns after unlocking the ptl. @@ -1394,7 +1396,7 @@ repeat: * This follows the same logic as folio_wait_bit_common() so see the comments * there. */ -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl) { struct wait_page_queue wait_page; @@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) * If a migration entry exists for the page the migration path must hold * a valid reference to the page, and it must take the ptl to remove the * migration entry. So the page is valid until the ptl is dropped. + * Similarly any path attempting to drop the last reference to a + * device-private page needs to grab the ptl to remove the device-private + * entry. */ spin_unlock(ptl); diff --git a/mm/memory.c b/mm/memory.c index 07778814b4a8..2f815a34d924 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4763,7 +4763,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(vmf->page); put_page(vmf->page); } else { - pte_unmap_unlock(vmf->pte, vmf->ptl); + pte_unmap(vmf->pte); + softleaf_entry_wait_on_locked(entry, vmf->ptl); } } else if (softleaf_is_hwpoison(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/migrate.c b/mm/migrate.c index 1bf2cf8c44dd..2c3d489ecf51 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -500,7 +500,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, if (!softleaf_is_migration(entry)) goto out; - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; out: spin_unlock(ptl); @@ -532,10 +532,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p * If migration entry existed, safe to release vma lock * here because the pgtable page won't be freed without the * pgtable lock released. See comment right above pgtable - * lock release in migration_entry_wait_on_locked(). + * lock release in softleaf_entry_wait_on_locked(). */ hugetlb_vma_unlock_read(vma); - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; } @@ -553,7 +553,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) ptl = pmd_lock(mm, pmd); if (!pmd_is_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); + softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); return; unlock: spin_unlock(ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 0a8b31939640..8079676c8f1f 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, } if (softleaf_is_migration(entry)) { - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); spin_unlock(ptl); return -EAGAIN; } From 03464a48cc8636b6de2febbc5ef39093a9a15e82 Mon Sep 17 00:00:00 2001 From: David Gow Date: Tue, 10 Feb 2026 21:10:26 +0800 Subject: [PATCH 600/828] MAINTAINERS: Update email address for David Gow Update my email address for KUnit related things in MAINTAINERS (and add an entry to .mailmap so nothing gets lost). Signed-off-by: David Gow Signed-off-by: Shuah Khan --- .mailmap | 1 + MAINTAINERS | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index e1cf6bb85d33..7d27cd78cfdb 100644 --- a/.mailmap +++ b/.mailmap @@ -214,6 +214,7 @@ Daniel Thompson Danilo Krummrich David Brownell David Collins +David Gow David Heidelberg David Hildenbrand David Rheinsberg diff --git a/MAINTAINERS b/MAINTAINERS index 55af015174a5..372ee16c24f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13943,7 +13943,7 @@ F: fs/smb/server/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins -M: David Gow +M: David Gow R: Rae Moar L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com @@ -14763,7 +14763,7 @@ F: drivers/misc/lis3lv02d/ F: drivers/platform/x86/hp/hp_accel.c LIST KUNIT TEST -M: David Gow +M: David Gow L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained From 7dd34dfc8dfa92a7244242098110388367996ac3 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 24 Feb 2026 19:37:56 +0900 Subject: [PATCH 601/828] rust: kunit: fix warning when !CONFIG_PRINTK If `CONFIG_PRINTK` is not set, then the following warnings are issued during build: warning: unused variable: `args` --> ../rust/kernel/kunit.rs:16:12 | 16 | pub fn err(args: fmt::Arguments<'_>) { | ^^^^ help: if this is intentional, prefix it with an underscore: `_args` | = note: `#[warn(unused_variables)]` (part of `#[warn(unused)]`) on by default warning: unused variable: `args` --> ../rust/kernel/kunit.rs:32:13 | 32 | pub fn info(args: fmt::Arguments<'_>) { | ^^^^ help: if this is intentional, prefix it with an underscore: `_args` Fix this by adding a no-op assignment using `args` when `CONFIG_PRINTK` is not set. Fixes: a66d733da801 ("rust: support running Rust documentation tests as KUnit ones") Signed-off-by: Alexandre Courbot Reviewed-by: Alice Ryhl Reviewed-by: David Gow Signed-off-by: Shuah Khan --- rust/kernel/kunit.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index f93f24a60bdd..a1edf7491579 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -14,6 +14,10 @@ use crate::prelude::*; /// Public but hidden since it should only be used from KUnit generated code. #[doc(hidden)] pub fn err(args: fmt::Arguments<'_>) { + // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning. + #[cfg(not(CONFIG_PRINTK))] + let _ = args; + // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we // are passing. #[cfg(CONFIG_PRINTK)] @@ -30,6 +34,10 @@ pub fn err(args: fmt::Arguments<'_>) { /// Public but hidden since it should only be used from KUnit generated code. #[doc(hidden)] pub fn info(args: fmt::Arguments<'_>) { + // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning. + #[cfg(not(CONFIG_PRINTK))] + let _ = args; + // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we // are passing. #[cfg(CONFIG_PRINTK)] From 40804c4974b8df2adab72f6475d343eaff72b7f6 Mon Sep 17 00:00:00 2001 From: Shuvam Pandey Date: Thu, 26 Feb 2026 21:14:10 +0545 Subject: [PATCH 602/828] kunit: tool: copy caller args in run_kernel to prevent mutation run_kernel() appended KUnit flags directly to the caller-provided args list. When exec_tests() calls run_kernel() repeatedly (e.g. with --run_isolated), each call mutated the same list, causing later runs to inherit stale filter_glob values and duplicate kunit.enable flags. Fix this by copying args at the start of run_kernel(). Add a regression test that calls run_kernel() twice with the same list and verifies the original remains unchanged. Fixes: ff9e09a3762f ("kunit: tool: support running each suite/test separately") Signed-off-by: Shuvam Pandey Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 6 ++++-- tools/testing/kunit/kunit_tool_test.py | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 260d8d9aa1db..2998e1bc088b 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -346,8 +346,10 @@ class LinuxSourceTree: return self.validate_config(build_dir) def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', filter: str='', filter_action: Optional[str]=None, timeout: Optional[int]=None) -> Iterator[str]: - if not args: - args = [] + # Copy to avoid mutating the caller-supplied list. exec_tests() reuses + # the same args across repeated run_kernel() calls (e.g. --run_isolated), + # so appending to the original would accumulate stale flags on each call. + args = list(args) if args else [] if filter_glob: args.append('kunit.filter_glob=' + filter_glob) if filter: diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index b67408147c1f..f6383884c599 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -503,6 +503,32 @@ class LinuxSourceTreeTest(unittest.TestCase): with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile: self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output') + def test_run_kernel_args_not_mutated(self): + """Verify run_kernel() copies args so callers can reuse them.""" + start_calls = [] + + def fake_start(start_args, unused_build_dir): + start_calls.append(list(start_args)) + return subprocess.Popen(['printf', 'KTAP version 1\n'], + text=True, stdout=subprocess.PIPE) + + with tempfile.TemporaryDirectory('') as build_dir: + tree = kunit_kernel.LinuxSourceTree(build_dir, + kunitconfig_paths=[os.devnull]) + with mock.patch.object(tree._ops, 'start', side_effect=fake_start), \ + mock.patch.object(kunit_kernel.subprocess, 'call'): + kernel_args = ['mem=1G'] + for _ in tree.run_kernel(args=kernel_args, build_dir=build_dir, + filter_glob='suite.test1'): + pass + for _ in tree.run_kernel(args=kernel_args, build_dir=build_dir, + filter_glob='suite.test2'): + pass + self.assertEqual(kernel_args, ['mem=1G'], + 'run_kernel() should not modify caller args') + self.assertIn('kunit.filter_glob=suite.test1', start_calls[0]) + self.assertIn('kunit.filter_glob=suite.test2', start_calls[1]) + def test_build_reconfig_no_config(self): with tempfile.TemporaryDirectory('') as build_dir: with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f: From b11b9b6751b2cd74960dccd91667c5117fce743c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Feb 2026 10:48:53 +0100 Subject: [PATCH 603/828] kunit: reduce stack usage in kunit_run_tests() Some of the recent changes to the kunit framework caused the stack usage for kunit_run_tests() to grow higher than most other kernel functions, which triggers a warning when CONFIG_FRAME_WARN is set to a relatively low value: lib/kunit/test.c: In function 'kunit_run_tests': lib/kunit/test.c:801:1: error: the frame size of 1312 bytes is larger than 1280 bytes [-Werror=frame-larger-than=] Split out the inner loop into a separate function to ensure that each function remains under the limit, and pass the kunit_result_stats structures by reference to avoid excessive copies. Fixed checkpatch warnings at commit time: Shuah Khan Cc: Carlos Llamas Signed-off-by: Arnd Bergmann Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/test.c | 231 +++++++++++++++++++++++++---------------------- 1 file changed, 125 insertions(+), 106 deletions(-) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 62eb529824c6..41e1c89799b6 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -94,7 +94,7 @@ struct kunit_result_stats { unsigned long total; }; -static bool kunit_should_print_stats(struct kunit_result_stats stats) +static bool kunit_should_print_stats(struct kunit_result_stats *stats) { if (kunit_stats_enabled == 0) return false; @@ -102,11 +102,11 @@ static bool kunit_should_print_stats(struct kunit_result_stats stats) if (kunit_stats_enabled == 2) return true; - return (stats.total > 1); + return (stats->total > 1); } static void kunit_print_test_stats(struct kunit *test, - struct kunit_result_stats stats) + struct kunit_result_stats *stats) { if (!kunit_should_print_stats(stats)) return; @@ -115,10 +115,10 @@ static void kunit_print_test_stats(struct kunit *test, KUNIT_SUBTEST_INDENT "# %s: pass:%lu fail:%lu skip:%lu total:%lu", test->name, - stats.passed, - stats.failed, - stats.skipped, - stats.total); + stats->passed, + stats->failed, + stats->skipped, + stats->total); } /* Append formatted message to log. */ @@ -600,26 +600,26 @@ static void kunit_run_case_catch_errors(struct kunit_suite *suite, } static void kunit_print_suite_stats(struct kunit_suite *suite, - struct kunit_result_stats suite_stats, - struct kunit_result_stats param_stats) + struct kunit_result_stats *suite_stats, + struct kunit_result_stats *param_stats) { if (kunit_should_print_stats(suite_stats)) { kunit_log(KERN_INFO, suite, "# %s: pass:%lu fail:%lu skip:%lu total:%lu", suite->name, - suite_stats.passed, - suite_stats.failed, - suite_stats.skipped, - suite_stats.total); + suite_stats->passed, + suite_stats->failed, + suite_stats->skipped, + suite_stats->total); } if (kunit_should_print_stats(param_stats)) { kunit_log(KERN_INFO, suite, "# Totals: pass:%lu fail:%lu skip:%lu total:%lu", - param_stats.passed, - param_stats.failed, - param_stats.skipped, - param_stats.total); + param_stats->passed, + param_stats->failed, + param_stats->skipped, + param_stats->total); } } @@ -681,13 +681,116 @@ static void kunit_init_parent_param_test(struct kunit_case *test_case, struct ku } } -int kunit_run_tests(struct kunit_suite *suite) +static noinline_for_stack void +kunit_run_param_test(struct kunit_suite *suite, struct kunit_case *test_case, + struct kunit *test, + struct kunit_result_stats *suite_stats, + struct kunit_result_stats *total_stats, + struct kunit_result_stats *param_stats) { char param_desc[KUNIT_PARAM_DESC_SIZE]; + const void *curr_param; + + kunit_init_parent_param_test(test_case, test); + if (test_case->status == KUNIT_FAILURE) { + kunit_update_stats(param_stats, test->status); + return; + } + /* Get initial param. */ + param_desc[0] = '\0'; + /* TODO: Make generate_params try-catch */ + curr_param = test_case->generate_params(test, NULL, param_desc); + test_case->status = KUNIT_SKIPPED; + kunit_log(KERN_INFO, test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT + "KTAP version 1\n"); + kunit_log(KERN_INFO, test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT + "# Subtest: %s", test_case->name); + if (test->params_array.params && + test_case->generate_params == kunit_array_gen_params) { + kunit_log(KERN_INFO, test, KUNIT_SUBTEST_INDENT + KUNIT_SUBTEST_INDENT "1..%zd\n", + test->params_array.num_params); + } + + while (curr_param) { + struct kunit param_test = { + .param_value = curr_param, + .param_index = ++test->param_index, + .parent = test, + }; + kunit_init_test(¶m_test, test_case->name, NULL); + param_test.log = test_case->log; + kunit_run_case_catch_errors(suite, test_case, ¶m_test); + + if (param_desc[0] == '\0') { + snprintf(param_desc, sizeof(param_desc), + "param-%d", param_test.param_index); + } + + kunit_print_ok_not_ok(¶m_test, KUNIT_LEVEL_CASE_PARAM, + param_test.status, + param_test.param_index, + param_desc, + param_test.status_comment); + + kunit_update_stats(param_stats, param_test.status); + + /* Get next param. */ + param_desc[0] = '\0'; + curr_param = test_case->generate_params(test, curr_param, + param_desc); + } + /* + * TODO: Put into a try catch. Since we don't need suite->exit + * for it we can't reuse kunit_try_run_cleanup for this yet. + */ + if (test_case->param_exit) + test_case->param_exit(test); + /* TODO: Put this kunit_cleanup into a try-catch. */ + kunit_cleanup(test); +} + +static noinline_for_stack void +kunit_run_one_test(struct kunit_suite *suite, struct kunit_case *test_case, + struct kunit_result_stats *suite_stats, + struct kunit_result_stats *total_stats) +{ + struct kunit test = { .param_value = NULL, .param_index = 0 }; + struct kunit_result_stats param_stats = { 0 }; + + kunit_init_test(&test, test_case->name, test_case->log); + if (test_case->status == KUNIT_SKIPPED) { + /* Test marked as skip */ + test.status = KUNIT_SKIPPED; + kunit_update_stats(¶m_stats, test.status); + } else if (!test_case->generate_params) { + /* Non-parameterised test. */ + test_case->status = KUNIT_SKIPPED; + kunit_run_case_catch_errors(suite, test_case, &test); + kunit_update_stats(¶m_stats, test.status); + } else { + kunit_run_param_test(suite, test_case, &test, suite_stats, + total_stats, ¶m_stats); + } + kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE); + + kunit_print_test_stats(&test, ¶m_stats); + + kunit_print_ok_not_ok(&test, KUNIT_LEVEL_CASE, test_case->status, + kunit_test_case_num(suite, test_case), + test_case->name, + test.status_comment); + + kunit_update_stats(suite_stats, test_case->status); + kunit_accumulate_stats(total_stats, param_stats); +} + + +int kunit_run_tests(struct kunit_suite *suite) +{ struct kunit_case *test_case; struct kunit_result_stats suite_stats = { 0 }; struct kunit_result_stats total_stats = { 0 }; - const void *curr_param; /* Taint the kernel so we know we've run tests. */ add_taint(TAINT_TEST, LOCKDEP_STILL_OK); @@ -703,97 +806,13 @@ int kunit_run_tests(struct kunit_suite *suite) kunit_print_suite_start(suite); - kunit_suite_for_each_test_case(suite, test_case) { - struct kunit test = { .param_value = NULL, .param_index = 0 }; - struct kunit_result_stats param_stats = { 0 }; - - kunit_init_test(&test, test_case->name, test_case->log); - if (test_case->status == KUNIT_SKIPPED) { - /* Test marked as skip */ - test.status = KUNIT_SKIPPED; - kunit_update_stats(¶m_stats, test.status); - } else if (!test_case->generate_params) { - /* Non-parameterised test. */ - test_case->status = KUNIT_SKIPPED; - kunit_run_case_catch_errors(suite, test_case, &test); - kunit_update_stats(¶m_stats, test.status); - } else { - kunit_init_parent_param_test(test_case, &test); - if (test_case->status == KUNIT_FAILURE) { - kunit_update_stats(¶m_stats, test.status); - goto test_case_end; - } - /* Get initial param. */ - param_desc[0] = '\0'; - /* TODO: Make generate_params try-catch */ - curr_param = test_case->generate_params(&test, NULL, param_desc); - test_case->status = KUNIT_SKIPPED; - kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT - "KTAP version 1\n"); - kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT - "# Subtest: %s", test_case->name); - if (test.params_array.params && - test_case->generate_params == kunit_array_gen_params) { - kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT - KUNIT_SUBTEST_INDENT "1..%zd\n", - test.params_array.num_params); - } - - while (curr_param) { - struct kunit param_test = { - .param_value = curr_param, - .param_index = ++test.param_index, - .parent = &test, - }; - kunit_init_test(¶m_test, test_case->name, NULL); - param_test.log = test_case->log; - kunit_run_case_catch_errors(suite, test_case, ¶m_test); - - if (param_desc[0] == '\0') { - snprintf(param_desc, sizeof(param_desc), - "param-%d", param_test.param_index); - } - - kunit_print_ok_not_ok(¶m_test, KUNIT_LEVEL_CASE_PARAM, - param_test.status, - param_test.param_index, - param_desc, - param_test.status_comment); - - kunit_update_stats(¶m_stats, param_test.status); - - /* Get next param. */ - param_desc[0] = '\0'; - curr_param = test_case->generate_params(&test, curr_param, - param_desc); - } - /* - * TODO: Put into a try catch. Since we don't need suite->exit - * for it we can't reuse kunit_try_run_cleanup for this yet. - */ - if (test_case->param_exit) - test_case->param_exit(&test); - /* TODO: Put this kunit_cleanup into a try-catch. */ - kunit_cleanup(&test); - } -test_case_end: - kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE); - - kunit_print_test_stats(&test, param_stats); - - kunit_print_ok_not_ok(&test, KUNIT_LEVEL_CASE, test_case->status, - kunit_test_case_num(suite, test_case), - test_case->name, - test.status_comment); - - kunit_update_stats(&suite_stats, test_case->status); - kunit_accumulate_stats(&total_stats, param_stats); - } + kunit_suite_for_each_test_case(suite, test_case) + kunit_run_one_test(suite, test_case, &suite_stats, &total_stats); if (suite->suite_exit) suite->suite_exit(suite); - kunit_print_suite_stats(suite, suite_stats, total_stats); + kunit_print_suite_stats(suite, &suite_stats, &total_stats); suite_end: kunit_print_suite_end(suite); From 9adfcef334bf9c6ef68eaecfca5f45d18614efe0 Mon Sep 17 00:00:00 2001 From: zhidao su Date: Mon, 2 Mar 2026 17:14:39 +0800 Subject: [PATCH 604/828] sched_ext: Use READ_ONCE() for the read side of dsq->nr update scx_bpf_dsq_nr_queued() reads dsq->nr via READ_ONCE() without holding any lock, making dsq->nr a lock-free concurrently accessed variable. However, dsq_mod_nr(), the sole writer of dsq->nr, only uses WRITE_ONCE() on the write side without the matching READ_ONCE() on the read side: WRITE_ONCE(dsq->nr, dsq->nr + delta); ^^^^^^^ plain read -- KCSAN data race The KCSAN documentation requires that if one accessor uses READ_ONCE() or WRITE_ONCE() on a variable to annotate lock-free access, all other accesses must also use the appropriate accessor. A plain read on the right-hand side of WRITE_ONCE() leaves the pair incomplete and will trigger KCSAN warnings. Fix by using READ_ONCE() for the read side of the update: WRITE_ONCE(dsq->nr, READ_ONCE(dsq->nr) + delta); This is consistent with scx_bpf_dsq_nr_queued() and makes the concurrent access annotation complete and KCSAN-clean. Signed-off-by: zhidao su Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 9280381f8923..4c2cf0d7d495 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -976,8 +976,12 @@ static bool scx_dsq_priq_less(struct rb_node *node_a, static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta) { - /* scx_bpf_dsq_nr_queued() reads ->nr without locking, use WRITE_ONCE() */ - WRITE_ONCE(dsq->nr, dsq->nr + delta); + /* + * scx_bpf_dsq_nr_queued() reads ->nr without locking. Use READ_ONCE() + * on the read side and WRITE_ONCE() on the write side to properly + * annotate the concurrent lockless access and avoid KCSAN warnings. + */ + WRITE_ONCE(dsq->nr, READ_ONCE(dsq->nr) + delta); } static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p) From 494eaf4651975127d34d5ae6555c72dedba092c9 Mon Sep 17 00:00:00 2001 From: zhidao su Date: Mon, 2 Mar 2026 17:14:40 +0800 Subject: [PATCH 605/828] sched_ext: Replace naked scx_root dereferences in kobject callbacks scx_attr_ops_show() and scx_uevent() access scx_root->ops.name directly. This is problematic for two reasons: 1. The file-level comment explicitly identifies naked scx_root dereferences as a temporary measure that needs to be replaced with proper per-instance access. 2. scx_attr_events_show(), the neighboring sysfs show function in the same group, already uses the correct pattern: struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj); Having inconsistent access patterns in the same sysfs/uevent group is error-prone. The kobject embedded in struct scx_sched is initialized as: kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root"); so container_of(kobj, struct scx_sched, kobj) correctly retrieves the owning scx_sched instance in both callbacks. Replace the naked scx_root dereferences with container_of()-based access, consistent with scx_attr_events_show() and in preparation for proper multi-instance scx_sched support. Signed-off-by: zhidao su Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 4c2cf0d7d495..a566d2cc8a43 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3712,7 +3712,9 @@ static void scx_kobj_release(struct kobject *kobj) static ssize_t scx_attr_ops_show(struct kobject *kobj, struct kobj_attribute *ka, char *buf) { - return sysfs_emit(buf, "%s\n", scx_root->ops.name); + struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj); + + return sysfs_emit(buf, "%s\n", sch->ops.name); } SCX_ATTR(ops); @@ -3756,7 +3758,9 @@ static const struct kobj_type scx_ktype = { static int scx_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { - return add_uevent_var(env, "SCXOPS=%s", scx_root->ops.name); + const struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj); + + return add_uevent_var(env, "SCXOPS=%s", sch->ops.name); } static const struct kset_uevent_ops scx_uevent_ops = { From af4e9ef3d78420feb8fe58cd9a1ab80c501b3c08 Mon Sep 17 00:00:00 2001 From: David Laight Date: Mon, 2 Mar 2026 13:27:51 +0000 Subject: [PATCH 606/828] uaccess: Fix scoped_user_read_access() for 'pointer to const' If a 'const struct foo __user *ptr' is used for the address passed to scoped_user_read_access() then you get a warning/error uaccess.h:691:1: error: initialization discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] for the void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl) assignment. Fix by using 'auto' for both _tmpptr and the redeclaration of uptr. Replace the CLASS() with explicit __cleanup() functions on uptr. Fixes: e497310b4ffb ("uaccess: Provide scoped user access regions") Signed-off-by: David Laight Reviewed-and-tested-by: Christophe Leroy (CS GROUP) Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 54 +++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 1f3804245c06..809e4f7dfdbd 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -647,36 +647,22 @@ static inline void user_access_restore(unsigned long flags) { } /* Define RW variant so the below _mode macro expansion works */ #define masked_user_rw_access_begin(u) masked_user_access_begin(u) #define user_rw_access_begin(u, s) user_access_begin(u, s) -#define user_rw_access_end() user_access_end() /* Scoped user access */ -#define USER_ACCESS_GUARD(_mode) \ -static __always_inline void __user * \ -class_user_##_mode##_begin(void __user *ptr) \ -{ \ - return ptr; \ -} \ - \ -static __always_inline void \ -class_user_##_mode##_end(void __user *ptr) \ -{ \ - user_##_mode##_access_end(); \ -} \ - \ -DEFINE_CLASS(user_ ##_mode## _access, void __user *, \ - class_user_##_mode##_end(_T), \ - class_user_##_mode##_begin(ptr), void __user *ptr) \ - \ -static __always_inline class_user_##_mode##_access_t \ -class_user_##_mode##_access_ptr(void __user *scope) \ -{ \ - return scope; \ -} -USER_ACCESS_GUARD(read) -USER_ACCESS_GUARD(write) -USER_ACCESS_GUARD(rw) -#undef USER_ACCESS_GUARD +/* Cleanup wrapper functions */ +static __always_inline void __scoped_user_read_access_end(const void *p) +{ + user_read_access_end(); +}; +static __always_inline void __scoped_user_write_access_end(const void *p) +{ + user_write_access_end(); +}; +static __always_inline void __scoped_user_rw_access_end(const void *p) +{ + user_access_end(); +}; /** * __scoped_user_access_begin - Start a scoped user access @@ -750,13 +736,13 @@ USER_ACCESS_GUARD(rw) * * Don't use directly. Use scoped_masked_user_$MODE_access() instead. */ -#define __scoped_user_access(mode, uptr, size, elbl) \ -for (bool done = false; !done; done = true) \ - for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ - !done; done = true) \ - for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true) \ - /* Force modified pointer usage within the scope */ \ - for (const typeof(uptr) uptr = _tmpptr; !done; done = true) +#define __scoped_user_access(mode, uptr, size, elbl) \ +for (bool done = false; !done; done = true) \ + for (auto _tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ + !done; done = true) \ + /* Force modified pointer usage within the scope */ \ + for (const auto uptr __cleanup(__scoped_user_##mode##_access_end) = \ + _tmpptr; !done; done = true) /** * scoped_user_read_access_size - Start a scoped user read access with given size From 6270ee26e1edd862ea17e3eba148ca8fb2c99dc9 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 26 Feb 2026 13:38:57 -0800 Subject: [PATCH 607/828] accel/amdxdna: Fix NULL pointer dereference of mgmt_chann mgmt_chann may be set to NULL if the firmware returns an unexpected error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL pointer dereference in aie2_hw_stop(). Fix this by introducing a dedicated helper to destroy mgmt_chann and by adding proper NULL checks before accessing it. Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260226213857.3068474-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++----- drivers/accel/amdxdna/aie2_pci.c | 7 ++----- drivers/accel/amdxdna/aie2_pci.h | 1 + 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 277a27bce850..22e1a85a7ae0 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, return -ENODEV; ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); - if (ret == -ETIME) { - xdna_mailbox_stop_channel(ndev->mgmt_chann); - xdna_mailbox_destroy_channel(ndev->mgmt_chann); - ndev->mgmt_chann = NULL; - } + if (ret == -ETIME) + aie2_destroy_mgmt_chann(ndev); if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) { XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x", @@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev) ndev->exec_msg_ops = &legacy_exec_message_ops; } +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + if (!ndev->mgmt_chann) + return; + + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_destroy_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; +} + static inline struct amdxdna_gem_obj * aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) { diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 85079b6fc5d9..977ce21eaf9f 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL); aie2_mgmt_fw_fini(ndev); - xdna_mailbox_stop_channel(ndev->mgmt_chann); - xdna_mailbox_destroy_channel(ndev->mgmt_chann); - ndev->mgmt_chann = NULL; + aie2_destroy_mgmt_chann(ndev); drmm_kfree(&xdna->ddev, ndev->mbox); ndev->mbox = NULL; aie2_psp_stop(ndev->psp_hdl); @@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) return 0; destroy_mgmt_chann: - xdna_mailbox_stop_channel(ndev->mgmt_chann); - xdna_mailbox_destroy_channel(ndev->mgmt_chann); + aie2_destroy_mgmt_chann(ndev); stop_psp: aie2_psp_stop(ndev->psp_hdl); fini_smu: diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index b20a3661078c..e72311c77996 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, /* aie2_message.c */ void aie2_msg_init(struct amdxdna_dev_hdl *ndev); +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev); int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); From 3ebc98c1ae7efda949a015990280a097f4a5453a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Mar 2026 09:16:22 +0100 Subject: [PATCH 608/828] ftrace: Add missing ftrace_lock to update_ftrace_direct_add/del Ihor and Kumar reported splat from ftrace_get_addr_curr [1], which happened because of the missing ftrace_lock in update_ftrace_direct_add/del functions allowing concurrent access to ftrace internals. The ftrace_update_ops function must be guarded by ftrace_lock, adding that. Fixes: 05dc5e9c1fe1 ("ftrace: Add update_ftrace_direct_add function") Fixes: 8d2c1233f371 ("ftrace: Add update_ftrace_direct_del function") Reported-by: Ihor Solodrai Reported-by: Kumar Kartikeya Dwivedi Closes: https://lore.kernel.org/bpf/1b58ffb2-92ae-433a-ba46-95294d6edea2@linux.dev/ Tested-by: Ihor Solodrai Signed-off-by: Jiri Olsa Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20260302081622.165713-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/trace/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 827fb9a0bf0d..8baf61c9be6d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6404,6 +6404,7 @@ int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash) new_filter_hash = old_filter_hash; } } else { + guard(mutex)(&ftrace_lock); err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH); /* * new_filter_hash is dup-ed, so we need to release it anyway, @@ -6530,6 +6531,7 @@ int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash) ops->func_hash->filter_hash = NULL; } } else { + guard(mutex)(&ftrace_lock); err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH); /* * new_filter_hash is dup-ed, so we need to release it anyway, From 800ca7b88a0c0eba6fa721e043eb3ed5b10ecc4f Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Sat, 21 Feb 2026 12:36:39 +0100 Subject: [PATCH 609/828] ACPICA: Update the _CPC definition to match ACPI 6.6 Update the _CPC definition to also support return package sub-type of a Package (with Integer and Buffer) as per ACPI Spec 6.6. Link: https://github.com/acpica/acpica/commit/17a761944cc2 Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2829238.mvXUDI8C0e@rafael.j.wysocki --- drivers/acpi/acpica/acpredef.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 5f70b196e0aa..6c9b5bf7d392 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -379,8 +379,9 @@ const union acpi_predefined_info acpi_gbl_predefined_methods[] = { {{"_CPC", METHOD_0ARGS, METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}}, /* Variable-length (Ints/Bufs) */ - PACKAGE_INFO(ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER, 0, - 0, 0, 0), + PACKAGE_INFO(ACPI_PTYPE1_VAR, + ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER | + ACPI_RTYPE_PACKAGE, 0, 0, 0, 0), {{"_CR3", METHOD_0ARGS, /* ACPI 6.0 */ METHOD_RETURNS(ACPI_RTYPE_INTEGER)}}, From fdb12c8a24a453bdd6759979b6ef1e04ebd4beb4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 27 Feb 2026 22:40:48 -0700 Subject: [PATCH 610/828] kbuild: Leave objtool binary around with 'make clean' The difference between 'make clean' and 'make mrproper' is documented in 'make help' as: clean - Remove most generated files but keep the config and enough build support to build external modules mrproper - Remove all generated files + config + various backup files After commit 68b4fe32d737 ("kbuild: Add objtool to top-level clean target"), running 'make clean' then attempting to build an external module with the resulting build directory fails with $ make ARCH=x86_64 O=build clean $ make -C build M=... MO=... ... /bin/sh: line 1: .../build/tools/objtool/objtool: No such file or directory as 'make clean' removes the objtool binary. Split the objtool clean target into mrproper and clean like Kbuild does and remove all generated artifacts with 'make clean' except for the objtool binary, which is removed with 'make mrproper'. To avoid a small race when running the objtool clean target through both objtool_mrproper and objtool_clean when running 'make mrproper', modify objtool's clean up find command to avoid using find's '-delete' command by piping the files into 'xargs rm -f' like the rest of Kbuild does. Cc: stable@vger.kernel.org Fixes: 68b4fe32d737 ("kbuild: Add objtool to top-level clean target") Reported-by: Michal Suchanek Closes: https://lore.kernel.org/20260225112633.6123-1-msuchanek@suse.de/ Reported-by: Rainer Fiebig Closes: https://lore.kernel.org/62d12399-76e5-3d40-126a-7490b4795b17@mailbox.org/ Acked-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Reviewed-by: Nicolas Schier Tested-by: Nicolas Schier Link: https://patch.msgid.link/20260227-avoid-objtool-binary-removal-clean-v1-1-122f3e55eae9@kernel.org Signed-off-by: Nathan Chancellor --- Makefile | 8 ++++---- tools/objtool/Makefile | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index e944c6e71e81..d76d706a5580 100644 --- a/Makefile +++ b/Makefile @@ -1497,13 +1497,13 @@ ifneq ($(wildcard $(resolve_btfids_O)),) $(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean endif -PHONY += objtool_clean +PHONY += objtool_clean objtool_mrproper objtool_O = $(abspath $(objtree))/tools/objtool -objtool_clean: +objtool_clean objtool_mrproper: ifneq ($(wildcard $(objtool_O)),) - $(Q)$(MAKE) -sC $(abs_srctree)/tools/objtool O=$(objtool_O) srctree=$(abs_srctree) clean + $(Q)$(MAKE) -sC $(abs_srctree)/tools/objtool O=$(objtool_O) srctree=$(abs_srctree) $(patsubst objtool_%,%,$@) endif tools/: FORCE @@ -1686,7 +1686,7 @@ PHONY += $(mrproper-dirs) mrproper $(mrproper-dirs): $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@) -mrproper: clean $(mrproper-dirs) +mrproper: clean objtool_mrproper $(mrproper-dirs) $(call cmd,rmfiles) @find . $(RCS_FIND_IGNORE) \ \( -name '*.rmeta' \) \ diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 6964175abdfd..76bcd4e85de3 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -142,13 +142,15 @@ $(LIBSUBCMD)-clean: $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT) clean: $(LIBSUBCMD)-clean - $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) - $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)find $(OUTPUT) \( -name '*.o' -o -name '\.*.cmd' -o -name '\.*.d' \) -type f -print | xargs $(RM) $(Q)$(RM) $(OUTPUT)arch/x86/lib/cpu-feature-names.c $(OUTPUT)fixdep $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.objtool $(Q)$(RM) -r -- $(OUTPUT)feature +mrproper: clean + $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) + FORCE: -.PHONY: clean FORCE +.PHONY: clean mrproper FORCE From c28b3ec3ca034fd1abc832fef46ce36eb13f8fad Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 27 Feb 2026 12:26:04 -0700 Subject: [PATCH 611/828] drm/amd/display: Use mpc.preblend flag to indicate 3D LUT [WHAT] New ASIC's 3D LUT is indicated by mpc.preblend. Fixes: 0de2b1afea8d ("drm/amd/display: add 3D LUT colorop") Reviewed-by: Melissa Wen Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 43175f6164d32cb96362d16e357689f74298145c) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 6 ++++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 2ba98f384685..cd1e58b8defc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1706,6 +1706,7 @@ __set_dm_plane_colorop_3dlut(struct drm_plane_state *plane_state, struct dc_transfer_func *tf = &dc_plane_state->in_shaper_func; struct drm_atomic_state *state = plane_state->state; const struct amdgpu_device *adev = drm_to_adev(colorop->dev); + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; const struct drm_device *dev = colorop->dev; const struct drm_color_lut32 *lut3d; uint32_t lut3d_size; @@ -1722,7 +1723,7 @@ __set_dm_plane_colorop_3dlut(struct drm_plane_state *plane_state, } if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_3D_LUT) { - if (!adev->dm.dc->caps.color.dpp.hw_3d_lut) { + if (!has_3dlut) { drm_dbg(dev, "3D LUT is not supported by hardware\n"); return -EINVAL; } @@ -1875,6 +1876,7 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, struct drm_colorop *colorop = plane_state->color_pipeline; struct drm_device *dev = plane_state->plane->dev; struct amdgpu_device *adev = drm_to_adev(dev); + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; int ret; /* 1D Curve - DEGAM TF */ @@ -1907,7 +1909,7 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, if (ret) return ret; - if (adev->dm.dc->caps.color.dpp.hw_3d_lut) { + if (has_3dlut) { /* 1D Curve & LUT - SHAPER TF & LUT */ colorop = colorop->next; if (!colorop) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index f25c0ede7199..d59ba82d3d7c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -60,6 +60,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr struct drm_colorop *ops[MAX_COLOR_PIPELINE_OPS]; struct drm_device *dev = plane->dev; struct amdgpu_device *adev = drm_to_adev(dev); + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; int ret; int i = 0; @@ -112,7 +113,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr i++; - if (adev->dm.dc->caps.color.dpp.hw_3d_lut) { + if (has_3dlut) { /* 1D curve - SHAPER TF */ ops[i] = kzalloc_obj(*ops[0]); if (!ops[i]) { From a4fa2355e0add57253468ef13bd08f11285f3b6e Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 27 Feb 2026 12:30:38 -0700 Subject: [PATCH 612/828] drm/amd/display: Enable DEGAMMA and reject COLOR_PIPELINE+DEGAMMA_LUT [WHAT] Create DEGAMMA properties even if color pipeline is enabled, and enforce the mutual exclusion in atomic check by rejecting any commit that attempts to enable both COLOR_PIPELINE on the plane and DEGAMMA_LUT on the CRTC simultaneously. Fixes: 18a4127e9315 ("drm/amd/display: Disable CRTC degamma when color pipeline is enabled") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4963 Reviewed-by: Melissa Wen Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 196a6aa727f1f15eb54dda5e60a41543ea9397ee) --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 16 ++++++++-------- .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 8 ++++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 130190e8a1b2..304437c2284d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -765,15 +765,15 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, dm->adev->mode_info.crtcs[crtc_index] = acrtc; /* Don't enable DRM CRTC degamma property for - * 1. Degamma is replaced by color pipeline. - * 2. DCE since it doesn't support programmable degamma anywhere. - * 3. DCN401 since pre-blending degamma LUT doesn't apply to cursor. + * 1. DCE since it doesn't support programmable degamma anywhere. + * 2. DCN401 since pre-blending degamma LUT doesn't apply to cursor. + * Note: DEGAMMA properties are created even if the primary plane has the + * COLOR_PIPELINE property. User space can use either the DEGAMMA properties + * or the COLOR_PIPELINE property. An atomic commit which attempts to enable + * both is rejected. */ - if (plane->color_pipeline_property) - has_degamma = false; - else - has_degamma = dm->adev->dm.dc->caps.color.dpp.dcn_arch && - dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01; + has_degamma = dm->adev->dm.dc->caps.color.dpp.dcn_arch && + dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01; drm_crtc_enable_color_mgmt(&acrtc->base, has_degamma ? MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 70587e5a8d46..127207e18dcb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1256,6 +1256,14 @@ static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; + /* Reject commits that attempt to use both COLOR_PIPELINE and CRTC DEGAMMA_LUT */ + if (new_plane_state->color_pipeline && new_crtc_state->degamma_lut) { + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] COLOR_PIPELINE and CRTC DEGAMMA_LUT cannot be enabled simultaneously\n", + plane->base.id, plane->name); + return -EINVAL; + } + ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, new_plane_state, &scaling_info); if (ret) return ret; From 389c2024cab817366e6b8345f679f41064fa94d6 Mon Sep 17 00:00:00 2001 From: sguttula Date: Sat, 21 Feb 2026 10:47:59 +0530 Subject: [PATCH 613/828] drm/amdgpu: Enable DPG support for VCN5 This will set DPG flags for enabling power gating on GFX11_5_4 Signed-off-by: sguttula Reviewed-by: Pratik Vishwakarma Signed-off-by: Alex Deucher (cherry picked from commit a503c266d70d3363ba6bffb883cd6ecdb092670c) --- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 8122a5cacf07..a0ad1f8a76f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -858,7 +858,9 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; - adev->pg_flags = AMD_PG_SUPPORT_VCN | + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG_DPG | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; adev->external_rev_id = adev->rev_id + 0x1; From 30d937f63bd19bbcaafa4b892eb251f8bbbf04ef Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 18 Feb 2026 14:34:28 -0500 Subject: [PATCH 614/828] drm/amd/display: Fallback to boot snapshot for dispclk [WHY & HOW] If the dentist is unavailable, fallback to reading CLKIP via the boot snapshot to get the current dispclk. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dillon Varone Signed-off-by: Alex Hung Cc: Mario Limonciello Cc: Alex Deucher Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 2ab77600d1e55a042c02437326d3c7563e853c6c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b91517b9fedc..eb198d52a115 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -72,7 +72,11 @@ void dcn401_initialize_min_clocks(struct dc *dc) * audio corruption. Read current DISPCLK from DENTIST and request the same * freq to ensure that the timing is valid and unchanged. */ - clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + if (dc->clk_mgr->funcs->get_dispclk_from_dentist) { + clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + } else { + clocks->dispclk_khz = dc->clk_mgr->boot_snapshot.dispclk * 1000; + } } clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; clocks->fclk_p_state_change_support = true; From 3b46d61890632c8f8b117147b6923bff4b42ccb7 Mon Sep 17 00:00:00 2001 From: Vladimir Yakovlev Date: Tue, 3 Mar 2026 01:20:17 +0300 Subject: [PATCH 615/828] spi: spi-dw-dma: fix print error log when wait finish transaction If an error occurs, the device may not have a current message. In this case, the system will crash. In this case, it's better to use dev from the struct ctlr (struct spi_controller*). Signed-off-by: Vladimir Yakovlev Link: https://patch.msgid.link/20260302222017.992228-2-vovchkir@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-dw-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index 65adec7c7524..fe726b9b1780 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c @@ -271,7 +271,7 @@ static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed) msecs_to_jiffies(ms)); if (ms == 0) { - dev_err(&dws->ctlr->cur_msg->spi->dev, + dev_err(&dws->ctlr->dev, "DMA transaction timed out\n"); return -ETIMEDOUT; } From 20d6f07004d639967dcb00994d56ce6d16118e9e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 28 Feb 2026 20:01:40 -0800 Subject: [PATCH 616/828] lib/crypto: tests: Add a .kunitconfig file Add a .kunitconfig file to the lib/crypto/ directory so that the crypto library tests can be run more easily using kunit.py. Example with UML: tools/testing/kunit/kunit.py run --kunitconfig=lib/crypto Example with QEMU: tools/testing/kunit/kunit.py run --kunitconfig=lib/crypto --arch=arm64 --make_options LLVM=1 Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20260301040140.490310-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/.kunitconfig | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 lib/crypto/.kunitconfig diff --git a/lib/crypto/.kunitconfig b/lib/crypto/.kunitconfig new file mode 100644 index 000000000000..6b2ce28ae509 --- /dev/null +++ b/lib/crypto/.kunitconfig @@ -0,0 +1,34 @@ +CONFIG_KUNIT=y + +# These kconfig options select all the CONFIG_CRYPTO_LIB_* symbols that have a +# corresponding KUnit test. Those symbols cannot be directly enabled here, +# since they are hidden symbols. +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ADIANTUM=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_HCTR2=y +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MLDSA=y +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_SHA3=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_WIREGUARD=y + +CONFIG_CRYPTO_LIB_BLAKE2B_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_MLDSA_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_NH_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_POLYVAL_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA256_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA512_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA3_KUNIT_TEST=y From 5d75c7bcc40a90f77f315e1c91dfb2f1b189a435 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Feb 2026 15:46:05 +0100 Subject: [PATCH 617/828] crypto: Clean up help text for CRYPTO_BLAKE2B Btrfs stopped using this BLAKE2b implementation in commit fe11ac191ce0ad91 ("btrfs: switch to library APIs for checksums"). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/98b983d2f2bddf0e5e8e1c970446c3c64527ef89.1772116160.git.geert+renesas@glider.be Signed-off-by: Eric Biggers --- crypto/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index e2b4106ac961..8bc95e69faa5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -876,8 +876,6 @@ config CRYPTO_BLAKE2B - blake2b-384 - blake2b-512 - Used by the btrfs filesystem. - See https://blake2.net for further information. config CRYPTO_CMAC From a70d9d655fd0549dd7cd9de437eb3fbe2e78c8ab Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Feb 2026 15:46:06 +0100 Subject: [PATCH 618/828] crypto: Clean up help text for CRYPTO_SHA256 NFS, Ceph, SMB, and Btrfs stopped using this SHA-256 implementation in commits c2c90a8b2620626c ("nfsd: use SHA-256 library API instead of crypto_shash API"), 27c0a7b05d13a0dc ("libceph: Use HMAC-SHA256 library instead of crypto_shash"), 924067ef183bd17f ("ksmbd: Use HMAC-SHA256 library for message signing and key generation"), and fe11ac191ce0ad91 ("btrfs: switch to library APIs for checksums"). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/bf8e1c229b36fc5349e29701e962d0dfd4fd21b6.1772116160.git.geert+renesas@glider.be Signed-off-by: Eric Biggers --- crypto/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 8bc95e69faa5..c84fda3acdbd 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -963,7 +963,6 @@ config CRYPTO_SHA256 10118-3), including HMAC support. This is required for IPsec AH (XFRM_AH) and IPsec ESP (XFRM_ESP). - Used by the btrfs filesystem, Ceph, NFS, and SMB. config CRYPTO_SHA512 tristate "SHA-384 and SHA-512" From a9ad29b7ad6c5e36ffa543ceb3c4439e63186339 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Feb 2026 15:46:07 +0100 Subject: [PATCH 619/828] crypto: Clean up help text for CRYPTO_XXHASH Btrfs stopped using this xxHash implementation in commit fe11ac191ce0ad91 ("btrfs: switch to library APIs for checksums"). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/3b632975201074ccaa129f4901a66aff87b19742.1772116160.git.geert+renesas@glider.be Signed-off-by: Eric Biggers --- crypto/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index c84fda3acdbd..49293b656aff 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1036,8 +1036,6 @@ config CRYPTO_XXHASH Extremely fast, working at speeds close to RAM limits. - Used by the btrfs filesystem. - endmenu menu "CRCs (cyclic redundancy checks)" From 0ef6eb10f2e0fe38bb795b2ecdb01b8c9b536ea8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Feb 2026 15:46:08 +0100 Subject: [PATCH 620/828] crypto: Clean up help text for CRYPTO_CRC32C Ext4, jbd2, iSCSI, NVMeoF/TCP, and Btrfs stopped using this CRC32c implementation in commits f2b4fa19647e18a2 ("ext4: switch to using the crc32c library"), dd348f054b24a3f5 ("jbd2: switch to using the crc32c library"), 92186c1455a2d356 ("scsi: iscsi_tcp: Switch to using the crc32c library"), 427fff9aff295e2c ("nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()"), and fe11ac191ce0ad91 ("btrfs: switch to library APIs for checksums"). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/f567add7840bc612382237b3e76f3a8bdbd671e6.1772116160.git.geert+renesas@glider.be Signed-off-by: Eric Biggers --- crypto/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 49293b656aff..5e66de2948ed 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1053,8 +1053,6 @@ config CRYPTO_CRC32C on Communications, Vol. 41, No. 6, June 1993, selected for use with iSCSI. - Used by btrfs, ext4, jbd2, NVMeoF/TCP, and iSCSI. - config CRYPTO_CRC32 tristate "CRC32" select CRYPTO_HASH From f33ac74f9cc1cdadd3921246832b2084a5dec53a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Feb 2026 15:46:09 +0100 Subject: [PATCH 621/828] crypto: Clean up help text for CRYPTO_CRC32 F2fs and RoCEv2 stopped using this CRC32 implementation in commits 3ca4bec40ee211cd ("f2fs: switch to using the crc32 library") and ccca5e8aa1457231 ("RDMA/rxe: switch to using the crc32 library"). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/0f76ebf05bb1b6ca50db97988f9ac20944534b4c.1772116160.git.geert+renesas@glider.be Signed-off-by: Eric Biggers --- crypto/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 5e66de2948ed..b4bb85e8e226 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1060,8 +1060,6 @@ config CRYPTO_CRC32 help CRC32 CRC algorithm (IEEE 802.3) - Used by RoCEv2 and f2fs. - endmenu menu "Compression" From 7cbe98f7bef965241a5908d50d557008cf998aee Mon Sep 17 00:00:00 2001 From: Mieczyslaw Nalewaj Date: Sun, 1 Mar 2026 18:13:14 -0300 Subject: [PATCH 622/828] net: dsa: realtek: rtl8365mb: fix rtl8365mb_phy_ocp_write return value Function rtl8365mb_phy_ocp_write() always returns 0, even when an error occurs during register access. This patch fixes the return value to propagate the actual error code from regmap operations. Link: https://lore.kernel.org/netdev/a2dfde3c-d46f-434b-9d16-1e251e449068@yahoo.com/ Fixes: 2796728460b8 ("net: dsa: realtek: rtl8365mb: serialize indirect PHY register access") Signed-off-by: Mieczyslaw Nalewaj Reviewed-by: Andrew Lunn Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260301-realtek_namiltd_fix1-v1-1-43a6bb707f9c@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/realtek/rtl8365mb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index c575e164368c..f938a3f701cc 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -769,7 +769,7 @@ static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy, out: rtl83xx_unlock(priv); - return 0; + return ret; } static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum) From 3f10543c5bdd11568d1a54f5b1d955f5652e3095 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Sun, 1 Mar 2026 09:41:33 +0100 Subject: [PATCH 623/828] selftests/net: packetdrill: restore tcp_rcv_big_endseq.pkt Commit 1cc93c48b5d7 ("selftests/net: packetdrill: remove tests for tcp_rcv_*big") removed the test for the reverted commit 1d2fbaad7cd8 ("tcp: stronger sk_rcvbuf checks") but also the one for commit 9ca48d616ed7 ("tcp: do not accept packets beyond window"). Restore the test with the necessary adaptation: expect a delayed ACK instead of an immediate one, since tcp_can_ingest() does not fail anymore for the last data packet. Signed-off-by: Simon Baatz Link: https://patch.msgid.link/20260301-tcp_rcv_big_endseq-v1-1-86ab7415ab58@gmail.com Signed-off-by: Jakub Kicinski --- .../net/packetdrill/tcp_rcv_big_endseq.pkt | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt new file mode 100644 index 000000000000..6c0f32c40f19 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:4001(4000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +1 < P. 5001:55001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 70001:80001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + + +0 read(4, ..., 100000) = 4000 + +// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd + +0 < P. 4001:54001(50000) ack 1 win 257 + * > . 1:1(0) ack 54001 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. ++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` From 1939d9816dbfc057508267f0c5214f9478fbd6d1 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Sun, 1 Mar 2026 19:16:51 +0000 Subject: [PATCH 624/828] MAINTAINERS: ena: update AMAZON ETHERNET maintainers Remove Shay Agroskin and Saeed Bishara. Promote David Arinzon to maintainer. Signed-off-by: Arthur Kiyanovski Link: https://patch.msgid.link/20260301191652.5916-1-akiyano@amazon.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2265e2c9bfbe..7829ff2180ae 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -993,10 +993,8 @@ F: Documentation/devicetree/bindings/thermal/amazon,al-thermal.yaml F: drivers/thermal/thermal_mmio.c AMAZON ETHERNET DRIVERS -M: Shay Agroskin M: Arthur Kiyanovski -R: David Arinzon -R: Saeed Bishara +M: David Arinzon L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst From a300000233a9ff842e2fb450fb9a79f7827a586d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 21 Feb 2026 12:45:25 -0800 Subject: [PATCH 625/828] fsverity: add dependency on 64K or smaller pages Currently, all filesystems that support fsverity (ext4, f2fs, and btrfs) cache the Merkle tree in the pagecache at a 64K aligned offset after the end of the file data. This offset needs to be a multiple of the page size, which is guaranteed only when the page size is 64K or smaller. 64K was chosen to be the "largest reasonable page size". But it isn't the largest *possible* page size: the hexagon and powerpc ports of Linux support 256K pages, though that configuration is rarely used. For now, just disable support for FS_VERITY in these odd configurations to ensure it isn't used in cases where it would have incorrect behavior. Fixes: 671e67b47e9f ("fs-verity: add Kconfig and the helper functions for hashing") Reported-by: Christoph Hellwig Closes: https://lore.kernel.org/r/20260119063349.GA643@lst.de Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20260221204525.30426-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig index 76d1c5971b82..b20882963ffb 100644 --- a/fs/verity/Kconfig +++ b/fs/verity/Kconfig @@ -2,6 +2,9 @@ config FS_VERITY bool "FS Verity (read-only file-based authenticity protection)" + # Filesystems cache the Merkle tree at a 64K aligned offset in the + # pagecache. That approach assumes the page size is at most 64K. + depends on PAGE_SHIFT <= 16 select CRYPTO_HASH_INFO select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512 From 93992667d0ab695ac30ceec91a516fd4bf725d75 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Tue, 3 Mar 2026 01:32:59 +0800 Subject: [PATCH 626/828] ALSA: doc: usb-audio: Add doc for QUIRK_FLAG_SKIP_IFACE_SETUP QUIRK_FLAG_SKIP_IFACE_SETUP was introduced into usb-audio before without appropriate documentation, so add it. Fixes: 38c322068a26 ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP") Cc: stable@vger.kernel.org Signed-off-by: Rong Zhang Link: https://patch.msgid.link/20260302173300.322673-1-i@rong.moe Signed-off-by: Takashi Iwai --- Documentation/sound/alsa-configuration.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index 0a4eaa7d66dd..55b845d38236 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst @@ -2372,6 +2372,10 @@ quirk_flags audible volume * bit 25: ``mixer_capture_min_mute`` Similar to bit 24 but for capture streams + * bit 26: ``skip_iface_setup`` + Skip the probe-time interface setup (usb_set_interface, + init_pitch, init_sample_rate); redundant with + snd_usb_endpoint_prepare() at stream-open time This module supports multiple devices, autoprobe and hotplugging. From 83307aebe6a1a4fddf5dec6071716ce251da9cc9 Mon Sep 17 00:00:00 2001 From: wangdicheng Date: Tue, 3 Mar 2026 13:42:42 +0800 Subject: [PATCH 627/828] ALSA: hda/senary: Use codec->core.afg for GPIO access Replace the hardcoded GPIO node ID (0x01) with codec->core.afg. This follows the standard HDA driver practice and makes the driver more robust against different hardware configurations. Signed-off-by: wangdicheng Link: https://patch.msgid.link/20260303054242.318062-1-wangdich9700@163.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/senarytech.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/hda/codecs/senarytech.c b/sound/hda/codecs/senarytech.c index 3a50d4b3a064..8822d4dc5e36 100644 --- a/sound/hda/codecs/senarytech.c +++ b/sound/hda/codecs/senarytech.c @@ -19,9 +19,6 @@ #include "hda_jack.h" #include "generic.h" -/* GPIO node ID */ -#define SENARY_GPIO_NODE 0x01 - struct senary_spec { struct hda_gen_spec gen; @@ -123,11 +120,11 @@ static void senary_init_gpio_led(struct hda_codec *codec) unsigned int mask = spec->gpio_mute_led_mask | spec->gpio_mic_led_mask; if (mask) { - snd_hda_codec_write(codec, SENARY_GPIO_NODE, 0, AC_VERB_SET_GPIO_MASK, + snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_GPIO_MASK, mask); - snd_hda_codec_write(codec, SENARY_GPIO_NODE, 0, AC_VERB_SET_GPIO_DIRECTION, + snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_GPIO_DIRECTION, mask); - snd_hda_codec_write(codec, SENARY_GPIO_NODE, 0, AC_VERB_SET_GPIO_DATA, + snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_GPIO_DATA, spec->gpio_led); } } From 8fb54c7307f6add04246d2f7845e42ecd41950fe Mon Sep 17 00:00:00 2001 From: MeiChia Chiu Date: Tue, 3 Mar 2026 13:47:25 +0800 Subject: [PATCH 628/828] wifi: mac80211: fix missing ieee80211_eml_params member initialization The missing initialization causes driver to misinterpret the EML control bitmap, resulting in incorrect link bitmap handling. Fixes: 0d95280a2d54e ("wifi: mac80211: Add eMLSR/eMLMR action frame parsing support") Signed-off-by: MeiChia Chiu Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260303054725.471548-1-MeiChia.Chiu@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/eht.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 75096b2195d2..078e1e23d8d1 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -154,6 +154,7 @@ void ieee80211_rx_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, u8 *ptr = mgmt->u.action.u.eml_omn.variable; struct ieee80211_eml_params eml_params = { .link_id = status->link_id, + .control = control, }; struct sta_info *sta; int opt_len = 0; From a116bac87118903925108e57781bbfc7a7eea27b Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Mon, 2 Mar 2026 16:23:09 -0800 Subject: [PATCH 629/828] dma-buf: Include ioctl.h in UAPI header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit include/uapi/linux/dma-buf.h uses several macros from ioctl.h to define its ioctl commands. However, it does not include ioctl.h itself. So, if userspace source code tries to include the dma-buf.h file without including ioctl.h, it can result in build failures. Therefore, include ioctl.h in the dma-buf UAPI header. Signed-off-by: Isaac J. Manjarres Reviewed-by: T.J. Mercier Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20260303002309.1401849-1-isaacmanjarres@google.com --- include/uapi/linux/dma-buf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 5a6fda66d9ad..e827c9d20c5d 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -20,6 +20,7 @@ #ifndef _DMA_BUF_UAPI_H_ #define _DMA_BUF_UAPI_H_ +#include #include /** From 3f27958b729a2337336a6b50e0d9aee5fbbce816 Mon Sep 17 00:00:00 2001 From: zhidao su Date: Tue, 3 Mar 2026 14:09:58 +0800 Subject: [PATCH 630/828] sched_ext: Use READ_ONCE() for plain reads of scx_watchdog_timeout scx_watchdog_timeout is written with WRITE_ONCE() in scx_enable(): WRITE_ONCE(scx_watchdog_timeout, timeout); However, three read-side accesses use plain reads without the matching READ_ONCE(): /* check_rq_for_timeouts() - L2824 */ last_runnable + scx_watchdog_timeout /* scx_watchdog_workfn() - L2852 */ scx_watchdog_timeout / 2 /* scx_enable() - L5179 */ scx_watchdog_timeout / 2 The KCSAN documentation requires that if one accessor uses WRITE_ONCE() to annotate lock-free access, all other accesses must also use the appropriate accessor. Plain reads alongside WRITE_ONCE() leave the pair incomplete and can trigger KCSAN warnings. Note that scx_tick() already uses the correct READ_ONCE() annotation: last_check + READ_ONCE(scx_watchdog_timeout) Fix the three remaining plain reads to match, making all accesses to scx_watchdog_timeout consistently annotated and KCSAN-clean. Signed-off-by: zhidao su Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index a566d2cc8a43..2ba69b302368 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2739,7 +2739,7 @@ static bool check_rq_for_timeouts(struct rq *rq) unsigned long last_runnable = p->scx.runnable_at; if (unlikely(time_after(jiffies, - last_runnable + scx_watchdog_timeout))) { + last_runnable + READ_ONCE(scx_watchdog_timeout)))) { u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable); scx_exit(sch, SCX_EXIT_ERROR_STALL, 0, @@ -2767,7 +2767,7 @@ static void scx_watchdog_workfn(struct work_struct *work) cond_resched(); } queue_delayed_work(system_unbound_wq, to_delayed_work(work), - scx_watchdog_timeout / 2); + READ_ONCE(scx_watchdog_timeout) / 2); } void scx_tick(struct rq *rq) @@ -5081,7 +5081,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) WRITE_ONCE(scx_watchdog_timeout, timeout); WRITE_ONCE(scx_watchdog_timestamp, jiffies); queue_delayed_work(system_unbound_wq, &scx_watchdog_work, - scx_watchdog_timeout / 2); + READ_ONCE(scx_watchdog_timeout) / 2); /* * Once __scx_enabled is set, %current can be switched to SCX anytime. From 9af832c0a76eedce169c4c6360e4e20d8a0c9ab1 Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Tue, 3 Mar 2026 15:23:15 +0800 Subject: [PATCH 631/828] tools/sched_ext: Add -fms-extensions to bpf build flags Similar to commit 835a50753579 ("selftests/bpf: Add -fms-extensions to bpf build flags") and commit 639f58a0f480 ("bpftool: Fix build warnings due to MS extensions") The kernel is now built with -fms-extensions, therefore generated vmlinux.h contains types like: struct aes_key { struct aes_enckey; union aes_invkey_arch inv_k; }; struct ns_common { ... union { struct ns_tree; struct callback_head ns_rcu; }; }; Which raise warning like below when building scx scheduler: tools/sched_ext/build/include/vmlinux.h:50533:3: warning: declaration does not declare anything [-Wmissing-declarations] 50533 | struct ns_tree; | ^ Fix it by using -fms-extensions and -Wno-microsoft-anon-tag flags to build bpf programs that #include "vmlinux.h" Signed-off-by: Zhao Mengmeng Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/sched_ext/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile index 47ad7444677e..21554f089692 100644 --- a/tools/sched_ext/Makefile +++ b/tools/sched_ext/Makefile @@ -122,6 +122,8 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ -I../../include \ $(call get_sys_includes,$(CLANG)) \ -Wall -Wno-compare-distinct-pointer-types \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -O2 -mcpu=v3 # sort removes libbpf duplicates when not cross-building From 01a867c2e090cb440c8f27158e8650c8ddefec8e Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Tue, 3 Mar 2026 15:23:16 +0800 Subject: [PATCH 632/828] selftests/sched_ext: Add -fms-extensions to bpf build flags Similar to commit 835a50753579 ("selftests/bpf: Add -fms-extensions to bpf build flags") and commit 639f58a0f480 ("bpftool: Fix build warnings due to MS extensions") Fix "declaration does not declare anything" warning by using -fms-extensions and -Wno-microsoft-anon-tag flags to build bpf programs that #include "vmlinux.h" Signed-off-by: Zhao Mengmeng Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 2c601a7eaff5..006300ac6dff 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -93,6 +93,8 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ $(CLANG_SYS_INCLUDES) \ -Wall -Wno-compare-distinct-pointer-types \ -Wno-incompatible-function-pointer-types \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -O2 -mcpu=v3 # sort removes libbpf duplicates when not cross-building From 75ad51825933575906394d0d5db04586b02db00a Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Tue, 3 Mar 2026 15:23:17 +0800 Subject: [PATCH 633/828] selftests/sched_ext: Fix peek_dsq.bpf.c compile error for clang 17 When compiling sched_ext selftests using clang 17.0.6, it raised compiler crash and build error: Error at line 68: Unsupport signed division for DAG: 0x55b2f9a60240: i64 = sdiv 0x55b2f9a609b0, Constant:i64<100>, peek_dsq.bpf.c:68:25 @[ peek_dsq.bpf.c:95:4 @[ peek_dsq.bpf.c:169:8 @[ peek _dsq.bpf.c:140:6 ] ] ]Please convert to unsigned div/mod After digging, it's not a compiler error, clang supported Signed division only when using -mcpu=v4, while we use -mcpu=v3 currently, the better way is to use unsigned div, see [1] for details. [1] https://github.com/llvm/llvm-project/issues/70433 Signed-off-by: Zhao Mengmeng Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/peek_dsq.bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c index a3faf5bb49d6..784f2f6c1af9 100644 --- a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c @@ -58,14 +58,14 @@ static void record_peek_result(long pid) { u32 slot_key; long *slot_pid_ptr; - int ix; + u32 ix; if (pid <= 0) return; /* Find an empty slot or one with the same PID */ bpf_for(ix, 0, 10) { - slot_key = (pid + ix) % MAX_SAMPLES; + slot_key = ((u64)pid + ix) % MAX_SAMPLES; slot_pid_ptr = bpf_map_lookup_elem(&peek_results, &slot_key); if (!slot_pid_ptr) continue; From 7ae0d8f1abbbba6f98cac735145e1206927c67d9 Mon Sep 17 00:00:00 2001 From: wangdicheng Date: Tue, 3 Mar 2026 16:15:16 +0800 Subject: [PATCH 634/828] ALSA: hda/senary: Ensure EAPD is enabled during init The driver sets spec->gen.own_eapd_ctl to take manual control of the EAPD (External Amplifier). However, senary_init does not turn on the EAPD, while senary_shutdown turns it off. Since the generic driver skips EAPD handling when own_eapd_ctl is set, the EAPD remains off after initialization (e.g., after resume), leaving the codec in a non-functional state. Explicitly call senary_auto_turn_eapd in senary_init to ensure the EAPD is enabled and the codec is functional. Signed-off-by: wangdicheng Link: https://patch.msgid.link/20260303081516.583438-1-wangdich9700@163.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/senarytech.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/hda/codecs/senarytech.c b/sound/hda/codecs/senarytech.c index 8822d4dc5e36..6239a25bb8f3 100644 --- a/sound/hda/codecs/senarytech.c +++ b/sound/hda/codecs/senarytech.c @@ -25,6 +25,7 @@ struct senary_spec { /* extra EAPD pins */ unsigned int num_eapds; hda_nid_t eapds[4]; + bool dynamic_eapd; hda_nid_t mute_led_eapd; unsigned int parse_flags; /* flag for snd_hda_parse_pin_defcfg() */ @@ -131,8 +132,12 @@ static void senary_init_gpio_led(struct hda_codec *codec) static int senary_init(struct hda_codec *codec) { + struct senary_spec *spec = codec->spec; + snd_hda_gen_init(codec); senary_init_gpio_led(codec); + if (!spec->dynamic_eapd) + senary_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, true); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); return 0; From 479d589b40b836442bbdadc3fdb37f001bb67f26 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Thu, 26 Feb 2026 16:03:01 +0800 Subject: [PATCH 635/828] bpf/bonding: reject vlan+srcmac xmit_hash_policy change when XDP is loaded bond_option_mode_set() already rejects mode changes that would make a loaded XDP program incompatible via bond_xdp_check(). However, bond_option_xmit_hash_policy_set() has no such guard. For 802.3ad and balance-xor modes, bond_xdp_check() returns false when xmit_hash_policy is vlan+srcmac, because the 802.1q payload is usually absent due to hardware offload. This means a user can: 1. Attach a native XDP program to a bond in 802.3ad/balance-xor mode with a compatible xmit_hash_policy (e.g. layer2+3). 2. Change xmit_hash_policy to vlan+srcmac while XDP remains loaded. This leaves bond->xdp_prog set but bond_xdp_check() now returning false for the same device. When the bond is later destroyed, dev_xdp_uninstall() calls bond_xdp_set(dev, NULL, NULL) to remove the program, which hits the bond_xdp_check() guard and returns -EOPNOTSUPP, triggering: WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)) Fix this by rejecting xmit_hash_policy changes to vlan+srcmac when an XDP program is loaded on a bond in 802.3ad or balance-xor mode. commit 39a0876d595b ("net, bonding: Disallow vlan+srcmac with XDP") introduced bond_xdp_check() which returns false for 802.3ad/balance-xor modes when xmit_hash_policy is vlan+srcmac. The check was wired into bond_xdp_set() to reject XDP attachment with an incompatible policy, but the symmetric path -- preventing xmit_hash_policy from being changed to an incompatible value after XDP is already loaded -- was left unguarded in bond_option_xmit_hash_policy_set(). Note: commit 094ee6017ea0 ("bonding: check xdp prog when set bond mode") later added a similar guard to bond_option_mode_set(), but bond_option_xmit_hash_policy_set() remained unprotected. Reported-by: syzbot+5a287bcdc08104bc3132@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6995aff6.050a0220.2eeac1.014e.GAE@google.com/T/ Fixes: 39a0876d595b ("net, bonding: Disallow vlan+srcmac with XDP") Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260226080306.98766-2-jiayuan.chen@linux.dev Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 9 +++++++-- drivers/net/bonding/bond_options.c | 2 ++ include/net/bonding.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a1de08ee3815..14ed91391fcc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -324,7 +324,7 @@ static bool bond_sk_check(struct bonding *bond) } } -bool bond_xdp_check(struct bonding *bond, int mode) +bool __bond_xdp_check(int mode, int xmit_policy) { switch (mode) { case BOND_MODE_ROUNDROBIN: @@ -335,7 +335,7 @@ bool bond_xdp_check(struct bonding *bond, int mode) /* vlan+srcmac is not supported with XDP as in most cases the 802.1q * payload is not in the packet due to hardware offload. */ - if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) + if (xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) return true; fallthrough; default: @@ -343,6 +343,11 @@ bool bond_xdp_check(struct bonding *bond, int mode) } } +bool bond_xdp_check(struct bonding *bond, int mode) +{ + return __bond_xdp_check(mode, bond->params.xmit_policy); +} + /*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index dcee384c2f06..7380cc4ee75a 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1575,6 +1575,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond, static int bond_option_xmit_hash_policy_set(struct bonding *bond, const struct bond_opt_value *newval) { + if (bond->xdp_prog && !__bond_xdp_check(BOND_MODE(bond), newval->value)) + return -EOPNOTSUPP; netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n", newval->string, newval->value); bond->params.xmit_policy = newval->value; diff --git a/include/net/bonding.h b/include/net/bonding.h index 4ad5521e7731..395c6e281c5f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -699,6 +699,7 @@ void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int mode); +bool __bond_xdp_check(int mode, int xmit_policy); bool bond_xdp_check(struct bonding *bond, int mode); void bond_setup(struct net_device *bond_dev); unsigned int bond_get_num_tx_queues(void); From 181cafbd8a01d22f3078a84f079c4a7cc0653068 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Thu, 26 Feb 2026 16:03:02 +0800 Subject: [PATCH 636/828] selftests/bpf: add test for xdp_bonding xmit_hash_policy compat Add a selftest to verify that changing xmit_hash_policy to vlan+srcmac is rejected when a native XDP program is loaded on a bond in 802.3ad mode. Without the fix in bond_option_xmit_hash_policy_set(), the change succeeds silently, creating an inconsistent state that triggers a kernel WARNING in dev_xdp_uninstall() when the bond is torn down. The test attaches native XDP to a bond0 (802.3ad, layer2+3), then attempts to switch xmit_hash_policy to vlan+srcmac and asserts the operation fails. It also verifies the change succeeds after XDP is detached, confirming the rejection is specific to the XDP-loaded state. Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260226080306.98766-3-jiayuan.chen@linux.dev Signed-off-by: Paolo Abeni --- .../selftests/bpf/prog_tests/xdp_bonding.c | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index fb952703653e..e8ea26464349 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -610,6 +610,61 @@ out: system("ip link del bond"); } +/* + * Test that changing xmit_hash_policy to vlan+srcmac is rejected when a + * native XDP program is loaded on a bond in 802.3ad or balance-xor mode. + * These modes support XDP only when xmit_hash_policy != vlan+srcmac; freely + * changing the policy creates an inconsistency that triggers a WARNING in + * dev_xdp_uninstall() during device teardown. + */ +static void test_xdp_bonding_xmit_policy_compat(struct skeletons *skeletons) +{ + struct nstoken *nstoken = NULL; + int bond_ifindex = -1; + int xdp_fd, err; + + SYS(out, "ip netns add ns_xmit_policy"); + nstoken = open_netns("ns_xmit_policy"); + if (!ASSERT_OK_PTR(nstoken, "open ns_xmit_policy")) + goto out; + + /* 802.3ad with layer2+3 policy: native XDP is supported */ + SYS(out, "ip link add bond0 type bond mode 802.3ad xmit_hash_policy layer2+3"); + SYS(out, "ip link add veth0 type veth peer name veth0p"); + SYS(out, "ip link set veth0 master bond0"); + SYS(out, "ip link set bond0 up"); + + bond_ifindex = if_nametoindex("bond0"); + if (!ASSERT_GT(bond_ifindex, 0, "bond0 ifindex")) + goto out; + + xdp_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog); + if (!ASSERT_GE(xdp_fd, 0, "xdp_dummy fd")) + goto out; + + err = bpf_xdp_attach(bond_ifindex, xdp_fd, XDP_FLAGS_DRV_MODE, NULL); + if (!ASSERT_OK(err, "attach XDP to bond0")) + goto out; + + /* With XDP loaded, switching to vlan+srcmac must be rejected */ + err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); + ASSERT_NEQ(err, 0, "vlan+srcmac change with XDP loaded should fail"); + + /* Detach XDP first, then the same change must succeed */ + ASSERT_OK(bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL), + "detach XDP from bond0"); + + bond_ifindex = -1; + err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); + ASSERT_OK(err, "vlan+srcmac change without XDP should succeed"); + +out: + if (bond_ifindex > 0) + bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL); + close_netns(nstoken); + SYS_NOFAIL("ip netns del ns_xmit_policy"); +} + static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { @@ -677,6 +732,9 @@ void serial_test_xdp_bonding(void) test_case->xmit_policy); } + if (test__start_subtest("xdp_bonding_xmit_policy_compat")) + test_xdp_bonding_xmit_policy_compat(&skeletons); + if (test__start_subtest("xdp_bonding_redirect_multi")) test_xdp_bonding_redirect_multi(&skeletons); From 18c04a808c436d629d5812ce883e3822a5f5a47f Mon Sep 17 00:00:00 2001 From: Vimlesh Kumar Date: Fri, 27 Feb 2026 09:13:57 +0000 Subject: [PATCH 637/828] octeon_ep: Relocate counter updates before NAPI Relocate IQ/OQ IN/OUT_CNTS updates to occur before NAPI completion, and replace napi_complete with napi_complete_done. Moving the IQ/OQ counter updates before napi_complete_done ensures 1. Counter registers are updated before re-enabling interrupts. 2. Prevents a race where new packets arrive but counters aren't properly synchronized. napi_complete_done (vs napi_complete) allows for better interrupt coalescing. Fixes: 37d79d0596062 ("octeon_ep: add Tx/Rx processing and interrupt support") Signed-off-by: Sathesh Edara Signed-off-by: Shinas Rasheed Signed-off-by: Vimlesh Kumar Link: https://patch.msgid.link/20260227091402.1773833-2-vimleshk@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeon_ep/octep_main.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index ec55eb2a6c04..bb5f21b33508 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -554,12 +554,12 @@ static void octep_clean_irqs(struct octep_device *oct) } /** - * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * octep_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. * * @iq: Octeon Tx queue data structure. * @oq: Octeon Rx queue data structure. */ -static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) +static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq) { u32 pkts_pend = oq->pkts_pending; @@ -575,7 +575,17 @@ static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) } /* Flush the previous wrties before writing to RESEND bit */ - wmb(); + smp_wmb(); +} + +/** + * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * + * @iq: Octeon Tx queue data structure. + * @oq: Octeon Rx queue data structure. + */ +static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) +{ writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); } @@ -601,7 +611,8 @@ static int octep_napi_poll(struct napi_struct *napi, int budget) if (tx_pending || rx_done >= budget) return budget; - napi_complete(napi); + octep_update_pkt(ioq_vector->iq, ioq_vector->oq); + napi_complete_done(napi, rx_done); octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); return rx_done; } From 43b3160cb639079a15daeb5f080120afbfbfc918 Mon Sep 17 00:00:00 2001 From: Vimlesh Kumar Date: Fri, 27 Feb 2026 09:13:58 +0000 Subject: [PATCH 638/828] octeon_ep: avoid compiler and IQ/OQ reordering Utilize READ_ONCE and WRITE_ONCE APIs for IO queue Tx/Rx variable access to prevent compiler optimization and reordering. Additionally, ensure IO queue OUT/IN_CNT registers are flushed by performing a read-back after writing. The compiler could reorder reads/writes to pkts_pending, last_pkt_count, etc., causing stale values to be used when calculating packets to process or register updates to send to hardware. The Octeon hardware requires a read-back after writing to OUT_CNT/IN_CNT registers to ensure the write has been flushed through any posted write buffers before the interrupt resend bit is set. Without this, we have observed cases where the hardware didn't properly update its internal state. wmb/rmb only provides ordering guarantees but doesn't prevent the compiler from performing optimizations like caching in registers, load tearing etc. Fixes: 37d79d0596062 ("octeon_ep: add Tx/Rx processing and interrupt support") Signed-off-by: Sathesh Edara Signed-off-by: Shinas Rasheed Signed-off-by: Vimlesh Kumar Link: https://patch.msgid.link/20260227091402.1773833-3-vimleshk@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeon_ep/octep_main.c | 21 +++++++++------ .../net/ethernet/marvell/octeon_ep/octep_rx.c | 27 +++++++++++++------ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index bb5f21b33508..028dd55604ea 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -561,17 +561,22 @@ static void octep_clean_irqs(struct octep_device *oct) */ static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq) { - u32 pkts_pend = oq->pkts_pending; + u32 pkts_pend = READ_ONCE(oq->pkts_pending); + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); + u32 pkts_processed = READ_ONCE(iq->pkts_processed); + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); - if (iq->pkts_processed) { - writel(iq->pkts_processed, iq->inst_cnt_reg); - iq->pkt_in_done -= iq->pkts_processed; - iq->pkts_processed = 0; + if (pkts_processed) { + writel(pkts_processed, iq->inst_cnt_reg); + readl(iq->inst_cnt_reg); + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); + WRITE_ONCE(iq->pkts_processed, 0); } - if (oq->last_pkt_count - pkts_pend) { - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); - oq->last_pkt_count = pkts_pend; + if (last_pkt_count - pkts_pend) { + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); + readl(oq->pkts_sent_reg); + WRITE_ONCE(oq->last_pkt_count, pkts_pend); } /* Flush the previous wrties before writing to RESEND bit */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index f2a7c6a76c74..74de19166488 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -324,10 +324,16 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, struct octep_oq *oq) { u32 pkt_count, new_pkts; + u32 last_pkt_count, pkts_pending; pkt_count = readl(oq->pkts_sent_reg); - new_pkts = pkt_count - oq->last_pkt_count; + last_pkt_count = READ_ONCE(oq->last_pkt_count); + new_pkts = pkt_count - last_pkt_count; + if (pkt_count < last_pkt_count) { + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", + oq->q_no, pkt_count, last_pkt_count); + } /* Clear the hardware packets counter register if the rx queue is * being processed continuously with-in a single interrupt and * reached half its max value. @@ -338,8 +344,9 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, pkt_count = readl(oq->pkts_sent_reg); new_pkts += pkt_count; } - oq->last_pkt_count = pkt_count; - oq->pkts_pending += new_pkts; + WRITE_ONCE(oq->last_pkt_count, pkt_count); + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); return new_pkts; } @@ -414,7 +421,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, u16 rx_ol_flags; u32 read_idx; - read_idx = oq->host_read_idx; + read_idx = READ_ONCE(oq->host_read_idx); rx_bytes = 0; desc_used = 0; for (pkt = 0; pkt < pkts_to_process; pkt++) { @@ -499,7 +506,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, napi_gro_receive(oq->napi, skb); } - oq->host_read_idx = read_idx; + WRITE_ONCE(oq->host_read_idx, read_idx); oq->refill_count += desc_used; oq->stats->packets += pkt; oq->stats->bytes += rx_bytes; @@ -522,22 +529,26 @@ int octep_oq_process_rx(struct octep_oq *oq, int budget) { u32 pkts_available, pkts_processed, total_pkts_processed; struct octep_device *oct = oq->octep_dev; + u32 pkts_pending; pkts_available = 0; pkts_processed = 0; total_pkts_processed = 0; while (total_pkts_processed < budget) { /* update pending count only when current one exhausted */ - if (oq->pkts_pending == 0) + pkts_pending = READ_ONCE(oq->pkts_pending); + if (pkts_pending == 0) octep_oq_check_hw_for_pkts(oct, oq); + pkts_pending = READ_ONCE(oq->pkts_pending); pkts_available = min(budget - total_pkts_processed, - oq->pkts_pending); + pkts_pending); if (!pkts_available) break; pkts_processed = __octep_oq_process_rx(oct, oq, pkts_available); - oq->pkts_pending -= pkts_processed; + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); total_pkts_processed += pkts_processed; } From 2ae7d20fb24f598f60faa8f6ecc856dac782261a Mon Sep 17 00:00:00 2001 From: Vimlesh Kumar Date: Fri, 27 Feb 2026 09:13:59 +0000 Subject: [PATCH 639/828] octeon_ep_vf: Relocate counter updates before NAPI Relocate IQ/OQ IN/OUT_CNTS updates to occur before NAPI completion. Moving the IQ/OQ counter updates before napi_complete_done ensures 1. Counter registers are updated before re-enabling interrupts. 2. Prevents a race where new packets arrive but counters aren't properly synchronized. Fixes: 1cd3b407977c3 ("octeon_ep_vf: add Tx/Rx processing and interrupt support") Signed-off-by: Sathesh Edara Signed-off-by: Shinas Rasheed Signed-off-by: Vimlesh Kumar Link: https://patch.msgid.link/20260227091402.1773833-4-vimleshk@marvell.com Signed-off-by: Paolo Abeni --- .../marvell/octeon_ep_vf/octep_vf_main.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index 562fe945b422..0e76f671a226 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -286,12 +286,13 @@ static void octep_vf_clean_irqs(struct octep_vf_device *oct) } /** - * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * octep_vf_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. * * @iq: Octeon Tx queue data structure. * @oq: Octeon Rx queue data structure. */ -static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq) + +static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq) { u32 pkts_pend = oq->pkts_pending; @@ -308,6 +309,17 @@ static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq * /* Flush the previous wrties before writing to RESEND bit */ smp_wmb(); +} + +/** + * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * + * @iq: Octeon Tx queue data structure. + * @oq: Octeon Rx queue data structure. + */ +static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, + struct octep_vf_oq *oq) +{ writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); } @@ -333,6 +345,7 @@ static int octep_vf_napi_poll(struct napi_struct *napi, int budget) if (tx_pending || rx_done >= budget) return budget; + octep_vf_update_pkt(ioq_vector->iq, ioq_vector->oq); if (likely(napi_complete_done(napi, rx_done))) octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); From 6c73126ecd1080351b468fe43353b2f705487f44 Mon Sep 17 00:00:00 2001 From: Vimlesh Kumar Date: Fri, 27 Feb 2026 09:14:00 +0000 Subject: [PATCH 640/828] octeon_ep_vf: avoid compiler and IQ/OQ reordering Utilize READ_ONCE and WRITE_ONCE APIs for IO queue Tx/Rx variable access to prevent compiler optimization and reordering. Additionally, ensure IO queue OUT/IN_CNT registers are flushed by performing a read-back after writing. The compiler could reorder reads/writes to pkts_pending, last_pkt_count, etc., causing stale values to be used when calculating packets to process or register updates to send to hardware. The Octeon hardware requires a read-back after writing to OUT_CNT/IN_CNT registers to ensure the write has been flushed through any posted write buffers before the interrupt resend bit is set. Without this, we have observed cases where the hardware didn't properly update its internal state. wmb/rmb only provides ordering guarantees but doesn't prevent the compiler from performing optimizations like caching in registers, load tearing etc. Fixes: 1cd3b407977c3 ("octeon_ep_vf: add Tx/Rx processing and interrupt support") Signed-off-by: Sathesh Edara Signed-off-by: Shinas Rasheed Signed-off-by: Vimlesh Kumar Link: https://patch.msgid.link/20260227091402.1773833-5-vimleshk@marvell.com Signed-off-by: Paolo Abeni --- .../marvell/octeon_ep_vf/octep_vf_main.c | 21 ++++++++------ .../marvell/octeon_ep_vf/octep_vf_rx.c | 28 +++++++++++++------ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index 0e76f671a226..7f1b93cffb98 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -294,17 +294,22 @@ static void octep_vf_clean_irqs(struct octep_vf_device *oct) static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq) { - u32 pkts_pend = oq->pkts_pending; + u32 pkts_pend = READ_ONCE(oq->pkts_pending); + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); + u32 pkts_processed = READ_ONCE(iq->pkts_processed); + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); - if (iq->pkts_processed) { - writel(iq->pkts_processed, iq->inst_cnt_reg); - iq->pkt_in_done -= iq->pkts_processed; - iq->pkts_processed = 0; + if (pkts_processed) { + writel(pkts_processed, iq->inst_cnt_reg); + readl(iq->inst_cnt_reg); + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); + WRITE_ONCE(iq->pkts_processed, 0); } - if (oq->last_pkt_count - pkts_pend) { - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); - oq->last_pkt_count = pkts_pend; + if (last_pkt_count - pkts_pend) { + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); + readl(oq->pkts_sent_reg); + WRITE_ONCE(oq->last_pkt_count, pkts_pend); } /* Flush the previous wrties before writing to RESEND bit */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c index 6f865dbbba6c..b579d5b545c4 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c @@ -325,9 +325,16 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, struct octep_vf_oq *oq) { u32 pkt_count, new_pkts; + u32 last_pkt_count, pkts_pending; pkt_count = readl(oq->pkts_sent_reg); - new_pkts = pkt_count - oq->last_pkt_count; + last_pkt_count = READ_ONCE(oq->last_pkt_count); + new_pkts = pkt_count - last_pkt_count; + + if (pkt_count < last_pkt_count) { + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", + oq->q_no, pkt_count, last_pkt_count); + } /* Clear the hardware packets counter register if the rx queue is * being processed continuously with-in a single interrupt and @@ -339,8 +346,9 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, pkt_count = readl(oq->pkts_sent_reg); new_pkts += pkt_count; } - oq->last_pkt_count = pkt_count; - oq->pkts_pending += new_pkts; + WRITE_ONCE(oq->last_pkt_count, pkt_count); + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); return new_pkts; } @@ -369,7 +377,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, struct sk_buff *skb; u32 read_idx; - read_idx = oq->host_read_idx; + read_idx = READ_ONCE(oq->host_read_idx); rx_bytes = 0; desc_used = 0; for (pkt = 0; pkt < pkts_to_process; pkt++) { @@ -463,7 +471,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, napi_gro_receive(oq->napi, skb); } - oq->host_read_idx = read_idx; + WRITE_ONCE(oq->host_read_idx, read_idx); oq->refill_count += desc_used; oq->stats->packets += pkt; oq->stats->bytes += rx_bytes; @@ -486,22 +494,26 @@ int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget) { u32 pkts_available, pkts_processed, total_pkts_processed; struct octep_vf_device *oct = oq->octep_vf_dev; + u32 pkts_pending; pkts_available = 0; pkts_processed = 0; total_pkts_processed = 0; while (total_pkts_processed < budget) { /* update pending count only when current one exhausted */ - if (oq->pkts_pending == 0) + pkts_pending = READ_ONCE(oq->pkts_pending); + if (pkts_pending == 0) octep_vf_oq_check_hw_for_pkts(oct, oq); + pkts_pending = READ_ONCE(oq->pkts_pending); pkts_available = min(budget - total_pkts_processed, - oq->pkts_pending); + pkts_pending); if (!pkts_available) break; pkts_processed = __octep_vf_oq_process_rx(oct, oq, pkts_available); - oq->pkts_pending -= pkts_processed; + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); total_pkts_processed += pkts_processed; } From d98c24617a831e92e7224a07dcaed2dd0b02af96 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Feb 2026 14:00:24 -0800 Subject: [PATCH 641/828] wifi: cw1200: Fix locking in error paths cw1200_wow_suspend() must only return with priv->conf_mutex locked if it returns zero. This mutex must be unlocked if an error is returned. Add mutex_unlock() calls to the error paths from which that call is missing. This has been detected by the Clang thread-safety analyzer. Fixes: a910e4a94f69 ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets") Signed-off-by: Bart Van Assche Link: https://patch.msgid.link/20260223220102.2158611-25-bart.vanassche@linux.dev Signed-off-by: Johannes Berg --- drivers/net/wireless/st/cw1200/pm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c index 120f0379f81d..84eb15d729c7 100644 --- a/drivers/net/wireless/st/cw1200/pm.c +++ b/drivers/net/wireless/st/cw1200/pm.c @@ -264,12 +264,14 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) wiphy_err(priv->hw->wiphy, "PM request failed: %d. WoW is disabled.\n", ret); cw1200_wow_resume(hw); + mutex_unlock(&priv->conf_mutex); return -EBUSY; } /* Force resume if event is coming from the device. */ if (atomic_read(&priv->bh_rx)) { cw1200_wow_resume(hw); + mutex_unlock(&priv->conf_mutex); return -EAGAIN; } From 72c6df8f284b3a49812ce2ac136727ace70acc7c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Feb 2026 14:00:25 -0800 Subject: [PATCH 642/828] wifi: wlcore: Fix a locking bug Make sure that wl->mutex is locked before it is unlocked. This has been detected by the Clang thread-safety analyzer. Fixes: 45aa7f071b06 ("wlcore: Use generic runtime pm calls for wowlan elp configuration") Signed-off-by: Bart Van Assche Link: https://patch.msgid.link/20260223220102.2158611-26-bart.vanassche@linux.dev Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wlcore/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 17dd417756f2..1c340a4a0930 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -1875,6 +1875,8 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw) wl->wow_enabled); WARN_ON(!wl->wow_enabled); + mutex_lock(&wl->mutex); + ret = pm_runtime_force_resume(wl->dev); if (ret < 0) { wl1271_error("ELP wakeup failure!"); @@ -1891,8 +1893,6 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw) run_irq_work = true; spin_unlock_irqrestore(&wl->wl_lock, flags); - mutex_lock(&wl->mutex); - /* test the recovery flag before calling any SDIO functions */ pending_recovery = test_bit(WL1271_FLAG_RECOVERY_IN_PROGRESS, &wl->flags); From 60862846308627e9e15546bb647a00de44deb27b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 26 Feb 2026 20:11:14 +0100 Subject: [PATCH 643/828] wifi: mt76: mt7996: Fix possible oob access in mt7996_mac_write_txwi_80211() Check frame length before accessing the mgmt fields in mt7996_mac_write_txwi_80211 in order to avoid a possible oob access. Fixes: 98686cd21624c ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260226-mt76-addba-req-oob-access-v1-1-b0f6d1ad4850@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 2560e2f46e89..d4f3ee943b47 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -800,6 +800,7 @@ mt7996_mac_write_txwi_80211(struct mt7996_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && mgmt->u.action.category == WLAN_CATEGORY_BACK && mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { if (is_mt7990(&dev->mt76)) From c41a9abd6ae31d130e8f332e7c8800c4c866234b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 26 Feb 2026 20:11:15 +0100 Subject: [PATCH 644/828] wifi: mt76: mt7925: Fix possible oob access in mt7925_mac_write_txwi_80211() Check frame length before accessing the mgmt fields in mt7925_mac_write_txwi_80211 in order to avoid a possible oob access. Fixes: c948b5da6bbec ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260226-mt76-addba-req-oob-access-v1-2-b0f6d1ad4850@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 871b67101976..0d9435900423 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -668,6 +668,7 @@ mt7925_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && mgmt->u.action.category == WLAN_CATEGORY_BACK && mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) tid = MT_TX_ADDBA; From 4e10a730d1b511ff49723371ed6d694dd1b2c785 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 26 Feb 2026 20:11:16 +0100 Subject: [PATCH 645/828] wifi: mt76: Fix possible oob access in mt76_connac2_mac_write_txwi_80211() Check frame length before accessing the mgmt fields in mt76_connac2_mac_write_txwi_80211 in order to avoid a possible oob access. Fixes: 577dbc6c656d ("mt76: mt7915: enable offloading of sequence number assignment") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260226-mt76-addba-req-oob-access-v1-3-b0f6d1ad4850@kernel.org [fix check to also cover mgmt->u.action.u.addba_req.capab, correct Fixes tag] Signed-off-by: Johannes Berg --- drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index 3304b5971be0..b41ca1410da9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -413,6 +413,7 @@ mt76_connac2_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + 1 + 2 && mgmt->u.action.category == WLAN_CATEGORY_BACK && mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); From 710f5c76580306cdb9ec51fac8fcf6a8faff7821 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Feb 2026 17:26:03 +0000 Subject: [PATCH 646/828] indirect_call_wrapper: do not reevaluate function pointer We have an increasing number of READ_ONCE(xxx->function) combined with INDIRECT_CALL_[1234]() helpers. Unfortunately this forces INDIRECT_CALL_[1234]() to read xxx->function many times, which is not what we wanted. Fix these macros so that xxx->function value is not reloaded. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux add/remove: 0/0 grow/shrink: 1/65 up/down: 122/-1084 (-962) Function old new delta ip_push_pending_frames 59 181 +122 ip6_finish_output 687 681 -6 __udp_enqueue_schedule_skb 1078 1072 -6 ioam6_output 2319 2312 -7 xfrm4_rcv_encap_finish2 64 56 -8 xfrm4_output 297 289 -8 vrf_ip_local_out 278 270 -8 vrf_ip6_local_out 278 270 -8 seg6_input_finish 64 56 -8 rpl_output 700 692 -8 ipmr_forward_finish 124 116 -8 ip_forward_finish 143 135 -8 ip6mr_forward2_finish 100 92 -8 ip6_forward_finish 73 65 -8 input_action_end_bpf 1091 1083 -8 dst_input 52 44 -8 __xfrm6_output 801 793 -8 __xfrm4_output 83 75 -8 bpf_input 500 491 -9 __tcp_check_space 530 521 -9 input_action_end_dt6 291 280 -11 vti6_tnl_xmit 1634 1622 -12 bpf_xmit 1203 1191 -12 rpl_input 497 483 -14 rawv6_send_hdrinc 1355 1341 -14 ndisc_send_skb 1030 1016 -14 ipv6_srh_rcv 1377 1363 -14 ip_send_unicast_reply 1253 1239 -14 ip_rcv_finish 226 212 -14 ip6_rcv_finish 300 286 -14 input_action_end_x_core 205 191 -14 input_action_end_x 355 341 -14 input_action_end_t 205 191 -14 input_action_end_dx6_finish 127 113 -14 input_action_end_dx4_finish 373 359 -14 input_action_end_dt4 426 412 -14 input_action_end_core 186 172 -14 input_action_end_b6_encap 292 278 -14 input_action_end_b6 198 184 -14 igmp6_send 1332 1318 -14 ip_sublist_rcv 864 848 -16 ip6_sublist_rcv 1091 1075 -16 ipv6_rpl_srh_rcv 1937 1920 -17 xfrm_policy_queue_process 1246 1228 -18 seg6_output_core 903 885 -18 mld_sendpack 856 836 -20 NF_HOOK 756 736 -20 vti_tunnel_xmit 1447 1426 -21 input_action_end_dx6 664 642 -22 input_action_end 1502 1480 -22 sock_sendmsg_nosec 134 111 -23 ip6mr_forward2 388 364 -24 sock_recvmsg_nosec 134 109 -25 seg6_input_core 836 810 -26 ip_send_skb 172 146 -26 ip_local_out 140 114 -26 ip6_local_out 140 114 -26 __sock_sendmsg 162 136 -26 __ip_queue_xmit 1196 1170 -26 __ip_finish_output 405 379 -26 ipmr_queue_fwd_xmit 373 346 -27 sock_recvmsg 173 145 -28 ip6_xmit 1635 1607 -28 xfrm_output_resume 1418 1389 -29 ip_build_and_send_pkt 625 591 -34 dst_output 504 432 -72 Total: Before=25217686, After=25216724, chg -0.00% Fixes: 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up indirect calls of builtin") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260227172603.1700433-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/indirect_call_wrapper.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 35227d47cfc9..dc272b514a01 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -16,22 +16,26 @@ */ #define INDIRECT_CALL_1(f, f1, ...) \ ({ \ - likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \ + typeof(f) __f1 = (f); \ + likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__); \ }) #define INDIRECT_CALL_2(f, f2, f1, ...) \ ({ \ - likely(f == f2) ? f2(__VA_ARGS__) : \ - INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ + typeof(f) __f2 = (f); \ + likely(__f2 == f2) ? f2(__VA_ARGS__) : \ + INDIRECT_CALL_1(__f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_3(f, f3, f2, f1, ...) \ ({ \ - likely(f == f3) ? f3(__VA_ARGS__) : \ - INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \ + typeof(f) __f3 = (f); \ + likely(__f3 == f3) ? f3(__VA_ARGS__) : \ + INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \ ({ \ - likely(f == f4) ? f4(__VA_ARGS__) : \ - INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \ + typeof(f) __f4 = (f); \ + likely(__f4 == f4) ? f4(__VA_ARGS__) : \ + INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALLABLE_DECLARE(f) f From 6a877ececd6daa002a9a0002cd0fbca6592a9244 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Fri, 27 Feb 2026 13:23:36 -0700 Subject: [PATCH 647/828] net/rds: Fix circular locking dependency in rds_tcp_tune syzbot reported a circular locking dependency in rds_tcp_tune() where sk_net_refcnt_upgrade() is called while holding the socket lock: ====================================================== WARNING: possible circular locking dependency detected ====================================================== kworker/u10:8/15040 is trying to acquire lock: ffffffff8e9aaf80 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc_cache_noprof+0x4b/0x6f0 but task is already holding lock: ffff88805a3c1ce0 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: rds_tcp_tune+0xd7/0x930 The issue occurs because sk_net_refcnt_upgrade() performs memory allocation (via get_net_track() -> ref_tracker_alloc()) while the socket lock is held, creating a circular dependency with fs_reclaim. Fix this by moving sk_net_refcnt_upgrade() outside the socket lock critical section. This is safe because the fields modified by the sk_net_refcnt_upgrade() call (sk_net_refcnt, ns_tracker) are not accessed by any concurrent code path at this point. v2: - Corrected fixes tag - check patch line wrap nits - ai commentary nits Reported-by: syzbot+2e2cf5331207053b8106@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2e2cf5331207053b8106 Fixes: 3a58f13a881e ("net: rds: acquire refcount on TCP sockets") Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260227202336.167757-1-achender@kernel.org Signed-off-by: Paolo Abeni --- net/rds/tcp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 04f310255692..654e23d13e3d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -490,18 +490,24 @@ bool rds_tcp_tune(struct socket *sock) struct rds_tcp_net *rtn; tcp_sock_set_nodelay(sock->sk); - lock_sock(sk); /* TCP timer functions might access net namespace even after * a process which created this net namespace terminated. */ if (!sk->sk_net_refcnt) { - if (!maybe_get_net(net)) { - release_sock(sk); + if (!maybe_get_net(net)) return false; - } + /* + * sk_net_refcnt_upgrade() must be called before lock_sock() + * because it does a GFP_KERNEL allocation, which can trigger + * fs_reclaim and create a circular lock dependency with the + * socket lock. The fields it modifies (sk_net_refcnt, + * ns_tracker) are not accessed by any concurrent code path + * at this point. + */ sk_net_refcnt_upgrade(sk); put_net(net); } + lock_sock(sk); rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; From 9de68394a61528d40f575c3e6719cc75c56f62c3 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 2 Mar 2026 01:25:44 +0100 Subject: [PATCH 648/828] Revert "driver core: enforce device_lock for driver_match_device()" This reverts commit dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") and commit 289b14592cef ("driver core: fix inverted "locked" suffix of driver_match_device()"). While technically correct, there is a major downside to this approach: When a device is already present in the system and a driver is registered on the same bus, we iterate over all devices registered on this bus to see if one of them matches. If we come across an already bound one where the corresponding driver crashed while holding the device lock (e.g. in probe()) we can't make any progress anymore. However, drivers are typically the least tested code in the kernel and hence it is a case that is likely to happen regularly. Besides hurting developer ergonomics, it potentially decreases chances of shutting things down cleanly and obtaining logs in production environments as well [1]. This came up in the context of a firewire bug, which only in combination with the reverted commit, caused the machine to hang [2]. Additionally, it was observed in [3]. Thus, revert commit dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") and add a brief note clarifying that an implementer of struct bus_type must not expect match() to be called with the device lock held. Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1] Link: https://lore.kernel.org/all/67f655bb-4d81-4609-b008-68d200255dd2@davidgow.net/ [2] Link: https://lore.kernel.org/lkml/CALbr=LZ4v7N=tO1vgOsyj9AS+XuNbn6kG-QcF+PacdMjSo0iyw@mail.gmail.com/ [3] Reported-by: Linus Torvalds Closes: https://lore.kernel.org/driver-core/CAHk-=wgJ_L1C=HjcYJotg_zrZEmiLFJaoic+PWthjuQrutrfJw@mail.gmail.com/ Reviewed-by: Gui-Dong Han Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260302002545.19389-1-dakr@kernel.org [ Add additional Link: reference. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/base/base.h | 11 +---------- drivers/base/dd.c | 2 +- include/linux/device/bus.h | 2 ++ 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 79d031d2d845..1af95ac68b77 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -179,19 +179,10 @@ void device_release_driver_internal(struct device *dev, const struct device_driv void driver_detach(const struct device_driver *drv); void driver_deferred_probe_del(struct device *dev); void device_set_deferred_probe_reason(const struct device *dev, struct va_format *vaf); -static inline int driver_match_device_locked(const struct device_driver *drv, - struct device *dev) -{ - device_lock_assert(dev); - - return drv->bus->match ? drv->bus->match(dev, drv) : 1; -} - static inline int driver_match_device(const struct device_driver *drv, struct device *dev) { - guard(device)(dev); - return driver_match_device_locked(drv, dev); + return drv->bus->match ? drv->bus->match(dev, drv) : 1; } static inline void dev_sync_state(struct device *dev) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 0354f209529c..bea8da5f8a3a 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -928,7 +928,7 @@ static int __device_attach_driver(struct device_driver *drv, void *_data) bool async_allowed; int ret; - ret = driver_match_device_locked(drv, dev); + ret = driver_match_device(drv, dev); if (ret == 0) { /* no match */ return 0; diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 99c3c83ea520..63de5f053c33 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -35,6 +35,8 @@ struct fwnode_handle; * otherwise. It may also return error code if determining that * the driver supports the device is not possible. In case of * -EPROBE_DEFER it will queue the device for deferred probing. + * Note: This callback may be invoked with or without the device + * lock held. * @uevent: Called when a device is added, removed, or a few other things * that generate uevents to add the environment variables. * @probe: Called when a new device or driver add to this bus, and callback From 1a86a1f7d88996085934139fa4c063b6299a2dd3 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Fri, 27 Feb 2026 22:19:37 +0000 Subject: [PATCH 649/828] net: Fix rcu_tasks stall in threaded busypoll I was debugging a NIC driver when I noticed that when I enable threaded busypoll, bpftrace hangs when starting up. dmesg showed: rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 10658 jiffies old. rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 40793 jiffies old. rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 131273 jiffies old. rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 402058 jiffies old. INFO: rcu_tasks detected stalls on tasks: 00000000769f52cd: .N nvcsw: 2/2 holdout: 1 idle_cpu: -1/64 task:napi/eth2-8265 state:R running task stack:0 pid:48300 tgid:48300 ppid:2 task_flags:0x208040 flags:0x00004000 Call Trace: ? napi_threaded_poll_loop+0x27c/0x2c0 ? __pfx_napi_threaded_poll+0x10/0x10 ? napi_threaded_poll+0x26/0x80 ? kthread+0xfa/0x240 ? __pfx_kthread+0x10/0x10 ? ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ? ret_from_fork_asm+0x1a/0x30 The cause is that in threaded busypoll, the main loop is in napi_threaded_poll rather than napi_threaded_poll_loop, where the latter rarely iterates more than once within its loop. For rcu_softirq_qs_periodic inside napi_threaded_poll_loop to report its qs state, the last_qs must be 100ms behind, and this can't happen because napi_threaded_poll_loop rarely iterates in threaded busypoll, and each time napi_threaded_poll_loop is called last_qs is reset to latest jiffies. This patch changes so that in threaded busypoll, last_qs is saved in the outer napi_threaded_poll, and whether busy_poll_last_qs is NULL indicates whether napi_threaded_poll_loop is called for busypoll. This way last_qs would not reset to latest jiffies on each invocation of napi_threaded_poll_loop. Fixes: c18d4b190a46 ("net: Extend NAPI threaded polling to allow kthread based busy polling") Cc: stable@vger.kernel.org Signed-off-by: YiFei Zhu Reviewed-by: Samiullah Khawaja Link: https://patch.msgid.link/20260227221937.1060857-1-zhuyifei@google.com Signed-off-by: Paolo Abeni --- net/core/dev.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index c1a9f7fdcffa..4af4cf2d63a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7794,11 +7794,12 @@ static int napi_thread_wait(struct napi_struct *napi) return -1; } -static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) +static void napi_threaded_poll_loop(struct napi_struct *napi, + unsigned long *busy_poll_last_qs) { + unsigned long last_qs = busy_poll_last_qs ? *busy_poll_last_qs : jiffies; struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct softnet_data *sd; - unsigned long last_qs = jiffies; for (;;) { bool repoll = false; @@ -7827,12 +7828,12 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) /* When busy poll is enabled, the old packets are not flushed in * napi_complete_done. So flush them here. */ - if (busy_poll) + if (busy_poll_last_qs) gro_flush_normal(&napi->gro, HZ >= 1000); local_bh_enable(); /* Call cond_resched here to avoid watchdog warnings. */ - if (repoll || busy_poll) { + if (repoll || busy_poll_last_qs) { rcu_softirq_qs_periodic(last_qs); cond_resched(); } @@ -7840,11 +7841,15 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) if (!repoll) break; } + + if (busy_poll_last_qs) + *busy_poll_last_qs = last_qs; } static int napi_threaded_poll(void *data) { struct napi_struct *napi = data; + unsigned long last_qs = jiffies; bool want_busy_poll; bool in_busy_poll; unsigned long val; @@ -7862,7 +7867,7 @@ static int napi_threaded_poll(void *data) assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state, want_busy_poll); - napi_threaded_poll_loop(napi, want_busy_poll); + napi_threaded_poll_loop(napi, want_busy_poll ? &last_qs : NULL); } return 0; @@ -13175,7 +13180,7 @@ static void run_backlog_napi(unsigned int cpu) { struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); - napi_threaded_poll_loop(&sd->backlog, false); + napi_threaded_poll_loop(&sd->backlog, NULL); } static void backlog_napi_setup(unsigned int cpu) From d1a196e0a6dcddd03748468a0e9e3100790fc85c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 3 Mar 2026 12:30:51 +0100 Subject: [PATCH 650/828] platform/x86: dell-wmi-sysman: Don't hex dump plaintext password data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit set_new_password() hex dumps the entire buffer, which contains plaintext password data, including current and new passwords. Remove the hex dump to avoid leaking credentials. Fixes: e8a60aa7404b ("platform/x86: Introduce support for Systems Management Driver over WMI for Dell Systems") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20260303113050.58127-2-thorsten.blum@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c index 86ec962aace9..e586f7957946 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c @@ -93,7 +93,6 @@ int set_new_password(const char *password_type, const char *new) if (ret < 0) goto out; - print_hex_dump_bytes("set new password data: ", DUMP_PREFIX_NONE, buffer, buffer_size); ret = call_password_interface(wmi_priv.password_attr_wdev, buffer, buffer_size); /* on success copy the new password to current password */ if (!ret) From 63dc2c34a91642d9ae615a9609841fa317a36c92 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Thu, 22 Jan 2026 18:06:14 +0800 Subject: [PATCH 651/828] x86/xen: Build identity mapping page tables dynamically for XENPV After commit 47ffe0578aee ("x86/pvh: Add 64bit relocation page tables"), the PVH entry uses a new set of page tables instead of the preconstructed page tables in head64.S. Since those preconstructed page tables are only used in XENPV now and XENPV does not actually need the preconstructed identity page tables directly, they can be filled in xen_setup_kernel_pagetable(). Therefore, build the identity mapping page table dynamically to remove the preconstructed page tables and make the code cleaner. Signed-off-by: Hou Wenlong Reviewed-by: Juergen Gross Acked-by: "Borislav Petkov (AMD)" Signed-off-by: Juergen Gross Message-ID: <453981eae7e8158307f971d1632d5023adbe03c3.1769074722.git.houwenlong.hwl@antgroup.com> --- arch/x86/include/asm/pgtable_64.h | 2 -- arch/x86/kernel/head_64.S | 28 ---------------------------- arch/x86/xen/mmu_pv.c | 9 +++++++++ 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index f06e5d6a2747..ce45882ccd07 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -19,10 +19,8 @@ extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; extern pud_t level3_kernel_pgt[512]; -extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; -extern pmd_t level2_ident_pgt[512]; extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 21816b48537c..85d4a5094f6b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -616,38 +616,10 @@ SYM_DATA(early_recursion_flag, .long 0) .data -#if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH) -SYM_DATA_START_PTI_ALIGNED(init_top_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + L4_PAGE_OFFSET*8, 0 - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + L4_START_KERNEL*8, 0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC - .fill PTI_USER_PGD_FILL,8,0 -SYM_DATA_END(init_top_pgt) - -SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt) - .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .fill 511, 8, 0 -SYM_DATA_END(level3_ident_pgt) -SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt) - /* - * Since I easily can, map the first 1G. - * Don't set NX because code runs from these pages. - * - * Note: This sets _PAGE_GLOBAL despite whether - * the CPU supports it or it is enabled. But, - * the CPU should ignore the bit. - */ - PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) -SYM_DATA_END(level2_ident_pgt) -#else SYM_DATA_START_PTI_ALIGNED(init_top_pgt) .fill 512,8,0 .fill PTI_USER_PGD_FILL,8,0 SYM_DATA_END(init_top_pgt) -#endif SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt) .fill 511,8,0 diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index aa6755385887..50172d64aed6 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -105,6 +105,9 @@ pte_t xen_make_pte_init(pteval_t pte); static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; #endif +static pud_t level3_ident_pgt[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t level2_ident_pgt[PTRS_PER_PMD] __page_aligned_bss; + /* * Protects atomic reservation decrease/increase against concurrent increases. * Also protects non-atomic updates of current_pages and balloon lists. @@ -1777,6 +1780,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Zap identity mapping */ init_top_pgt[0] = __pgd(0); + init_top_pgt[pgd_index(__PAGE_OFFSET_BASE_L4)].pgd = + __pa_symbol(level3_ident_pgt) + _KERNPG_TABLE_NOENC; + init_top_pgt[pgd_index(__START_KERNEL_map)].pgd = + __pa_symbol(level3_kernel_pgt) + _PAGE_TABLE_NOENC; + level3_ident_pgt[0].pud = __pa_symbol(level2_ident_pgt) + _KERNPG_TABLE_NOENC; + /* Pre-constructed entries are in pfn, so convert to mfn */ /* L4[273] -> level3_ident_pgt */ /* L4[511] -> level3_kernel_pgt */ From 8b57227d59a86fc06d4f09de08f98133680f2cae Mon Sep 17 00:00:00 2001 From: David Thomson Date: Tue, 24 Feb 2026 09:37:11 +0000 Subject: [PATCH 652/828] xen/acpi-processor: fix _CST detection using undersized evaluation buffer read_acpi_id() attempts to evaluate _CST using a stack buffer of sizeof(union acpi_object) (48 bytes), but _CST returns a nested Package of sub-Packages (one per C-state, each containing a register descriptor, type, latency, and power) requiring hundreds of bytes. The evaluation always fails with AE_BUFFER_OVERFLOW. On modern systems using FFH/MWAIT entry (where pblk is zero), this causes the function to return before setting the acpi_id_cst_present bit. In check_acpi_ids(), flags.power is then zero for all Phase 2 CPUs (physical CPUs beyond dom0's vCPU count), so push_cxx_to_hypervisor() is never called for them. On a system with dom0_max_vcpus=2 and 8 physical CPUs, only PCPUs 0-1 receive C-state data. PCPUs 2-7 are stuck in C0/C1 idle, unable to enter C2/C3. This costs measurable wall power (4W observed on an Intel Core Ultra 7 265K with Xen 4.20). The function never uses the _CST return value -- it only needs to know whether _CST exists. Replace the broken acpi_evaluate_object() call with acpi_has_method(), which correctly detects _CST presence using acpi_get_handle() without any buffer allocation. This brings C-state detection to parity with the P-state path, which already works correctly for Phase 2 CPUs. Fixes: 59a568029181 ("xen/acpi-processor: C and P-state driver that uploads said data to hypervisor.") Signed-off-by: David Thomson Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Message-ID: <20260224093707.19679-1-dt@linux-mail.net> --- drivers/xen/xen-acpi-processor.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index f2e8eaf684ba..8d1860bd5d57 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -379,11 +379,8 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) acpi_psd[acpi_id].domain); } - status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); - if (ACPI_FAILURE(status)) { - if (!pblk) - return AE_OK; - } + if (!pblk && !acpi_has_method(handle, "_CST")) + return AE_OK; /* .. and it has a C-state */ __set_bit(acpi_id, acpi_id_cst_present); From b8c460a045985a8cb7a7c34912d2db91afee242f Mon Sep 17 00:00:00 2001 From: kexinsun Date: Tue, 24 Feb 2026 10:24:24 +0800 Subject: [PATCH 653/828] x86/xen: update outdated comment The function xen_flush_tlb_others() was renamed xen_flush_tlb_multi() by commit 4ce94eabac16 ("x86/mm/tlb: Flush remote and local TLBs concurrently"). Update the comment accordingly. Signed-off-by: kexinsun Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20260224022424.1718-1-kexinsun@smail.nju.edu.cn> --- arch/x86/xen/enlighten_pv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index b74ff8bc7f2a..fde5f44460ee 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -392,7 +392,7 @@ static void __init xen_init_capabilities(void) /* * Xen PV would need some work to support PCID: CR3 handling as well - * as xen_flush_tlb_others() would need updating. + * as xen_flush_tlb_multi() would need updating. */ setup_clear_cpu_cap(X86_FEATURE_PCID); From 7271cb98e4981e32c9b2a5f9eb5ad0cd0cdd5bf3 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Sun, 1 Mar 2026 13:04:52 +0800 Subject: [PATCH 654/828] x86/PVH: Use boot params to pass RSDP address in start_info page After commit e6e094e053af75 ("x86/acpi, x86/boot: Take RSDP address from boot params if available"), the RSDP address can be passed in boot params. Therefore, store the RSDP address in start_info page into boot params in the PVH entry instead of registering a different callback. This removes an absolute reference during the PVH entry and is more standardized. Signed-off-by: Hou Wenlong Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <76675c4d49d3a8f72252076812ef8f22276230c2.1772282441.git.houwenlong.hwl@antgroup.com> --- arch/x86/platform/pvh/enlighten.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 2263885d16ba..f2053cbe9b0c 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -25,11 +25,6 @@ struct hvm_start_info __initdata pvh_start_info; const unsigned int __initconst pvh_start_info_sz = sizeof(pvh_start_info); -static u64 __init pvh_get_root_pointer(void) -{ - return pvh_start_info.rsdp_paddr; -} - /* * Xen guests are able to obtain the memory map from the hypervisor via the * HYPERVISOR_memory_op hypercall. @@ -95,7 +90,7 @@ static void __init init_pvh_bootparams(bool xen_guest) pvh_bootparams.hdr.version = (2 << 8) | 12; pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0; - x86_init.acpi.get_root_pointer = pvh_get_root_pointer; + pvh_bootparams.acpi_rsdp_addr = pvh_start_info.rsdp_paddr; } /* From 27990181031fdcdbe0f7c46011f6404e5d116386 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 3 Mar 2026 14:17:07 +0000 Subject: [PATCH 655/828] ASoC: SDCA: Add allocation failure check for Entity name Currently find_sdca_entity_iot() can allocate a string for the Entity name but it doesn't check if that allocation succeeded. Add the missing NULL check after the allocation. Fixes: 48fa77af2f4a ("ASoC: SDCA: Add terminal type into input/output widget name") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20260303141707.3841635-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/sdca/sdca_functions.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index 95b67bb904c3..e0ed593697ba 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -1156,9 +1156,12 @@ static int find_sdca_entity_iot(struct device *dev, if (!terminal->is_dataport) { const char *type_name = sdca_find_terminal_name(terminal->type); - if (type_name) + if (type_name) { entity->label = devm_kasprintf(dev, GFP_KERNEL, "%s %s", entity->label, type_name); + if (!entity->label) + return -ENOMEM; + } } ret = fwnode_property_read_u32(entity_node, From 00fd9aad55e7ced494a738a07662155d058b872f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Mar 2026 15:26:31 +0100 Subject: [PATCH 656/828] Revert "ACPI: PM: Let acpi_dev_pm_attach() skip devices without ACPI PM" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert commit 88fad6ce090b ("ACPI: PM: Let acpi_dev_pm_attach() skip devices without ACPI PM") that introduced a SoundWire suspend regression [1]. It is actually not true that the commit above doesn't make a functional difference because acpi_subsys_suspend(), for example, may resume devices in runtime-suspend which affects the subsequent handling of those devices during the suspend transition. For this reason, the devices that were handled by the ACPI PM domain before that commit may be handled differently now which may lead to suspend-resume issues. Fixes: 88fad6ce090b ("ACPI: PM: Let acpi_dev_pm_attach() skip devices without ACPI PM") Reported-by: Péter Ujfalusi Closes: https://github.com/thesofproject/linux/pull/5677#issuecomment-3984375077 [1] Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2829615.mvXUDI8C0e@rafael.j.wysocki --- drivers/acpi/device_pm.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index f2579611e0a5..aa55ecfc2923 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1456,15 +1456,6 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) if (!adev || !acpi_match_device_ids(adev, special_pm_ids)) return 0; - /* - * Skip devices whose ACPI companions don't support power management and - * don't have a wakeup GPE. - */ - if (!acpi_device_power_manageable(adev) && !acpi_device_can_wakeup(adev)) { - dev_dbg(dev, "No ACPI power management or wakeup GPE\n"); - return 0; - } - /* * Only attach the power domain to the first device if the * companion is shared by multiple. This is to prevent doing power From 5f25805303e201f3afaff0a90f7c7ce257468704 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 2 Mar 2026 19:03:34 +0100 Subject: [PATCH 657/828] s390/xor: Fix xor_xc_5() inline assembly xor_xc_5() contains a larl 1,2f that is not used by the asm and is not declared as a clobber. This can corrupt a compiler-allocated value in %r1 and lead to miscompilation. Remove the instruction. Fixes: 745600ed6965 ("s390/lib: Use exrl instead of ex in xor functions") Cc: stable@vger.kernel.org Reviewed-by: Juergen Christ Reviewed-by: Heiko Carstens Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/lib/xor.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index 1721b73b7803..a4d8b51beb95 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -96,7 +96,6 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p5) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" " jm 6f\n" " srlg 0,%0,8\n" From f775276edc0c505dc0f782773796c189f31a1123 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Mar 2026 14:34:58 +0100 Subject: [PATCH 658/828] s390/xor: Fix xor_xc_2() inline assembly constraints The inline assembly constraints for xor_xc_2() are incorrect. "bytes", "p1", and "p2" are input operands, while all three of them are modified within the inline assembly. Given that the function consists only of this inline assembly it seems unlikely that this may cause any problems, however fix this in any case. Fixes: 2cfc5f9ce7f5 ("s390/xor: optimized xor routing using the XC instruction") Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Reviewed-by: Vasily Gorbik Link: https://lore.kernel.org/r/20260302133500.1560531-2-hca@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/lib/xor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index a4d8b51beb95..81c0235c0466 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -28,8 +28,8 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:" - : : "d" (bytes), "a" (p1), "a" (p2) - : "0", "cc", "memory"); + : "+d" (bytes), "+a" (p1), "+a" (p2) + : : "0", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, From 87ff6da3001b2a35d241c5d965b82536f6418277 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Mar 2026 14:34:59 +0100 Subject: [PATCH 659/828] s390/xor: Improve inline assembly constraints The inline assembly constraint for the "bytes" operand is "d" for all xor() inline assemblies. "d" means that any register from 0 to 15 can be used. If the compiler would use register 0 then the exrl instruction would not or the value of "bytes" into the executed instruction - resulting in an incorrect result. However all the xor() inline assemblies make hard-coded use of register 0, and it is correctly listed in the clobber list, so that this cannot happen. Given that this is quite subtle use the better "a" constraint, which excludes register 0 from register allocation in any case. Signed-off-by: Heiko Carstens Reviewed-by: Vasily Gorbik Link: https://lore.kernel.org/r/20260302133500.1560531-3-hca@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/lib/xor.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index 81c0235c0466..5363e4c2462d 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -28,7 +28,7 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:" - : "+d" (bytes), "+a" (p1), "+a" (p2) + : "+a" (bytes), "+a" (p1), "+a" (p2) : : "0", "cc", "memory"); } @@ -54,7 +54,7 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, "2: xc 0(1,%1),0(%2)\n" "3: xc 0(1,%1),0(%3)\n" "4:" - : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) + : "+a" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) : : "0", "cc", "memory"); } @@ -85,7 +85,7 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, "3: xc 0(1,%1),0(%3)\n" "4: xc 0(1,%1),0(%4)\n" "5:" - : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) + : "+a" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) : : "0", "cc", "memory"); } @@ -121,7 +121,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, "4: xc 0(1,%1),0(%4)\n" "5: xc 0(1,%1),0(%5)\n" "6:" - : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), + : "+a" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (p5) : : "0", "cc", "memory"); } From 674c5ff0f440a051ebf299d29a4c013133d81a65 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Mar 2026 14:35:00 +0100 Subject: [PATCH 660/828] s390/stackleak: Fix __stackleak_poison() inline assembly constraint The __stackleak_poison() inline assembly comes with a "count" operand where the "d" constraint is used. "count" is used with the exrl instruction and "d" means that the compiler may allocate any register from 0 to 15. If the compiler would allocate register 0 then the exrl instruction would not or the value of "count" into the executed instruction - resulting in a stackframe which is only partially poisoned. Use the correct "a" constraint, which excludes register 0 from register allocation. Fixes: 2a405f6bb3a5 ("s390/stackleak: provide fast __stackleak_poison() implementation") Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Reviewed-by: Vasily Gorbik Link: https://lore.kernel.org/r/20260302133500.1560531-4-hca@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index cc187afa07b3..78195ee5e99f 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -159,7 +159,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low, " j 4f\n" "3: mvc 8(1,%[addr]),0(%[addr])\n" "4:" - : [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp) + : [addr] "+&a" (erase_low), [count] "+&a" (count), [tmp] "=&a" (tmp) : [poison] "d" (poison) : "memory", "cc" ); From 1336b579f6079fb8520be03624fcd9ba443c930b Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Tue, 3 Mar 2026 22:35:30 +0800 Subject: [PATCH 661/828] sched_ext: Remove redundant css_put() in scx_cgroup_init() The iterator css_for_each_descendant_pre() walks the cgroup hierarchy under cgroup_lock(). It does not increment the reference counts on yielded css structs. According to the cgroup documentation, css_put() should only be used to release a reference obtained via css_get() or css_tryget_online(). Since the iterator does not use either of these to acquire a reference, calling css_put() in the error path of scx_cgroup_init() causes a refcount underflow. Remove the unbalanced css_put() to prevent a potential Use-After-Free (UAF) vulnerability. Fixes: 819513666966 ("sched_ext: Add cgroup support") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Cheng-Yang Chou Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2ba69b302368..eab6e09b6442 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3589,7 +3589,6 @@ static int scx_cgroup_init(struct scx_sched *sch) ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL, css->cgroup, &args); if (ret) { - css_put(css); scx_error(sch, "ops.cgroup_init() failed (%d)", ret); return ret; } From 2185904ff8b5da76a4353e5d1236caa78e0d98e3 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Fri, 20 Feb 2026 12:32:27 +0530 Subject: [PATCH 662/828] powerpc/pci: Initialize msi_addr_mask for OF-created PCI devices Recent changes replaced the use of no_64bit_msi with msi_addr_mask. As a result, msi_addr_mask is now expected to be initialized to DMA_BIT_MASK(64) when a pci_dev is set up. However, this initialization was missed on powerpc due to differences in the device initialization path compared to other (x86) architecture. Due to this, now PCI device probe method fails on powerpc system. On powerpc systems, struct pci_dev instances are created from device tree nodes via of_create_pci_dev(). Because msi_addr_mask was not initialized there, it remained zero. Later, during MSI setup, msi_verify_entries() validates the programmed MSI address against pdev->msi_addr_mask. Since the mask was not set correctly, the validation fails, causing PCI driver probe failures for devices on powerpc systems. Initialize pdev->msi_addr_mask to DMA_BIT_MASK(64) in of_create_pci_dev() so that MSI address validation succeeds and device probe works as expected. Fixes: 386ced19e9a3 ("PCI/MSI: Convert the boolean no_64bit_msi flag to a DMA address mask") Signed-off-by: Nilay Shroff Signed-off-by: Bjorn Helgaas Tested-by: Venkat Rao Bagalkote Tested-by: Nam Cao Reviewed-by: Nam Cao Reviewed-by: Vivian Wang Acked-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260220070239.1693303-2-nilay@linux.ibm.com --- arch/powerpc/kernel/pci_of_scan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 756043dd06e9..fb9fbf0d1796 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -212,6 +212,13 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; + /* + * Assume 64-bit addresses for MSI initially. Will be changed to 32-bit + * if MSI (rather than MSI-X) capability does not have + * PCI_MSI_FLAGS_64BIT. Can also be overridden by driver. + */ + dev->msi_addr_mask = DMA_BIT_MASK(64); + /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev); From 147dae12985947cdb9e1918142f06482c5077a81 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Fri, 20 Feb 2026 12:32:28 +0530 Subject: [PATCH 663/828] sparc/PCI: Initialize msi_addr_mask for OF-created PCI devices Recent changes replaced the use of no_64bit_msi with msi_addr_mask, which is now expected to be initialized to DMA_BIT_MASK(64) during PCI device setup. On SPARC systems, this initialization was inadvertently missed for devices instantiated from device tree nodes, leaving msi_addr_mask unset for OF-created pci_dev instances. As a result, MSI address validation fails during probe, causing affected devices to fail initialization. Initialize pdev->msi_addr_mask to DMA_BIT_MASK(64) in of_create_pci_dev() so that MSI address validation succeeds and PCI device probing works as expected. Fixes: 386ced19e9a3 ("PCI/MSI: Convert the boolean no_64bit_msi flag to a DMA address mask") Signed-off-by: Nilay Shroff Signed-off-by: Bjorn Helgaas Tested-by: Han Gao # SPARC Enterprise T5220 Tested-by: Nathaniel Roach # SPARC T5-2 Reviewed-by: Vivian Wang Link: https://patch.msgid.link/20260220070239.1693303-3-nilay@linux.ibm.com --- arch/sparc/kernel/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 7e41574634b3..1603d50fdcad 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -355,6 +355,13 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; + /* + * Assume 64-bit addresses for MSI initially. Will be changed to 32-bit + * if MSI (rather than MSI-X) capability does not have + * PCI_MSI_FLAGS_64BIT. Can also be overridden by driver. + */ + dev->msi_addr_mask = DMA_BIT_MASK(64); + if (of_node_name_eq(node, "pci")) { /* a PCI-PCI bridge */ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; From 3875ceb592d3cb23dc932165cc1eeb74cf4dc319 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Mar 2026 15:48:56 -0800 Subject: [PATCH 664/828] crypto: testmgr - Fix stale references to aes-generic Due to commit a2484474272e ("crypto: aes - Replace aes-generic with wrapper around lib"), the "aes-generic" driver name has been replaced with "aes-lib". Update a couple testmgr entries that were added concurrently with this change. Fixes: a22d48cbe558 ("crypto: testmgr - Add test vectors for authenc(hmac(sha224),cbc(aes))") Fixes: 030218dedee2 ("crypto: testmgr - Add test vectors for authenc(hmac(sha384),cbc(aes))") Acked-by: Aleksander Jan Bajkowski Link: https://lore.kernel.org/r/20260302234856.30569-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- crypto/testmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 49b607f65f63..4985411dedae 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4132,7 +4132,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, }, { .alg = "authenc(hmac(sha224),cbc(aes))", - .generic_driver = "authenc(hmac-sha224-lib,cbc(aes-generic))", + .generic_driver = "authenc(hmac-sha224-lib,cbc(aes-lib))", .test = alg_test_aead, .suite = { .aead = __VECS(hmac_sha224_aes_cbc_tv_temp) @@ -4194,7 +4194,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, }, { .alg = "authenc(hmac(sha384),cbc(aes))", - .generic_driver = "authenc(hmac-sha384-lib,cbc(aes-generic))", + .generic_driver = "authenc(hmac-sha384-lib,cbc(aes-lib))", .test = alg_test_aead, .suite = { .aead = __VECS(hmac_sha384_aes_cbc_tv_temp) From eef33aa44935d001747ca97703c08dd6f9031162 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 3 Dec 2025 14:29:48 +0100 Subject: [PATCH 665/828] ice: fix adding AQ LLDP filter for VF The referenced commit came from a misunderstanding of the FW LLDP filter AQ (Admin Queue) command due to the error in the internal documentation. Contrary to the assumptions in the original commit, VFs can be added and deleted from this filter without any problems. Introduced dev_info message proved to be useful, so reverting the whole commit does not make sense. Without this fix, trusted VFs do not receive LLDP traffic, if there is an AQ LLDP filter on PF. When trusted VF attempts to add an LLDP multicast MAC address, the following message can be seen in dmesg on host: ice 0000:33:00.0: Failed to add Rx LLDP rule on VSI 20 error: -95 Revert checking VSI type when adding LLDP filter through AQ. Fixes: 4d5a1c4e6d49 ("ice: do not add LLDP-specific filter if not necessary") Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 6cd63190f55d..74fe74225277 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -6391,7 +6391,7 @@ int ice_lldp_fltr_add_remove(struct ice_hw *hw, struct ice_vsi *vsi, bool add) struct ice_aqc_lldp_filter_ctrl *cmd; struct libie_aq_desc desc; - if (vsi->type != ICE_VSI_PF || !ice_fw_supports_lldp_fltr_ctrl(hw)) + if (!ice_fw_supports_lldp_fltr_ctrl(hw)) return -EOPNOTSUPP; cmd = libie_aq_raw(&desc); From 326256c0a72d4877cec1d4df85357da106233128 Mon Sep 17 00:00:00 2001 From: Jakub Staniszewski Date: Tue, 13 Jan 2026 20:38:16 +0100 Subject: [PATCH 666/828] ice: reintroduce retry mechanism for indirect AQ Add retry mechanism for indirect Admin Queue (AQ) commands. To do so we need to keep the command buffer. This technically reverts commit 43a630e37e25 ("ice: remove unused buffer copy code in ice_sq_send_cmd_retry()"), but combines it with a fix in the logic by using a kmemdup() call, making it more robust and less likely to break in the future due to programmer error. Cc: Michal Schmidt Cc: stable@vger.kernel.org Fixes: 3056df93f7a8 ("ice: Re-send some AQ commands, as result of EBUSY AQ error") Signed-off-by: Jakub Staniszewski Co-developed-by: Dawid Osuchowski Signed-off-by: Dawid Osuchowski Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 74fe74225277..fd32c318d3f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1841,6 +1841,7 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, { struct libie_aq_desc desc_cpy; bool is_cmd_for_retry; + u8 *buf_cpy = NULL; u8 idx = 0; u16 opcode; int status; @@ -1850,8 +1851,11 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, memset(&desc_cpy, 0, sizeof(desc_cpy)); if (is_cmd_for_retry) { - /* All retryable cmds are direct, without buf. */ - WARN_ON(buf); + if (buf) { + buf_cpy = kmemdup(buf, buf_size, GFP_KERNEL); + if (!buf_cpy) + return -ENOMEM; + } memcpy(&desc_cpy, desc, sizeof(desc_cpy)); } @@ -1863,12 +1867,14 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, hw->adminq.sq_last_status != LIBIE_AQ_RC_EBUSY) break; + if (buf_cpy) + memcpy(buf, buf_cpy, buf_size); memcpy(desc, &desc_cpy, sizeof(desc_cpy)); - msleep(ICE_SQ_SEND_DELAY_TIME_MS); } while (++idx < ICE_SQ_SEND_MAX_EXECUTE); + kfree(buf_cpy); return status; } From fb4903b3354aed4a2301180cf991226f896c87ed Mon Sep 17 00:00:00 2001 From: Jakub Staniszewski Date: Tue, 13 Jan 2026 20:38:17 +0100 Subject: [PATCH 667/828] ice: fix retry for AQ command 0x06EE Executing ethtool -m can fail reporting a netlink I/O error while firmware link management holds the i2c bus used to communicate with the module. According to Intel(R) Ethernet Controller E810 Datasheet Rev 2.8 [1] Section 3.3.10.4 Read/Write SFF EEPROM (0x06EE) request should to be retried upon receiving EBUSY from firmware. Commit e9c9692c8a81 ("ice: Reimplement module reads used by ethtool") implemented it only for part of ice_get_module_eeprom(), leaving all other calls to ice_aq_sff_eeprom() vulnerable to returning early on getting EBUSY without retrying. Remove the retry loop from ice_get_module_eeprom() and add Admin Queue (AQ) command with opcode 0x06EE to the list of commands that should be retried on receiving EBUSY from firmware. Cc: stable@vger.kernel.org Fixes: e9c9692c8a81 ("ice: Reimplement module reads used by ethtool") Signed-off-by: Jakub Staniszewski Co-developed-by: Dawid Osuchowski Signed-off-by: Dawid Osuchowski Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Link: https://www.intel.com/content/www/us/en/content-details/613875/intel-ethernet-controller-e810-datasheet.html [1] Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 1 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 35 ++++++++------------ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index fd32c318d3f5..ce11fea122d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1816,6 +1816,7 @@ static bool ice_should_retry_sq_send_cmd(u16 opcode) case ice_aqc_opc_lldp_stop: case ice_aqc_opc_lldp_start: case ice_aqc_opc_lldp_filter_ctrl: + case ice_aqc_opc_sff_eeprom: return true; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 3c1a084c2a4b..29e341251754 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4509,7 +4509,7 @@ ice_get_module_eeprom(struct net_device *netdev, u8 addr = ICE_I2C_EEPROM_DEV_ADDR; struct ice_hw *hw = &pf->hw; bool is_sfp = false; - unsigned int i, j; + unsigned int i; u16 offset = 0; u8 page = 0; int status; @@ -4551,26 +4551,19 @@ ice_get_module_eeprom(struct net_device *netdev, if (page == 0 || !(data[0x2] & 0x4)) { u32 copy_len; - /* If i2c bus is busy due to slow page change or - * link management access, call can fail. This is normal. - * So we retry this a few times. - */ - for (j = 0; j < 4; j++) { - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, - !is_sfp, value, - SFF_READ_BLOCK_SIZE, - 0, NULL); - netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n", - addr, offset, page, is_sfp, - value[0], value[1], value[2], value[3], - value[4], value[5], value[6], value[7], - status); - if (status) { - usleep_range(1500, 2500); - memset(value, 0, SFF_READ_BLOCK_SIZE); - continue; - } - break; + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, + !is_sfp, value, + SFF_READ_BLOCK_SIZE, + 0, NULL); + netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%pe)\n", + addr, offset, page, is_sfp, + value[0], value[1], value[2], value[3], + value[4], value[5], value[6], value[7], + ERR_PTR(status)); + if (status) { + netdev_err(netdev, "%s: error reading module EEPROM: status %pe\n", + __func__, ERR_PTR(status)); + return status; } /* Make sure we have enough room for the new block */ From fe868b499d16f55bbeea89992edb98043c9de416 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Thu, 22 Jan 2026 03:26:44 +0000 Subject: [PATCH 668/828] ice: Fix memory leak in ice_set_ringparam() In ice_set_ringparam, tx_rings and xdp_rings are allocated before rx_rings. If the allocation of rx_rings fails, the code jumps to the done label leaking both tx_rings and xdp_rings. Furthermore, if the setup of an individual Rx ring fails during the loop, the code jumps to the free_tx label which releases tx_rings but leaks xdp_rings. Fix this by introducing a free_xdp label and updating the error paths to ensure both xdp_rings and tx_rings are properly freed if rx_rings allocation or setup fails. Compile tested only. Issue found using a prototype static analysis tool and code review. Fixes: fcea6f3da546 ("ice: Add stats and ethtool support") Fixes: efc2214b6047 ("ice: Add support for XDP") Signed-off-by: Zilin Guan Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 29e341251754..b9be10b58856 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3332,7 +3332,7 @@ process_rx: rx_rings = kzalloc_objs(*rx_rings, vsi->num_rxq); if (!rx_rings) { err = -ENOMEM; - goto done; + goto free_xdp; } ice_for_each_rxq(vsi, i) { @@ -3359,7 +3359,7 @@ rx_unwind: } kfree(rx_rings); err = -ENOMEM; - goto free_tx; + goto free_xdp; } } @@ -3411,6 +3411,13 @@ process_link: } goto done; +free_xdp: + if (xdp_rings) { + ice_for_each_xdp_txq(vsi, i) + ice_free_tx_ring(&xdp_rings[i]); + kfree(xdp_rings); + } + free_tx: /* error cleanup if the Rx allocations failed after getting Tx */ if (tx_rings) { From 636cc3bd12f499c74eaf5dc9a7d5b832f1bb24ed Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 11 Feb 2026 10:11:40 +0100 Subject: [PATCH 669/828] libie: don't unroll if fwlog isn't supported The libie_fwlog_deinit() function can be called during driver unload even when firmware logging was never properly initialized. This led to call trace: [ 148.576156] Oops: Oops: 0000 [#1] SMP NOPTI [ 148.576167] CPU: 80 UID: 0 PID: 12843 Comm: rmmod Kdump: loaded Not tainted 6.17.0-rc7next-queue-3oct-01915-g06d79d51cf51 #1 PREEMPT(full) [ 148.576177] Hardware name: HPE ProLiant DL385 Gen10 Plus/ProLiant DL385 Gen10 Plus, BIOS A42 07/18/2020 [ 148.576182] RIP: 0010:__dev_printk+0x16/0x70 [ 148.576196] Code: 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 41 55 41 54 49 89 d4 55 48 89 fd 53 48 85 f6 74 3c <4c> 8b 6e 50 48 89 f3 4d 85 ed 75 03 4c 8b 2e 48 89 df e8 f3 27 98 [ 148.576204] RSP: 0018:ffffd2fd7ea17a48 EFLAGS: 00010202 [ 148.576211] RAX: ffffd2fd7ea17aa0 RBX: ffff8eb288ae2000 RCX: 0000000000000000 [ 148.576217] RDX: ffffd2fd7ea17a70 RSI: 00000000000000c8 RDI: ffffffffb68d3d88 [ 148.576222] RBP: ffffffffb68d3d88 R08: 0000000000000000 R09: 0000000000000000 [ 148.576227] R10: 00000000000000c8 R11: ffff8eb2b1a49400 R12: ffffd2fd7ea17a70 [ 148.576231] R13: ffff8eb3141fb000 R14: ffffffffc1215b48 R15: ffffffffc1215bd8 [ 148.576236] FS: 00007f5666ba6740(0000) GS:ffff8eb2472b9000(0000) knlGS:0000000000000000 [ 148.576242] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 148.576247] CR2: 0000000000000118 CR3: 000000011ad17000 CR4: 0000000000350ef0 [ 148.576252] Call Trace: [ 148.576258] [ 148.576269] _dev_warn+0x7c/0x96 [ 148.576290] libie_fwlog_deinit+0x112/0x117 [libie_fwlog] [ 148.576303] ixgbe_remove+0x63/0x290 [ixgbe] [ 148.576342] pci_device_remove+0x42/0xb0 [ 148.576354] device_release_driver_internal+0x19c/0x200 [ 148.576365] driver_detach+0x48/0x90 [ 148.576372] bus_remove_driver+0x6d/0xf0 [ 148.576383] pci_unregister_driver+0x2e/0xb0 [ 148.576393] ixgbe_exit_module+0x1c/0xd50 [ixgbe] [ 148.576430] __do_sys_delete_module.isra.0+0x1bc/0x2e0 [ 148.576446] do_syscall_64+0x7f/0x980 It can be reproduced by trying to unload ixgbe driver in recovery mode. Fix that by checking if fwlog is supported before doing unroll. Fixes: 641585bc978e ("ixgbe: fwlog support for e610") Reviewed-by: Aleksandr Loktionov Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/libie/fwlog.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/libie/fwlog.c b/drivers/net/ethernet/intel/libie/fwlog.c index 79020d990859..4d0c8370386b 100644 --- a/drivers/net/ethernet/intel/libie/fwlog.c +++ b/drivers/net/ethernet/intel/libie/fwlog.c @@ -1049,6 +1049,10 @@ void libie_fwlog_deinit(struct libie_fwlog *fwlog) { int status; + /* if FW logging isn't supported it means no configuration was done */ + if (!libie_fwlog_supported(fwlog)) + return; + /* make sure FW logging is disabled to not put the FW in a weird state * for the next driver load */ From b84852170153671bb0fa6737a6e48370addd8e1a Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Tue, 10 Feb 2026 15:57:14 +0000 Subject: [PATCH 670/828] iavf: fix netdev->max_mtu to respect actual hardware limit iavf sets LIBIE_MAX_MTU as netdev->max_mtu, ignoring vf_res->max_mtu from PF [1]. This allows setting an MTU beyond the actual hardware limit, causing TX queue timeouts [2]. Set correct netdev->max_mtu using vf_res->max_mtu from the PF. Note that currently PF drivers such as ice/i40e set the frame size in vf_res->max_mtu, not MTU. Convert vf_res->max_mtu to MTU before setting netdev->max_mtu. [1] # ip -j -d link show $DEV | jq '.[0].max_mtu' 16356 [2] iavf 0000:00:05.0 enp0s5: NETDEV WATCHDOG: CPU: 1: transmit queue 0 timed out 5692 ms iavf 0000:00:05.0 enp0s5: NIC Link is Up Speed is 10 Gbps Full Duplex iavf 0000:00:05.0 enp0s5: NETDEV WATCHDOG: CPU: 6: transmit queue 3 timed out 5312 ms iavf 0000:00:05.0 enp0s5: NIC Link is Up Speed is 10 Gbps Full Duplex ... Fixes: 5fa4caff59f2 ("iavf: switch to Page Pool") Signed-off-by: Kohei Enju Reviewed-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index bceaf4b1b85d..86c1964f42e1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2793,7 +2793,22 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) netdev->watchdog_timeo = 5 * HZ; netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = LIBIE_MAX_MTU; + + /* PF/VF API: vf_res->max_mtu is max frame size (not MTU). + * Convert to MTU. + */ + if (!adapter->vf_res->max_mtu) { + netdev->max_mtu = LIBIE_MAX_MTU; + } else if (adapter->vf_res->max_mtu < LIBETH_RX_LL_LEN + ETH_MIN_MTU || + adapter->vf_res->max_mtu > + LIBETH_RX_LL_LEN + LIBIE_MAX_MTU) { + netdev_warn_once(adapter->netdev, + "invalid max frame size %d from PF, using default MTU %d", + adapter->vf_res->max_mtu, LIBIE_MAX_MTU); + netdev->max_mtu = LIBIE_MAX_MTU; + } else { + netdev->max_mtu = adapter->vf_res->max_mtu - LIBETH_RX_LL_LEN; + } if (!is_valid_ether_addr(adapter->hw.mac.addr)) { dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", From d4c13ab36273a8c318ba06799793cc1f5d9c6fa1 Mon Sep 17 00:00:00 2001 From: Vivek Behera Date: Thu, 22 Jan 2026 15:16:52 +0100 Subject: [PATCH 671/828] igb: Fix trigger of incorrect irq in igb_xsk_wakeup The current implementation in the igb_xsk_wakeup expects the Rx and Tx queues to share the same irq. This would lead to triggering of incorrect irq in split irq configuration. This patch addresses this issue which could impact environments with 2 active cpu cores or when the number of queues is reduced to 2 or less cat /proc/interrupts | grep eno2 167: 0 0 0 0 IR-PCI-MSIX-0000:08:00.0 0-edge eno2 168: 0 0 0 0 IR-PCI-MSIX-0000:08:00.0 1-edge eno2-rx-0 169: 0 0 0 0 IR-PCI-MSIX-0000:08:00.0 2-edge eno2-rx-1 170: 0 0 0 0 IR-PCI-MSIX-0000:08:00.0 3-edge eno2-tx-0 171: 0 0 0 0 IR-PCI-MSIX-0000:08:00.0 4-edge eno2-tx-1 Furthermore it uses the flags input argument to trigger either rx, tx or both rx and tx irqs as specified in the ndo_xsk_wakeup api documentation Fixes: 80f6ccf9f116 ("igb: Introduce XSK data structures and helpers") Signed-off-by: Vivek Behera Reviewed-by: Aleksandr Loktionov Suggested-by: Maciej Fijalkowski Acked-by: Maciej Fijalkowski Tested-by: Saritha Sanigani (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_xsk.c | 38 +++++++++++++++++++----- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c index 30ce5fbb5b77..ce4a7b58cad2 100644 --- a/drivers/net/ethernet/intel/igb/igb_xsk.c +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c @@ -524,6 +524,16 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool) return nb_pkts < budget; } +static u32 igb_sw_irq_prep(struct igb_q_vector *q_vector) +{ + u32 eics = 0; + + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + eics = q_vector->eims_value; + + return eics; +} + int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct igb_adapter *adapter = netdev_priv(dev); @@ -542,20 +552,32 @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) ring = adapter->tx_ring[qid]; - if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags)) - return -ENETDOWN; - if (!READ_ONCE(ring->xsk_pool)) return -EINVAL; - if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { + if (flags & XDP_WAKEUP_TX) { + if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags)) + return -ENETDOWN; + + eics |= igb_sw_irq_prep(ring->q_vector); + } + + if (flags & XDP_WAKEUP_RX) { + /* If IGB_FLAG_QUEUE_PAIRS is active, the q_vector + * and NAPI is shared between RX and TX. + * If NAPI is already running it would be marked as missed + * from the TX path, making this RX call a NOP + */ + ring = adapter->rx_ring[qid]; + eics |= igb_sw_irq_prep(ring->q_vector); + } + + if (eics) { /* Cause software interrupt */ - if (adapter->flags & IGB_FLAG_HAS_MSIX) { - eics |= ring->q_vector->eims_value; + if (adapter->flags & IGB_FLAG_HAS_MSIX) wr32(E1000_EICS, eics); - } else { + else wr32(E1000_ICS, E1000_ICS_RXDMT0); - } } return 0; From 554a1c34c11a057d01819ce9bb04653a8ffc8071 Mon Sep 17 00:00:00 2001 From: Vivek Behera Date: Tue, 20 Jan 2026 08:52:16 +0100 Subject: [PATCH 672/828] igc: Fix trigger of incorrect irq in igc_xsk_wakeup function This patch addresses the issue where the igc_xsk_wakeup function was triggering an incorrect IRQ for tx-0 when the i226 is configured with only 2 combined queues or in an environment with 2 active CPU cores. This prevented XDP Zero-copy send functionality in such split IRQ configurations. The fix implements the correct logic for extracting q_vectors saved during rx and tx ring allocation and utilizes flags provided by the ndo_xsk_wakeup API to trigger the appropriate IRQ. Fixes: fc9df2a0b520 ("igc: Enable RX via AF_XDP zero-copy") Fixes: 15fd021bc427 ("igc: Add Tx hardware timestamp request for AF_XDP zero-copy packet") Signed-off-by: Vivek Behera Reviewed-by: Jacob Keller Reviewed-by: Aleksandr loktinov Reviewed-by: Piotr Kwapulinski Reviewed-by: Song Yoong Siang Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 34 ++++++++++++++++------- drivers/net/ethernet/intel/igc/igc_ptp.c | 3 +- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 27e5c2109138..b2e8d0c0f827 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6906,28 +6906,29 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, return nxmit; } -static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, - struct igc_q_vector *q_vector) +static u32 igc_sw_irq_prep(struct igc_q_vector *q_vector) { - struct igc_hw *hw = &adapter->hw; u32 eics = 0; - eics |= q_vector->eims_value; - wr32(IGC_EICS, eics); + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + eics = q_vector->eims_value; + + return eics; } int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct igc_adapter *adapter = netdev_priv(dev); - struct igc_q_vector *q_vector; + struct igc_hw *hw = &adapter->hw; struct igc_ring *ring; + u32 eics = 0; if (test_bit(__IGC_DOWN, &adapter->state)) return -ENETDOWN; if (!igc_xdp_is_enabled(adapter)) return -ENXIO; - + /* Check if queue_id is valid. Tx and Rx queue numbers are always same */ if (queue_id >= adapter->num_rx_queues) return -EINVAL; @@ -6936,9 +6937,22 @@ int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) if (!ring->xsk_pool) return -ENXIO; - q_vector = adapter->q_vector[queue_id]; - if (!napi_if_scheduled_mark_missed(&q_vector->napi)) - igc_trigger_rxtxq_interrupt(adapter, q_vector); + if (flags & XDP_WAKEUP_RX) + eics |= igc_sw_irq_prep(ring->q_vector); + + if (flags & XDP_WAKEUP_TX) { + /* If IGC_FLAG_QUEUE_PAIRS is active, the q_vector + * and NAPI is shared between RX and TX. + * If NAPI is already running it would be marked as missed + * from the RX path, making this TX call a NOP + */ + ring = adapter->tx_ring[queue_id]; + eics |= igc_sw_irq_prep(ring->q_vector); + } + + if (eics) + /* Cause software interrupt */ + wr32(IGC_EICS, eics); return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 7aae83c108fd..44ee19386766 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -550,7 +550,8 @@ static void igc_ptp_free_tx_buffer(struct igc_adapter *adapter, tstamp->buffer_type = 0; /* Trigger txrx interrupt for transmit completion */ - igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, 0); + igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, + XDP_WAKEUP_TX); return; } From b06ccbabe2506fd70b9167a644978b049150224a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 3 Mar 2026 01:01:15 -1000 Subject: [PATCH 673/828] sched_ext: Fix starvation of scx_enable() under fair-class saturation During scx_enable(), the READY -> ENABLED task switching loop changes the calling thread's sched_class from fair to ext. Since fair has higher priority than ext, saturating fair-class workloads can indefinitely starve the enable thread, hanging the system. This was introduced when the enable path switched from preempt_disable() to scx_bypass() which doesn't protect against fair-class starvation. Note that the original preempt_disable() protection wasn't complete either - in partial switch modes, the calling thread could still be starved after preempt_enable() as it may have been switched to ext class. Fix it by offloading the enable body to a dedicated system-wide RT (SCHED_FIFO) kthread which cannot be starved by either fair or ext class tasks. scx_enable() lazily creates the kthread on first use and passes the ops pointer through a struct scx_enable_cmd containing the kthread_work, then synchronously waits for completion. The workfn runs on a different kthread from sch->helper (which runs disable_work), so it can safely flush disable_work on the error path without deadlock. Fixes: 8c2090c504e9 ("sched_ext: Initialize in bypass mode") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 66 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index eab6e09b6442..ba51969718f5 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4975,20 +4975,30 @@ static int validate_ops(struct scx_sched *sch, const struct sched_ext_ops *ops) return 0; } -static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) +/* + * scx_enable() is offloaded to a dedicated system-wide RT kthread to avoid + * starvation. During the READY -> ENABLED task switching loop, the calling + * thread's sched_class gets switched from fair to ext. As fair has higher + * priority than ext, the calling thread can be indefinitely starved under + * fair-class saturation, leading to a system hang. + */ +struct scx_enable_cmd { + struct kthread_work work; + struct sched_ext_ops *ops; + int ret; +}; + +static void scx_enable_workfn(struct kthread_work *work) { + struct scx_enable_cmd *cmd = + container_of(work, struct scx_enable_cmd, work); + struct sched_ext_ops *ops = cmd->ops; struct scx_sched *sch; struct scx_task_iter sti; struct task_struct *p; unsigned long timeout; int i, cpu, ret; - if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), - cpu_possible_mask)) { - pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n"); - return -EINVAL; - } - mutex_lock(&scx_enable_mutex); if (scx_enable_state() != SCX_DISABLED) { @@ -5205,13 +5215,15 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) atomic_long_inc(&scx_enable_seq); - return 0; + cmd->ret = 0; + return; err_free_ksyncs: free_kick_syncs(); err_unlock: mutex_unlock(&scx_enable_mutex); - return ret; + cmd->ret = ret; + return; err_disable_unlock_all: scx_cgroup_unlock(); @@ -5230,7 +5242,41 @@ err_disable: */ scx_error(sch, "scx_enable() failed (%d)", ret); kthread_flush_work(&sch->disable_work); - return 0; + cmd->ret = 0; +} + +static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) +{ + static struct kthread_worker *helper; + static DEFINE_MUTEX(helper_mutex); + struct scx_enable_cmd cmd; + + if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), + cpu_possible_mask)) { + pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n"); + return -EINVAL; + } + + if (!READ_ONCE(helper)) { + mutex_lock(&helper_mutex); + if (!helper) { + helper = kthread_run_worker(0, "scx_enable_helper"); + if (IS_ERR_OR_NULL(helper)) { + helper = NULL; + mutex_unlock(&helper_mutex); + return -ENOMEM; + } + sched_set_fifo(helper->task); + } + mutex_unlock(&helper_mutex); + } + + kthread_init_work(&cmd.work, scx_enable_workfn); + cmd.ops = ops; + + kthread_queue_work(READ_ONCE(helper), &cmd.work); + kthread_flush_work(&cmd.work); + return cmd.ret; } From 56145d237385ca0e7ca9ff7b226aaf2eb8ef368b Mon Sep 17 00:00:00 2001 From: Lang Xu Date: Tue, 3 Mar 2026 17:52:17 +0800 Subject: [PATCH 674/828] bpf: Fix a UAF issue in bpf_trampoline_link_cgroup_shim The root cause of this bug is that when 'bpf_link_put' reduces the refcount of 'shim_link->link.link' to zero, the resource is considered released but may still be referenced via 'tr->progs_hlist' in 'cgroup_shim_find'. The actual cleanup of 'tr->progs_hlist' in 'bpf_shim_tramp_link_release' is deferred. During this window, another process can cause a use-after-free via 'bpf_trampoline_link_cgroup_shim'. Based on Martin KaFai Lau's suggestions, I have created a simple patch. To fix this: Add an atomic non-zero check in 'bpf_trampoline_link_cgroup_shim'. Only increment the refcount if it is not already zero. Testing: I verified the fix by adding a delay in 'bpf_shim_tramp_link_release' to make the bug easier to trigger: static void bpf_shim_tramp_link_release(struct bpf_link *link) { /* ... */ if (!shim_link->trampoline) return; + msleep(100); WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL)); bpf_trampoline_put(shim_link->trampoline); } Before the patch, running a PoC easily reproduced the crash(almost 100%) with a call trace similar to KaiyanM's report. After the patch, the bug no longer occurs even after millions of iterations. Fixes: 69fd337a975c ("bpf: per-cgroup lsm flavor") Reported-by: Kaiyan Mei Closes: https://lore.kernel.org/bpf/3c4ebb0b.46ff8.19abab8abe2.Coremail.kaiyanm@hust.edu.cn/ Signed-off-by: Lang Xu Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/279EEE1BA1DDB49D+20260303095217.34436-1-xulang@uniontech.com --- kernel/bpf/trampoline.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 84db9e658e52..f02254a21585 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -1002,10 +1002,8 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, mutex_lock(&tr->mutex); shim_link = cgroup_shim_find(tr, bpf_func); - if (shim_link) { + if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) { /* Reusing existing shim attached by the other program. */ - bpf_link_inc(&shim_link->link.link); - mutex_unlock(&tr->mutex); bpf_trampoline_put(tr); /* bpf_trampoline_get above */ return 0; From 12c43a062acb0ac137fc2a4a106d4d084b8c5416 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Tue, 3 Mar 2026 15:13:11 +0000 Subject: [PATCH 675/828] smb/client: fix buffer size for smb311_posix_qinfo in smb2_compound_op() Use `sizeof(struct smb311_posix_qinfo)` instead of sizeof its pointer, so the allocated buffer matches the actual struct size. Fixes: 6a5f6592a0b6 ("SMB311: Add support for query info using posix extensions (level 100)") Reported-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 195a38fd61e8..1c4663ed7e69 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -325,7 +325,7 @@ replay_again: cfile->fid.volatile_fid, SMB_FIND_FILE_POSIX_INFO, SMB2_O_INFO_FILE, 0, - sizeof(struct smb311_posix_qinfo *) + + sizeof(struct smb311_posix_qinfo) + (PATH_MAX * 2) + (sizeof(struct smb_sid) * 2), 0, NULL); } else { @@ -335,7 +335,7 @@ replay_again: COMPOUND_FID, SMB_FIND_FILE_POSIX_INFO, SMB2_O_INFO_FILE, 0, - sizeof(struct smb311_posix_qinfo *) + + sizeof(struct smb311_posix_qinfo) + (PATH_MAX * 2) + (sizeof(struct smb_sid) * 2), 0, NULL); } From 9621b996e4db1dbc2b3dc5d5910b7d6179397320 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Tue, 3 Mar 2026 15:13:12 +0000 Subject: [PATCH 676/828] smb/client: fix buffer size for smb311_posix_qinfo in SMB311_posix_query_info() SMB311_posix_query_info() is currently unused, but it may still be used in some stable versions, so these changes are submitted as a separate patch. Use `sizeof(struct smb311_posix_qinfo)` instead of sizeof its pointer, so the allocated buffer matches the actual struct size. Fixes: b1bc1874b885 ("smb311: Add support for SMB311 query info (non-compounded)") Reported-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 04e361ed2356..8a1fcc097606 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3996,7 +3996,7 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct smb311_posix_qinfo *data, u32 *plen) { - size_t output_len = sizeof(struct smb311_posix_qinfo *) + + size_t output_len = sizeof(struct smb311_posix_qinfo) + (sizeof(struct smb_sid) * 2) + (PATH_MAX * 2); *plen = 0; From 8098179dc981c361c4ff238bc3935329a93bbdfb Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Tue, 3 Mar 2026 15:13:13 +0000 Subject: [PATCH 677/828] smb/client: remove unused SMB311_posix_query_info() It is currently unused, as now we are doing compounding instead (see smb2_query_path_info()). Signed-off-by: ZhangGuoDong Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 18 ------------------ fs/smb/client/smb2proto.h | 3 --- 2 files changed, 21 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 8a1fcc097606..c43ca74e8704 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3989,24 +3989,6 @@ int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, NULL); } -#if 0 -/* currently unused, as now we are doing compounding instead (see smb311_posix_query_path_info) */ -int -SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb311_posix_qinfo *data, u32 *plen) -{ - size_t output_len = sizeof(struct smb311_posix_qinfo) + - (sizeof(struct smb_sid) * 2) + (PATH_MAX * 2); - *plen = 0; - - return query_info(xid, tcon, persistent_fid, volatile_fid, - SMB_FIND_FILE_POSIX_INFO, SMB2_O_INFO_FILE, 0, - output_len, sizeof(struct smb311_posix_qinfo), (void **)&data, plen); - /* Note caller must free "data" (passed in above). It may be allocated in query_info call */ -} -#endif - int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 881e42cf66ce..230bb1e9f4e1 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -167,9 +167,6 @@ int SMB2_flush_init(const unsigned int xid, struct smb_rqst *rqst, struct cifs_tcon *tcon, struct TCP_Server_Info *server, u64 persistent_fid, u64 volatile_fid); void SMB2_flush_free(struct smb_rqst *rqst); -int SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb311_posix_qinfo *data, u32 *plen); int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct smb2_file_all_info *data); From 2ffb4f5c2ccb2fa1c049dd11899aee7967deef5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 1 Mar 2026 11:45:48 -0800 Subject: [PATCH 678/828] ipv6: fix NULL pointer deref in ip6_rt_get_dev_rcu() l3mdev_master_dev_rcu() can return NULL when the slave device is being un-slaved from a VRF. All other callers deal with this, but we lost the fallback to loopback in ip6_rt_pcpu_alloc() -> ip6_rt_get_dev_rcu() with commit 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address"). KASAN: null-ptr-deref in range [0x0000000000000108-0x000000000000010f] RIP: 0010:ip6_rt_pcpu_alloc (net/ipv6/route.c:1418) Call Trace: ip6_pol_route (net/ipv6/route.c:2318) fib6_rule_lookup (net/ipv6/fib6_rules.c:115) ip6_route_output_flags (net/ipv6/route.c:2607) vrf_process_v6_outbound (drivers/net/vrf.c:437) I was tempted to rework the un-slaving code to clear the flag first and insert synchronize_rcu() before we remove the upper. But looks like the explicit fallback to loopback_dev is an established pattern. And I guess avoiding the synchronize_rcu() is nice, too. Fixes: 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address") Reviewed-by: David Ahern Link: https://patch.msgid.link/20260301194548.927324-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 85df25c36409..7db0c837196c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1063,7 +1063,8 @@ static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res) */ if (netif_is_l3_slave(dev) && !rt6_need_strict(&res->f6i->fib6_dst.addr)) - dev = l3mdev_master_dev_rcu(dev); + dev = l3mdev_master_dev_rcu(dev) ? : + dev_net(dev)->loopback_dev; else if (!netif_is_l3_master(dev)) dev = dev_net(dev)->loopback_dev; /* last case is netif_is_l3_master(dev) is true in which From 67edfec516d30d3e62925c397be4a1e5185802fc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Mar 2026 12:36:00 -0800 Subject: [PATCH 679/828] net/tcp-ao: Fix MAC comparison to be constant-time To prevent timing attacks, MACs need to be compared in constant time. Use the appropriate helper function for this. Fixes: 0a3a809089eb ("net/tcp: Verify inbound TCP-AO signed segments") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20260302203600.13561-1-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/Kconfig | 1 + net/ipv4/tcp_ao.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b71c22475c51..3ab6247be585 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -748,6 +748,7 @@ config TCP_SIGPOOL config TCP_AO bool "TCP: Authentication Option (RFC5925)" select CRYPTO + select CRYPTO_LIB_UTILS select TCP_SIGPOOL depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) help diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 4980caddb0fc..a97cdf3e6af4 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "TCP: " fmt #include +#include #include #include @@ -922,7 +923,7 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, /* XXX: make it per-AF callback? */ tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key, (phash - (u8 *)th), sne); - if (memcmp(phash, hash_buf, maclen)) { + if (crypto_memneq(phash, hash_buf, maclen)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); From 4ee7fa6cf78ff26d783d39e2949d14c4c1cd5e7f Mon Sep 17 00:00:00 2001 From: Yung Chih Su Date: Mon, 2 Mar 2026 14:02:47 +0800 Subject: [PATCH 680/828] net: ipv4: fix ARM64 alignment fault in multipath hash seed `struct sysctl_fib_multipath_hash_seed` contains two u32 fields (user_seed and mp_seed), making it an 8-byte structure with a 4-byte alignment requirement. In `fib_multipath_hash_from_keys()`, the code evaluates the entire struct atomically via `READ_ONCE()`: mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; While this silently works on GCC by falling back to unaligned regular loads which the ARM64 kernel tolerates, it causes a fatal kernel panic when compiled with Clang and LTO enabled. Commit e35123d83ee3 ("arm64: lto: Strengthen READ_ONCE() to acquire when CONFIG_LTO=y") strengthens `READ_ONCE()` to use Load-Acquire instructions (`ldar` / `ldapr`) to prevent compiler reordering bugs under Clang LTO. Since the macro evaluates the full 8-byte struct, Clang emits a 64-bit `ldar` instruction. ARM64 architecture strictly requires `ldar` to be naturally aligned, thus executing it on a 4-byte aligned address triggers a strict Alignment Fault (FSC = 0x21). Fix the read side by moving the `READ_ONCE()` directly to the `u32` member, which emits a safe 32-bit `ldar Wn`. Furthermore, Eric Dumazet pointed out that `WRITE_ONCE()` on the entire struct in `proc_fib_multipath_hash_set_seed()` is also flawed. Analysis shows that Clang splits this 8-byte write into two separate 32-bit `str` instructions. While this avoids an alignment fault, it destroys atomicity and exposes a tear-write vulnerability. Fix this by explicitly splitting the write into two 32-bit `WRITE_ONCE()` operations. Finally, add the missing `READ_ONCE()` when reading `user_seed` in `proc_fib_multipath_hash_seed()` to ensure proper pairing and concurrency safety. Fixes: 4ee2a8cace3f ("net: ipv4: Add a sysctl to set multipath hash seed") Signed-off-by: Yung Chih Su Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260302060247.7066-1-yuuchihsu@gmail.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 2 +- net/ipv4/sysctl_net_ipv4.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b4495c38e0a0..318593743b6e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, siphash_aligned_key_t hash_key; u32 mp_seed; - mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed); fib_multipath_hash_construct_key(&hash_key, mp_seed); return flow_hash_from_keys_seed(keys, &hash_key); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 643763bc2142..5654cc9c8a0b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -486,7 +486,8 @@ static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) proc_fib_multipath_hash_rand_seed), }; - WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new); + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed, new.user_seed); + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed, new.mp_seed); } static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write, @@ -500,7 +501,7 @@ static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write int ret; mphs = &net->ipv4.sysctl_fib_multipath_hash_seed; - user_seed = mphs->user_seed; + user_seed = READ_ONCE(mphs->user_seed); tmp = *table; tmp.data = &user_seed; From 5af6e8b54927f7a8d3c7fd02b1bdc09e93d5c079 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 2 Mar 2026 03:40:46 -0800 Subject: [PATCH 681/828] netconsole: fix sysdata_release_enabled_show checking wrong flag sysdata_release_enabled_show() checks SYSDATA_TASKNAME instead of SYSDATA_RELEASE, causing the configfs release_enabled attribute to reflect the taskname feature state rather than the release feature state. This is a copy-paste error from the adjacent sysdata_taskname_enabled_show() function. The corresponding _store function already uses the correct SYSDATA_RELEASE flag. Fixes: 343f90227070 ("netconsole: implement configfs for release_enabled") Signed-off-by: Breno Leitao Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260302-sysdata_release_fix-v1-1-e5090f677c7c@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 2db116fb1a7c..3c9acd6e49e8 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -617,7 +617,7 @@ static ssize_t sysdata_release_enabled_show(struct config_item *item, bool release_enabled; dynamic_netconsole_mutex_lock(); - release_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME); + release_enabled = !!(nt->sysdata_fields & SYSDATA_RELEASE); dynamic_netconsole_mutex_unlock(); return sysfs_emit(buf, "%d\n", release_enabled); From e2f27363aa6d983504c6836dd0975535e2e9dba0 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 2 Mar 2026 09:51:24 +0530 Subject: [PATCH 682/828] amd-xgbe: fix sleep while atomic on suspend/resume The xgbe_powerdown() and xgbe_powerup() functions use spinlocks (spin_lock_irqsave) while calling functions that may sleep: - napi_disable() can sleep waiting for NAPI polling to complete - flush_workqueue() can sleep waiting for pending work items This causes a "BUG: scheduling while atomic" error during suspend/resume cycles on systems using the AMD XGBE Ethernet controller. The spinlock protection in these functions is unnecessary as these functions are called from suspend/resume paths which are already serialized by the PM core Fix this by removing the spinlock. Since only code that takes this lock is xgbe_powerdown() and xgbe_powerup(), remove it completely. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260302042124.1386445-1-Raju.Rangoju@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 10 ---------- drivers/net/ethernet/amd/xgbe/xgbe-main.c | 1 - drivers/net/ethernet/amd/xgbe/xgbe.h | 3 --- 3 files changed, 14 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 62bb4b8a68e1..8b79d88480db 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1120,7 +1120,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned long flags; DBGPR("-->xgbe_powerdown\n"); @@ -1131,8 +1130,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) return -EINVAL; } - spin_lock_irqsave(&pdata->lock, flags); - if (caller == XGMAC_DRIVER_CONTEXT) netif_device_detach(netdev); @@ -1148,8 +1145,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) pdata->power_down = 1; - spin_unlock_irqrestore(&pdata->lock, flags); - DBGPR("<--xgbe_powerdown\n"); return 0; @@ -1159,7 +1154,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned long flags; DBGPR("-->xgbe_powerup\n"); @@ -1170,8 +1164,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) return -EINVAL; } - spin_lock_irqsave(&pdata->lock, flags); - pdata->power_down = 0; xgbe_napi_enable(pdata, 0); @@ -1186,8 +1178,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) xgbe_start_timers(pdata); - spin_unlock_irqrestore(&pdata->lock, flags); - DBGPR("<--xgbe_powerup\n"); return 0; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index d1f0419edb23..7d45ea22a02e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -76,7 +76,6 @@ struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev) pdata->netdev = netdev; pdata->dev = dev; - spin_lock_init(&pdata->lock); spin_lock_init(&pdata->xpcs_lock); mutex_init(&pdata->rss_mutex); spin_lock_init(&pdata->tstamp_lock); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 1269b8ce9249..e1d7d7150e16 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1004,9 +1004,6 @@ struct xgbe_prv_data { unsigned int pp3; unsigned int pp4; - /* Overall device lock */ - spinlock_t lock; - /* XPCS indirect addressing lock */ spinlock_t xpcs_lock; unsigned int xpcs_window_def_reg; From 7f5d8e63f3d4dc952548502a2227de780cbcd21f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 2 Mar 2026 11:50:20 -0800 Subject: [PATCH 683/828] MAINTAINERS: update the skge/sky2 maintainers Mark the skge and sky2 drivers as orphan. I no longer have any Marvell/SysKonnect boards to test with and mail to Mirko Lindner bounced because Marvell sold off that divsion. Signed-off-by: Stephen Hemminger Link: https://patch.msgid.link/20260302195120.187183-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7829ff2180ae..bee85351120e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15372,10 +15372,8 @@ F: drivers/crypto/marvell/ F: include/linux/soc/marvell/octeontx2/ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) -M: Mirko Lindner -M: Stephen Hemminger L: netdev@vger.kernel.org -S: Odd fixes +S: Orphan F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER From 46d0d6f50dab706637f4c18a470aac20a21900d3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Mar 2026 12:34:09 -0800 Subject: [PATCH 684/828] net/tcp-md5: Fix MAC comparison to be constant-time To prevent timing attacks, MACs need to be compared in constant time. Use the appropriate helper function for this. Fixes: cfb6eeb4c860 ("[TCP]: MD5 Signature Option (RFC2385) support.") Fixes: 658ddaaf6694 ("tcp: md5: RST: getting md5 key from listener") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260302203409.13388-1-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/Kconfig | 1 + net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 3 ++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 3ab6247be585..df922f9f5289 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -762,6 +762,7 @@ config TCP_AO config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO_LIB_MD5 + select CRYPTO_LIB_UTILS help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) use is to protect BGP sessions between core routers diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8cdc26e8ad68..202a4e57a218 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -244,6 +244,7 @@ #define pr_fmt(fmt) "TCP: " fmt #include +#include #include #include #include @@ -4970,7 +4971,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, tcp_v4_md5_hash_skb(newhash, key, NULL, skb); else tp->af_specific->calc_md5_hash(newhash, key, NULL, skb); - if (memcmp(hash_location, newhash, 16) != 0) { + if (crypto_memneq(hash_location, newhash, 16)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d53d39be291a..910c25cb24e1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -88,6 +88,7 @@ #include #include +#include #include @@ -839,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, goto out; tcp_v4_md5_hash_skb(newhash, key, NULL, skb); - if (memcmp(md5_hash_location, newhash, 16) != 0) + if (crypto_memneq(md5_hash_location, newhash, 16)) goto out; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e46a0efae012..5195a46b951e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -68,6 +68,7 @@ #include #include +#include #include @@ -1048,7 +1049,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, key.type = TCP_KEY_MD5; tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); - if (memcmp(md5_hash_location, newhash, 16) != 0) + if (crypto_memneq(md5_hash_location, newhash, 16)) goto out; } #endif From 9e7dc228bb6d4afa74dd6bab4f3aad43126cc2db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= Date: Wed, 4 Mar 2026 01:42:57 +0100 Subject: [PATCH 685/828] io_uring/mock: Fix typo in help text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the spelling of "subsystem". Signed-off-by: J. Neuschäfer Signed-off-by: Jens Axboe --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index e95d43457851..74f07212db85 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1897,7 +1897,7 @@ config IO_URING_MOCK_FILE default n depends on IO_URING help - Enable mock files for io_uring subststem testing. The ABI might + Enable mock files for io_uring subsystem testing. The ABI might still change, so it's still experimental and should only be enabled for specific test purposes. From 26bc83b88bbbf054f0980a4a42047a8d1e210e4c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Feb 2026 20:27:02 -0800 Subject: [PATCH 686/828] smb: client: Compare MACs in constant time To prevent timing attacks, MAC comparisons need to be constant-time. Replace the memcmp() with the correct function, crypto_memneq(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/smb1encrypt.c | 3 ++- fs/smb/client/smb2transport.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb1encrypt.c b/fs/smb/client/smb1encrypt.c index 0dbbce2431ff..bf10fdeeedca 100644 --- a/fs/smb/client/smb1encrypt.c +++ b/fs/smb/client/smb1encrypt.c @@ -11,6 +11,7 @@ #include #include +#include #include "cifsproto.h" #include "smb1proto.h" #include "cifs_debug.h" @@ -131,7 +132,7 @@ int cifs_verify_signature(struct smb_rqst *rqst, /* cifs_dump_mem("what we think it should be: ", what_we_think_sig_should_be, 16); */ - if (memcmp(server_response_sig, what_we_think_sig_should_be, 8)) + if (crypto_memneq(server_response_sig, what_we_think_sig_should_be, 8)) return -EACCES; else return 0; diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 8b9000a83181..81be2b226e26 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" @@ -617,7 +618,8 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (rc) return rc; - if (memcmp(server_response_sig, shdr->Signature, SMB2_SIGNATURE_SIZE)) { + if (crypto_memneq(server_response_sig, shdr->Signature, + SMB2_SIGNATURE_SIZE)) { cifs_dbg(VFS, "sign fail cmd 0x%x message id 0x%llx\n", shdr->Command, shdr->MessageId); return -EACCES; From 6ca8379b5d36e22b04e6315c3e49a6083377c862 Mon Sep 17 00:00:00 2001 From: Shengming Hu Date: Sat, 21 Feb 2026 11:30:07 +0800 Subject: [PATCH 687/828] fgraph: Fix thresh_return clear per-task notrace When tracing_thresh is enabled, function graph tracing uses trace_graph_thresh_return() as the return handler. Unlike trace_graph_return(), it did not clear the per-task TRACE_GRAPH_NOTRACE flag set by the entry handler for set_graph_notrace addresses. This could leave the task permanently in "notrace" state and effectively disable function graph tracing for that task. Mirror trace_graph_return()'s per-task notrace handling by clearing TRACE_GRAPH_NOTRACE and returning early when set. Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260221113007819YgrZsMGABff4Rc-O_fZxL@zte.com.cn Fixes: b84214890a9bc ("function_graph: Move graph notrace bit to shadow stack global var") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Shengming Hu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 3d8239fee004..817d0f1696b6 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -400,14 +400,15 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops, struct ftrace_regs *fregs) { + unsigned long *task_var = fgraph_get_task_var(gops); struct fgraph_times *ftimes; struct trace_array *tr; int size; ftrace_graph_addr_finish(gops, trace); - if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { - trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT); + if (*task_var & TRACE_GRAPH_NOTRACE) { + *task_var &= ~TRACE_GRAPH_NOTRACE; return; } From b96d0c59cdbb2a22b2545f6f3d5c6276b05761dd Mon Sep 17 00:00:00 2001 From: Shengming Hu Date: Sat, 21 Feb 2026 11:33:14 +0800 Subject: [PATCH 688/828] fgraph: Fix thresh_return nosleeptime double-adjust trace_graph_thresh_return() called handle_nosleeptime() and then delegated to trace_graph_return(), which calls handle_nosleeptime() again. When sleep-time accounting is disabled this double-adjusts calltime and can produce bogus durations (including underflow). Fix this by computing rettime once, applying handle_nosleeptime() only once, using the adjusted calltime for threshold comparison, and writing the return event directly via __trace_graph_return() when the threshold is met. Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260221113314048jE4VRwIyZEALiYByGK0My@zte.com.cn Fixes: 3c9880f3ab52b ("ftrace: Use a running sleeptime instead of saving on shadow stack") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Shengming Hu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 817d0f1696b6..0d2d3a2ea7dd 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -403,8 +403,12 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, unsigned long *task_var = fgraph_get_task_var(gops); struct fgraph_times *ftimes; struct trace_array *tr; + unsigned int trace_ctx; + u64 calltime, rettime; int size; + rettime = trace_clock_local(); + ftrace_graph_addr_finish(gops, trace); if (*task_var & TRACE_GRAPH_NOTRACE) { @@ -419,11 +423,13 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, tr = gops->private; handle_nosleeptime(tr, trace, ftimes, size); - if (tracing_thresh && - (trace_clock_local() - ftimes->calltime < tracing_thresh)) + calltime = ftimes->calltime; + + if (tracing_thresh && (rettime - calltime < tracing_thresh)) return; - else - trace_graph_return(trace, gops, fregs); + + trace_ctx = tracing_gen_ctx(); + __trace_graph_return(tr, trace, trace_ctx, calltime, rettime); } static struct fgraph_ops funcgraph_ops = { From 0a663b764dbdf135a126284f454c9f01f95a87d4 Mon Sep 17 00:00:00 2001 From: Huiwen He Date: Tue, 24 Feb 2026 10:35:44 +0800 Subject: [PATCH 689/828] tracing: Fix syscall events activation by ensuring refcount hits zero When multiple syscall events are specified in the kernel command line (e.g., trace_event=syscalls:sys_enter_openat,syscalls:sys_enter_close), they are often not captured after boot, even though they appear enabled in the tracing/set_event file. The issue stems from how syscall events are initialized. Syscall tracepoints require the global reference count (sys_tracepoint_refcount) to transition from 0 to 1 to trigger the registration of the syscall work (TIF_SYSCALL_TRACEPOINT) for tasks, including the init process (pid 1). The current implementation of early_enable_events() with disable_first=true used an interleaved sequence of "Disable A -> Enable A -> Disable B -> Enable B". If multiple syscalls are enabled, the refcount never drops to zero, preventing the 0->1 transition that triggers actual registration. Fix this by splitting early_enable_events() into two distinct phases: 1. Disable all events specified in the buffer. 2. Enable all events specified in the buffer. This ensures the refcount hits zero before re-enabling, allowing syscall events to be properly activated during early boot. The code is also refactored to use a helper function to avoid logic duplication between the disable and enable phases. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260224023544.1250787-1-hehuiwen@kylinos.cn Fixes: ce1039bd3a89 ("tracing: Fix enabling of syscall events on the command line") Signed-off-by: Huiwen He Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 52 ++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9928da636c9d..9c7f26cbe171 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -4668,26 +4668,22 @@ static __init int event_trace_memsetup(void) return 0; } -__init void -early_enable_events(struct trace_array *tr, char *buf, bool disable_first) +/* + * Helper function to enable or disable a comma-separated list of events + * from the bootup buffer. + */ +static __init void __early_set_events(struct trace_array *tr, char *buf, bool enable) { char *token; - int ret; - - while (true) { - token = strsep(&buf, ","); - - if (!token) - break; + while ((token = strsep(&buf, ","))) { if (*token) { - /* Restarting syscalls requires that we stop them first */ - if (disable_first) + if (enable) { + if (ftrace_set_clr_event(tr, token, 1)) + pr_warn("Failed to enable trace event: %s\n", token); + } else { ftrace_set_clr_event(tr, token, 0); - - ret = ftrace_set_clr_event(tr, token, 1); - if (ret) - pr_warn("Failed to enable trace event: %s\n", token); + } } /* Put back the comma to allow this to be called again */ @@ -4696,6 +4692,32 @@ early_enable_events(struct trace_array *tr, char *buf, bool disable_first) } } +/** + * early_enable_events - enable events from the bootup buffer + * @tr: The trace array to enable the events in + * @buf: The buffer containing the comma separated list of events + * @disable_first: If true, disable all events in @buf before enabling them + * + * This function enables events from the bootup buffer. If @disable_first + * is true, it will first disable all events in the buffer before enabling + * them. + * + * For syscall events, which rely on a global refcount to register the + * SYSCALL_WORK_SYSCALL_TRACEPOINT flag (especially for pid 1), we must + * ensure the refcount hits zero before re-enabling them. A simple + * "disable then enable" per-event is not enough if multiple syscalls are + * used, as the refcount will stay above zero. Thus, we need a two-phase + * approach: disable all, then enable all. + */ +__init void +early_enable_events(struct trace_array *tr, char *buf, bool disable_first) +{ + if (disable_first) + __early_set_events(tr, buf, false); + + __early_set_events(tr, buf, true); +} + static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); From cc337974cd1084f9821179eb66f4e470d9fd2ed8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2026 21:35:46 -0500 Subject: [PATCH 690/828] ftrace: Disable preemption in the tracepoint callbacks handling filtered pids When function trace PID filtering is enabled, the function tracer will attach a callback to the fork tracepoint as well as the exit tracepoint that will add the forked child PID to the PID filtering list as well as remove the PID that is exiting. Commit a46023d5616e ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast") removed the disabling of preemption when calling tracepoint callbacks. The callbacks used for the PID filtering accounting depended on preemption being disabled, and now the trigger a "suspicious RCU usage" warning message. Make them explicitly disable preemption. Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260302213546.156e3e4f@gandalf.local.home Fixes: a46023d5616e ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 827fb9a0bf0d..2f72af0357e5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8611,6 +8611,7 @@ ftrace_pid_follow_sched_process_fork(void *data, struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, self, task); @@ -8624,6 +8625,7 @@ ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, NULL, task); From a5dd6f58666f22ae16b98a2177bebc3340d38fe9 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 3 Mar 2026 21:57:38 -0500 Subject: [PATCH 691/828] tracing: Disable preemption in the tracepoint callbacks handling filtered pids Filtering PIDs for events triggered the following during selftests: [37] event tracing - restricts events based on pid notrace filtering [ 155.874095] [ 155.874869] ============================= [ 155.876037] WARNING: suspicious RCU usage [ 155.877287] 7.0.0-rc1-00004-g8cd473a19bc7 #7 Not tainted [ 155.879263] ----------------------------- [ 155.882839] kernel/trace/trace_events.c:1057 suspicious rcu_dereference_check() usage! [ 155.889281] [ 155.889281] other info that might help us debug this: [ 155.889281] [ 155.894519] [ 155.894519] rcu_scheduler_active = 2, debug_locks = 1 [ 155.898068] no locks held by ftracetest/4364. [ 155.900524] [ 155.900524] stack backtrace: [ 155.902645] CPU: 1 UID: 0 PID: 4364 Comm: ftracetest Not tainted 7.0.0-rc1-00004-g8cd473a19bc7 #7 PREEMPT(lazy) [ 155.902648] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014 [ 155.902651] Call Trace: [ 155.902655] [ 155.902659] dump_stack_lvl+0x67/0x90 [ 155.902665] lockdep_rcu_suspicious+0x154/0x1a0 [ 155.902672] event_filter_pid_sched_process_fork+0x9a/0xd0 [ 155.902678] kernel_clone+0x367/0x3a0 [ 155.902689] __x64_sys_clone+0x116/0x140 [ 155.902696] do_syscall_64+0x158/0x460 [ 155.902700] ? entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 155.902702] ? trace_irq_disable+0x1d/0xc0 [ 155.902709] entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 155.902711] RIP: 0033:0x4697c3 [ 155.902716] Code: 1f 84 00 00 00 00 00 64 48 8b 04 25 10 00 00 00 45 31 c0 31 d2 31 f6 bf 11 00 20 01 4c 8d 90 d0 02 00 00 b8 38 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 89 c2 85 c0 75 2c 64 48 8b 04 25 10 00 00 [ 155.902718] RSP: 002b:00007ffc41150428 EFLAGS: 00000246 ORIG_RAX: 0000000000000038 [ 155.902721] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004697c3 [ 155.902722] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011 [ 155.902724] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000003fccf990 [ 155.902725] R10: 000000003fccd690 R11: 0000000000000246 R12: 0000000000000001 [ 155.902726] R13: 000000003fce8103 R14: 0000000000000001 R15: 0000000000000000 [ 155.902733] [ 155.902747] The tracepoint callbacks recently were changed to allow preemption. The event PID filtering callbacks that were attached to the fork and exit tracepoints expected preemption disabled in order to access the RCU protected PID lists. Add a guard(preempt)() to protect the references to the PID list. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260303215738.6ab275af@fedora Fixes: a46023d5616e ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast") Link: https://patch.msgid.link/20260303131706.96057f61a48a34c43ce1e396@kernel.org Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9c7f26cbe171..b7343fdfd7b0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1039,6 +1039,7 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task) struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_raw(tr->filtered_pids); trace_filter_add_remove_task(pid_list, NULL, task); @@ -1054,6 +1055,7 @@ event_filter_pid_sched_process_fork(void *data, struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_sched(tr->filtered_pids); trace_filter_add_remove_task(pid_list, self, task); From e39bb9e02b68942f8e9359d2a3efe7d37ae6be0e Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Fri, 27 Feb 2026 10:58:42 +0800 Subject: [PATCH 692/828] tracing: Fix WARN_ON in tracing_buffers_mmap_close When a process forks, the child process copies the parent's VMAs but the user_mapped reference count is not incremented. As a result, when both the parent and child processes exit, tracing_buffers_mmap_close() is called twice. On the second call, user_mapped is already 0, causing the function to return -ENODEV and triggering a WARN_ON. Normally, this isn't an issue as the memory is mapped with VM_DONTCOPY set. But this is only a hint, and the application can call madvise(MADVISE_DOFORK) which resets the VM_DONTCOPY flag. When the application does that, it can trigger this issue on fork. Fix it by incrementing the user_mapped reference count without re-mapping the pages in the VMA's open callback. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Cc: Lorenzo Stoakes Link: https://patch.msgid.link/20260227025842.1085206-1-wangqing7171@gmail.com Fixes: cf9f0f7c4c5bb ("tracing: Allow user-space mapping of the ring-buffer") Reported-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=3b5dd2030fe08afdf65d Tested-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com Signed-off-by: Qing Wang Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 21 +++++++++++++++++++++ kernel/trace/trace.c | 13 +++++++++++++ 3 files changed, 35 insertions(+) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 876358cfe1b1..d862fa610270 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -248,6 +248,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); int ring_buffer_map(struct trace_buffer *buffer, int cpu, struct vm_area_struct *vma); +void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu); int ring_buffer_unmap(struct trace_buffer *buffer, int cpu); int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu); #endif /* _LINUX_RING_BUFFER_H */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f16f053ef77d..17d0ea0cc3e6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7310,6 +7310,27 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu, return err; } +/* + * This is called when a VMA is duplicated (e.g., on fork()) to increment + * the user_mapped counter without remapping pages. + */ +void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + + if (WARN_ON(!cpumask_test_cpu(cpu, buffer->cpumask))) + return; + + cpu_buffer = buffer->buffers[cpu]; + + guard(mutex)(&cpu_buffer->mapping_lock); + + if (cpu_buffer->user_mapped) + __rb_inc_dec_mapped(cpu_buffer, true); + else + WARN(1, "Unexpected buffer stat, it should be mapped"); +} + int ring_buffer_unmap(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 23de3719f495..1e7c032a72d2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8213,6 +8213,18 @@ static inline int get_snapshot_map(struct trace_array *tr) { return 0; } static inline void put_snapshot_map(struct trace_array *tr) { } #endif +/* + * This is called when a VMA is duplicated (e.g., on fork()) to increment + * the user_mapped counter without remapping pages. + */ +static void tracing_buffers_mmap_open(struct vm_area_struct *vma) +{ + struct ftrace_buffer_info *info = vma->vm_file->private_data; + struct trace_iterator *iter = &info->iter; + + ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file); +} + static void tracing_buffers_mmap_close(struct vm_area_struct *vma) { struct ftrace_buffer_info *info = vma->vm_file->private_data; @@ -8232,6 +8244,7 @@ static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long a } static const struct vm_operations_struct tracing_buffers_vmops = { + .open = tracing_buffers_mmap_open, .close = tracing_buffers_mmap_close, .may_split = tracing_buffers_may_split, }; From fbdfa8da05b6ae44114fc4f9b3e83e1736fd411c Mon Sep 17 00:00:00 2001 From: Naveen Anandhan Date: Sat, 28 Feb 2026 13:17:35 +0530 Subject: [PATCH 693/828] selftests: tc-testing: fix list_categories() crash on list type list_categories() builds a set directly from the 'category' field of each test case. Since 'category' is a list, set(map(...)) attempts to insert lists into a set, which raises: TypeError: unhashable type: 'list' Flatten category lists and collect unique category names using set.update() instead. Signed-off-by: Naveen Anandhan Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc_helper.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py index 0440d252c4c5..bc447ca57333 100644 --- a/tools/testing/selftests/tc-testing/tdc_helper.py +++ b/tools/testing/selftests/tc-testing/tdc_helper.py @@ -38,10 +38,14 @@ def list_test_cases(testlist): def list_categories(testlist): - """ Show all categories that are present in a test case file. """ - categories = set(map(lambda x: x['category'], testlist)) + """Show all unique categories present in the test cases.""" + categories = set() + for t in testlist: + if 'category' in t: + categories.update(t['category']) + print("Available categories:") - print(", ".join(str(s) for s in categories)) + print(", ".join(sorted(categories))) print("") From b3b1d3ae1d87bc9398fb715c945968bf4c75a09a Mon Sep 17 00:00:00 2001 From: Maximilian Pezzullo Date: Wed, 4 Mar 2026 08:22:59 +0100 Subject: [PATCH 694/828] ata: libata-core: Disable LPM on ST1000DM010-2EP102 According to a user report, the ST1000DM010-2EP102 has problems with LPM, causing random system freezes. The drive belongs to the same BarraCuda family as the ST2000DM008-2FR102 which has the same issue. Cc: stable@vger.kernel.org Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type") Reported-by: Filippo Baiamonte Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221163 Signed-off-by: Maximilian Pezzullo Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ccbf320524da..76b012f544ea 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4189,6 +4189,7 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { ATA_QUIRK_FIRMWARE_WARN }, /* Seagate disks with LPM issues */ + { "ST1000DM010-2EP102", NULL, ATA_QUIRK_NOLPM }, { "ST2000DM008-2FR102", NULL, ATA_QUIRK_NOLPM }, /* drives which fail FPDMA_AA activation (some may freeze afterwards) From 61ded1083b264ff67ca8c2de822c66b6febaf9a8 Mon Sep 17 00:00:00 2001 From: Yujie Liu Date: Fri, 27 Feb 2026 16:24:52 +0800 Subject: [PATCH 695/828] drm/sched: Fix kernel-doc warning for drm_sched_job_done() There is a kernel-doc warning for the scheduler: Warning: drivers/gpu/drm/scheduler/sched_main.c:367 function parameter 'result' not described in 'drm_sched_job_done' Fix the warning by describing the undocumented error code. Fixes: 539f9ee4b52a ("drm/scheduler: properly forward fence errors") Signed-off-by: Yujie Liu [phasta: Flesh out commit message] Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20260227082452.1802922-1-yujie.liu@intel.com --- drivers/gpu/drm/scheduler/sched_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index e6ee35406165..2d5cb21a05b6 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -361,6 +361,7 @@ static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) /** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done + * @result: 0 on success, -ERRNO on error * * Finish the job's fence and resubmit the work items. */ From aac9b27f7c1f2b2cf7f50a9ca633ecbbcaf22af9 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 3 Mar 2026 11:03:42 +0100 Subject: [PATCH 696/828] ata: libata: cancel pending work after clearing deferred_qc Syzbot reported a WARN_ON() in ata_scsi_deferred_qc_work(), caused by ap->ops->qc_defer() returning non-zero before issuing the deferred qc. ata_scsi_schedule_deferred_qc() is called during each command completion. This function will check if there is a deferred QC, and if ap->ops->qc_defer() returns zero, meaning that it is possible to queue the deferred qc at this time (without being deferred), then it will queue the work which will issue the deferred qc. Once the work get to run, which can potentially be a very long time after the work was scheduled, there is a WARN_ON() if ap->ops->qc_defer() returns non-zero. While we hold the ap->lock both when assigning and clearing deferred_qc, and the work itself holds the ap->lock, the code currently does not cancel the work after clearing the deferred qc. This means that the following scenario can happen: 1) One or several NCQ commands are queued. 2) A non-NCQ command is queued, gets stored in ap->deferred_qc. 3) Last NCQ command gets completed, work is queued to issue the deferred qc. 4) Timeout or error happens, ap->deferred_qc is cleared. The queued work is currently NOT canceled. 5) Port is reset. 6) One or several NCQ commands are queued. 7) A non-NCQ command is queued, gets stored in ap->deferred_qc. 8) Work is finally run. Yet at this time, there is still NCQ commands in flight. The work in 8) really belongs to the non-NCQ command in 2), not to the non-NCQ command in 7). The reason why the work is executed when it is not supposed to, is because it was never canceled when ap->deferred_qc was cleared in 4). Thus, ensure that we always cancel the work after clearing ap->deferred_qc. Another potential fix would have been to let ata_scsi_deferred_qc_work() do nothing if ap->ops->qc_defer() returns non-zero. However, canceling the work when clearing ap->deferred_qc seems slightly more logical, as we hold the ap->lock when clearing ap->deferred_qc, so we know that the work cannot be holding the lock. (The function could be waiting for the lock, but that is okay since it will do nothing if ap->deferred_qc is not set.) Reported-by: syzbot+bcaf842a1e8ead8dfb89@syzkaller.appspotmail.com Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Fixes: eddb98ad9364 ("ata: libata-eh: correctly handle deferred qc timeouts") Reviewed-by: Igor Pylypiv Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-eh.c | 1 + drivers/ata/libata-scsi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b373cceb95d2..563432400f72 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -659,6 +659,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, */ WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); ap->deferred_qc = NULL; + cancel_work(&ap->deferred_qc_work); set_host_byte(scmd, DID_TIME_OUT); scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } else if (i < ATA_MAX_QUEUE) { diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index c0dd75a0287c..ad798e5246b4 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1699,6 +1699,7 @@ void ata_scsi_requeue_deferred_qc(struct ata_port *ap) scmd = qc->scsicmd; ap->deferred_qc = NULL; + cancel_work(&ap->deferred_qc_work); ata_qc_free(qc); scmd->result = (DID_SOFT_ERROR << 16); scsi_done(scmd); From fb1091febd668398aa84c161b8d9a1834321e021 Mon Sep 17 00:00:00 2001 From: "Vlastimil Babka (SUSE)" Date: Mon, 2 Mar 2026 10:55:37 +0100 Subject: [PATCH 697/828] mm/slab: allow sheaf refill if blocking is not allowed Ming Lei reported [1] a regression in the ublk null target benchmark due to sheaves. The profile shows that the alloc_from_pcs() fastpath fails and allocations fall back to ___slab_alloc(). It also shows the allocations happen through mempool_alloc(). The strategy of mempool_alloc() is to call the underlying allocator (here slab) without __GFP_DIRECT_RECLAIM first. This does not play well with __pcs_replace_empty_main() checking for gfpflags_allow_blocking() to decide if it should refill an empty sheaf or fallback to the slowpath, so we end up falling back. We could change the mempool strategy but there might be other paths doing the same ting. So instead allow sheaf refill when blocking is not allowed, changing the condition to gfpflags_allow_spinning(). The original condition was unnecessarily restrictive. Note this doesn't fully resolve the regression [1] as another component of that are memoryless nodes, which is to be addressed separately. Reported-by: Ming Lei Fixes: e47c897a2949 ("slab: add sheaves to most caches") Link: https://lore.kernel.org/all/aZ0SbIqaIkwoW2mB@fedora/ [1] Link: https://patch.msgid.link/20260302095536.34062-2-vbabka@kernel.org Signed-off-by: Vlastimil Babka (SUSE) --- mm/slub.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b1e9f16ba435..20cb4f3b636d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4567,7 +4567,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4588,8 +4588,9 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + allow_spin = gfpflags_allow_spinning(gfp); + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); @@ -4599,9 +4600,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, stat(s, BARN_GET_FAIL); - can_alloc = gfpflags_allow_blocking(gfp); - - if (can_alloc) { + if (allow_spin) { if (pcs->spare) { empty = pcs->spare; pcs->spare = NULL; @@ -4611,8 +4610,9 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, } local_unlock(&s->cpu_sheaves->lock); + pcs = NULL; - if (!can_alloc) + if (!allow_spin) return NULL; if (empty) { @@ -4632,11 +4632,8 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, if (!full) return NULL; - /* - * we can reach here only when gfpflags_allow_blocking - * so this must not be an irq - */ - local_lock(&s->cpu_sheaves->lock); + if (!local_trylock(&s->cpu_sheaves->lock)) + goto barn_put; pcs = this_cpu_ptr(s->cpu_sheaves); /* @@ -4667,6 +4664,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return pcs; } +barn_put: barn_put_full_sheaf(barn, full); stat(s, BARN_PUT); From 6432f15c818cb30eec7c4ca378ecdebd9796f741 Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Tue, 3 Mar 2026 22:57:22 +0900 Subject: [PATCH 698/828] mm/slab: change stride type from unsigned short to unsigned int Commit 7a8e71bc619d ("mm/slab: use stride to access slabobj_ext") defined the type of slab->stride as unsigned short, because the author initially planned to store stride within the lower 16 bits of the page_type field, but later stored it in unused bits in the counters field instead. However, the idea of having only 2-byte stride turned out to be a serious mistake. On systems with 64k pages, order-1 pages are 128k, which is larger than USHRT_MAX. It triggers a debug warning because s->size is 128k while stride, truncated to 2 bytes, becomes zero: ------------[ cut here ]------------ Warning! stride (0) != s->size (131072) WARNING: mm/slub.c:2231 at alloc_slab_obj_exts_early.constprop.0+0x524/0x534, CPU#6: systemd-sysctl/307 Modules linked in: CPU: 6 UID: 0 PID: 307 Comm: systemd-sysctl Not tainted 7.0.0-rc1+ #6 PREEMPTLAZY Hardware name: IBM,9009-22A POWER9 (architected) 0x4e0202 0xf000005 of:IBM,FW950.E0 (VL950_179) hv:phyp pSeries NIP: c0000000008a9ac0 LR: c0000000008a9abc CTR: 0000000000000000 REGS: c0000000141f7390 TRAP: 0700 Not tainted (7.0.0-rc1+) MSR: 8000000000029033 CR: 28004400 XER: 00000005 CFAR: c000000000279318 IRQMASK: 0 GPR00: c0000000008a9abc c0000000141f7630 c00000000252a300 c00000001427b200 GPR04: 0000000000000004 0000000000000000 c000000000278fd0 0000000000000000 GPR08: fffffffffffe0000 0000000000000000 0000000000000000 0000000022004400 GPR12: c000000000f644b0 c000000017ff8f00 0000000000000000 0000000000000000 GPR16: 0000000000000000 c0000000141f7aa0 0000000000000000 c0000000141f7a88 GPR20: 0000000000000000 0000000000400cc0 ffffffffffffffff c00000001427b180 GPR24: 0000000000000004 00000000000c0cc0 c000000004e89a20 c00000005de90011 GPR28: 0000000000010010 c00000005df00000 c000000006017f80 c00c000000177a00 NIP [c0000000008a9ac0] alloc_slab_obj_exts_early.constprop.0+0x524/0x534 LR [c0000000008a9abc] alloc_slab_obj_exts_early.constprop.0+0x520/0x534 Call Trace: [c0000000141f7630] [c0000000008a9abc] alloc_slab_obj_exts_early.constprop.0+0x520/0x534 (unreliable) [c0000000141f76c0] [c0000000008aafbc] allocate_slab+0x154/0x94c [c0000000141f7760] [c0000000008b41c0] refill_objects+0x124/0x16c [c0000000141f77c0] [c0000000008b4be0] __pcs_replace_empty_main+0x2b0/0x444 [c0000000141f7810] [c0000000008b9600] __kvmalloc_node_noprof+0x840/0x914 [c0000000141f7900] [c000000000a3dd40] seq_read_iter+0x60c/0xb00 [c0000000141f7a10] [c000000000b36b24] proc_reg_read_iter+0x154/0x1fc [c0000000141f7a50] [c0000000009cee7c] vfs_read+0x39c/0x4e4 [c0000000141f7b30] [c0000000009d0214] ksys_read+0x9c/0x180 [c0000000141f7b90] [c00000000003a8d0] system_call_exception+0x1e0/0x4b0 [c0000000141f7e50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec This leads to slab_obj_ext() returning the first slabobj_ext or all objects and confuses the reference counting of object cgroups [1] and memory (un)charging for memory cgroups [2]. Fortunately, the counters field has 32 unused bits instead of 16 on 64-bit CPUs, which is wide enough to hold any value of s->size. Change the type to unsigned int. Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/lkml/ca241daa-e7e7-4604-a48d-de91ec9184a5@linux.ibm.com [1] Closes: https://lore.kernel.org/all/ddff7c7d-c0c3-4780-808f-9a83268bbf0c@linux.ibm.com [2] Fixes: 7a8e71bc619d ("mm/slab: use stride to access slabobj_ext") Signed-off-by: Harry Yoo Link: https://patch.msgid.link/20260303135722.2680521-1-harry.yoo@oracle.com Reviewed-by: Hao Li Tested-by: Venkat Rao Bagalkote Signed-off-by: Vlastimil Babka (SUSE) --- mm/slab.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index f6ef862b60ef..e9ab292acd22 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -59,7 +59,7 @@ struct freelist_counters { * to save memory. In case ->stride field is not available, * such optimizations are disabled. */ - unsigned short stride; + unsigned int stride; #endif }; }; @@ -559,20 +559,20 @@ static inline void put_slab_obj_exts(unsigned long obj_exts) } #ifdef CONFIG_64BIT -static inline void slab_set_stride(struct slab *slab, unsigned short stride) +static inline void slab_set_stride(struct slab *slab, unsigned int stride) { slab->stride = stride; } -static inline unsigned short slab_get_stride(struct slab *slab) +static inline unsigned int slab_get_stride(struct slab *slab) { return slab->stride; } #else -static inline void slab_set_stride(struct slab *slab, unsigned short stride) +static inline void slab_set_stride(struct slab *slab, unsigned int stride) { VM_WARN_ON_ONCE(stride != sizeof(struct slabobj_ext)); } -static inline unsigned short slab_get_stride(struct slab *slab) +static inline unsigned int slab_get_stride(struct slab *slab) { return sizeof(struct slabobj_ext); } From 0fb59eaca18f1254ecdce34354eec3cb1b3b5e10 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 25 Feb 2026 10:55:01 +0800 Subject: [PATCH 699/828] pmdomain: rockchip: Fix PD_VCODEC for RK3588 >From the RK3588 TRM Table 7-1 RK3588 Voltage Domain and Power Domain Summary, PD_RKVDEC0/1 and PD_VENC0/1 rely on VD_VCODEC which require extra voltages to be applied, otherwise it breaks RK3588-evb1-v10 board after vdec support landed[1]. The panic looks like below: rockchip-pm-domain fd8d8000.power-management:power-controller: failed to set domain 'rkvdec0' on, val=0 rockchip-pm-domain fd8d8000.power-management:power-controller: failed to set domain 'rkvdec1' on, val=0 ... Hardware name: Rockchip RK3588S EVB1 V10 Board (DT) Workqueue: pm genpd_power_off_work_fn Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x40/0x84 dump_stack+0x18/0x24 vpanic+0x1ec/0x4fc vpanic+0x0/0x4fc check_panic_on_warn+0x0/0x94 arm64_serror_panic+0x6c/0x78 do_serror+0xc4/0xcc el1h_64_error_handler+0x3c/0x5c el1h_64_error+0x6c/0x70 regmap_mmio_read32le+0x18/0x24 (P) regmap_bus_reg_read+0xfc/0x130 regmap_read+0x188/0x1ac regmap_read+0x54/0x78 rockchip_pd_power+0xcc/0x5f0 rockchip_pd_power_off+0x1c/0x4c genpd_power_off+0x84/0x120 genpd_power_off+0x1b4/0x260 genpd_power_off_work_fn+0x38/0x58 process_scheduled_works+0x194/0x2c4 worker_thread+0x2ac/0x3d8 kthread+0x104/0x124 ret_from_fork+0x10/0x20 SMP: stopping secondary CPUs Kernel Offset: disabled CPU features: 0x3000000,000e0005,40230521,0400720b Memory Limit: none ---[ end Kernel panic - not syncing: Asynchronous SError Interrupt ]--- Chaoyi pointed out the PD_VCODEC is the parent of PD_RKVDEC0/1 and PD_VENC0/1, so checking the PD_VCODEC is enough. [1] https://lore.kernel.org/linux-rockchip/20251020212009.8852-2-detlev.casanova@collabora.com/ Fixes: db6df2e3fc16 ("pmdomain: rockchip: add regulator support") Cc: stable@vger.kernel.org Suggested-by: Chaoyi Chen Signed-off-by: Shawn Lin Reviewed-by: Chaoyi Chen Reviewed-by: Sebastian Reichel Signed-off-by: Ulf Hansson --- drivers/pmdomain/rockchip/pm-domains.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c index 997e93c12951..44d34840ede7 100644 --- a/drivers/pmdomain/rockchip/pm-domains.c +++ b/drivers/pmdomain/rockchip/pm-domains.c @@ -1311,7 +1311,7 @@ static const struct rockchip_domain_info rk3576_pm_domains[] = { static const struct rockchip_domain_info rk3588_pm_domains[] = { [RK3588_PD_GPU] = DOMAIN_RK3588("gpu", 0x0, BIT(0), 0, 0x0, 0, BIT(1), 0x0, BIT(0), BIT(0), false, true), [RK3588_PD_NPU] = DOMAIN_RK3588("npu", 0x0, BIT(1), BIT(1), 0x0, 0, 0, 0x0, 0, 0, false, true), - [RK3588_PD_VCODEC] = DOMAIN_RK3588("vcodec", 0x0, BIT(2), BIT(2), 0x0, 0, 0, 0x0, 0, 0, false, false), + [RK3588_PD_VCODEC] = DOMAIN_RK3588("vcodec", 0x0, BIT(2), BIT(2), 0x0, 0, 0, 0x0, 0, 0, false, true), [RK3588_PD_NPUTOP] = DOMAIN_RK3588("nputop", 0x0, BIT(3), 0, 0x0, BIT(11), BIT(2), 0x0, BIT(1), BIT(1), false, false), [RK3588_PD_NPU1] = DOMAIN_RK3588("npu1", 0x0, BIT(4), 0, 0x0, BIT(12), BIT(3), 0x0, BIT(2), BIT(2), false, false), [RK3588_PD_NPU2] = DOMAIN_RK3588("npu2", 0x0, BIT(5), 0, 0x0, BIT(13), BIT(4), 0x0, BIT(3), BIT(3), false, false), From a58d487fb1a52579d3c37544ea371da78ed70c45 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 4 Mar 2026 09:56:16 +0100 Subject: [PATCH 700/828] drm/ttm/tests: Fix build failure on PREEMPT_RT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a compile error in the kunit tests when CONFIG_PREEMPT_RT is enabled, and the normal mutex is converted into a rtmutex. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202602261547.3bM6yVAS-lkp@intel.com/ Reviewed-by: Jouni Högander Link: https://patch.msgid.link/20260304085616.1216961-1-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/ttm/tests/ttm_bo_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c index d468f8322072..f3103307b5df 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c @@ -222,13 +222,13 @@ static void ttm_bo_reserve_interrupted(struct kunit *test) KUNIT_FAIL(test, "Couldn't create ttm bo reserve task\n"); /* Take a lock so the threaded reserve has to wait */ - mutex_lock(&bo->base.resv->lock.base); + dma_resv_lock(bo->base.resv, NULL); wake_up_process(task); msleep(20); err = kthread_stop(task); - mutex_unlock(&bo->base.resv->lock.base); + dma_resv_unlock(bo->base.resv); KUNIT_ASSERT_EQ(test, err, -ERESTARTSYS); } From 3d1973a0c76a78a4728cff13648a188ed486cf44 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Wed, 25 Feb 2026 20:30:23 +0100 Subject: [PATCH 701/828] x86/boot: Handle relative CONFIG_EFI_SBAT_FILE file paths CONFIG_EFI_SBAT_FILE can be a relative path. When compiling using a different output directory (O=) the build currently fails because it can't find the filename set in CONFIG_EFI_SBAT_FILE: arch/x86/boot/compressed/sbat.S: Assembler messages: arch/x86/boot/compressed/sbat.S:6: Error: file not found: kernel.sbat Add $(srctree) as include dir for sbat.o. [ bp: Massage commit message. ] Fixes: 61b57d35396a ("x86/efi: Implement support for embedding SBAT data for x86") Signed-off-by: Jan Stancek Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Vitaly Kuznetsov Cc: Link: https://patch.msgid.link/f4eda155b0cef91d4d316b4e92f5771cb0aa7187.1772047658.git.jstancek@redhat.com --- arch/x86/boot/compressed/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 68f9d7a1683b..b8b2b7bea1d3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -113,6 +113,7 @@ vmlinux-objs-$(CONFIG_EFI_SBAT) += $(obj)/sbat.o ifdef CONFIG_EFI_SBAT $(obj)/sbat.o: $(CONFIG_EFI_SBAT_FILE) +AFLAGS_sbat.o += -I $(srctree) endif $(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE From fbb143e4a6efa4a175e856fc898754b06cb13c4f Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 4 Mar 2026 07:19:55 +0000 Subject: [PATCH 702/828] ASoC: dt-bindings: renesas,rz-ssi: Document RZ/G3L SoC Document RZ/G3L SSIF-2 bindings. The RZ/G3L SSIF-2 IP is identical to one found on the RZ/G2L SoC. Signed-off-by: Biju Das Link: https://patch.msgid.link/20260304072000.6787-1-biju.das.jz@bp.renesas.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml b/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml index e4cdbf2202b9..1394f78281fc 100644 --- a/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml +++ b/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml @@ -20,6 +20,7 @@ properties: - renesas,r9a07g044-ssi # RZ/G2{L,LC} - renesas,r9a07g054-ssi # RZ/V2L - renesas,r9a08g045-ssi # RZ/G3S + - renesas,r9a08g046-ssi # RZ/G3L - const: renesas,rz-ssi reg: From 325291b20f8a6f14b9c82edbf5d12e4e71f6adaa Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Wed, 4 Mar 2026 14:32:55 +0800 Subject: [PATCH 703/828] ASoC: amd: yc: Add DMI quirk for ASUS EXPERTBOOK PM1503CDA Add a DMI quirk for the ASUS EXPERTBOOK PM1503CDA fixing the issue where the internal microphone was not detected. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221070 Cc: stable@vger.kernel.org Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260304063255.139331-1-zhangheng@kylinos.cn Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 7af4daeb4c6f..1324543b42d7 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -710,6 +710,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK BM1503CDA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "PM1503CDA"), + } + }, {} }; From 6932256d3a3764f3a5e06e2cb8603be45b6a9fef Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Wed, 25 Feb 2026 15:37:49 -0800 Subject: [PATCH 704/828] time/jiffies: Fix sysctl file error on configurations where USER_HZ < HZ Commit 2dc164a48e6fd ("sysctl: Create converter functions with two new macros") incorrectly returns error to user space when jiffies sysctl converter is used. The old overflow check got replaced with an unconditional one: + if (USER_HZ < HZ) + return -EINVAL; which will always be true on configurations with "USER_HZ < HZ". Remove the check; it is no longer needed as clock_t_to_jiffies() returns ULONG_MAX for the overflow case and proc_int_u2k_conv_uop() checks for "> INT_MAX" after conversion Fixes: 2dc164a48e6fd ("sysctl: Create converter functions with two new macros") Reported-by: Colm Harrington Signed-off-by: Gerd Rausch Signed-off-by: Joel Granados --- kernel/time/jiffies.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a5c7d15fce72..9daf8c5d9687 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -256,8 +256,6 @@ EXPORT_SYMBOL(proc_dointvec_jiffies); int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int dir, void *buffer, size_t *lenp, loff_t *ppos) { - if (SYSCTL_USER_TO_KERN(dir) && USER_HZ < HZ) - return -EINVAL; return proc_dointvec_conv(table, dir, buffer, lenp, ppos, do_proc_int_conv_userhz_jiffies); } From debc1a492b2695d05973994fb0f796dbd9ceaae6 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 3 Mar 2026 15:34:20 -0800 Subject: [PATCH 705/828] iomap: don't mark folio uptodate if read IO has bytes pending If a folio has ifs metadata attached to it and the folio is partially read in through an async IO helper with the rest of it then being read in through post-EOF zeroing or as inline data, and the helper successfully finishes the read first, then post-EOF zeroing / reading inline will mark the folio as uptodate in iomap_set_range_uptodate(). This is a problem because when the read completion path later calls iomap_read_end(), it will call folio_end_read(), which sets the uptodate bit using XOR semantics. Calling folio_end_read() on a folio that was already marked uptodate clears the uptodate bit. Fix this by not marking the folio as uptodate if the read IO has bytes pending. The folio uptodate state will be set in the read completion path through iomap_end_read() -> folio_end_read(). Reported-by: Wei Gao Suggested-by: Sasha Levin Tested-by: Wei Gao Reviewed-by: Darrick J. Wong Cc: stable@vger.kernel.org # v6.19 Link: https://lore.kernel.org/linux-fsdevel/aYbmy8JdgXwsGaPP@autotest-wegao.qe.prg2.suse.org/ Fixes: b2f35ac4146d ("iomap: add caller-provided callbacks for read and readahead") Signed-off-by: Joanne Koong Link: https://patch.msgid.link/20260303233420.874231-2-joannelkoong@gmail.com Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index bc82083e420a..00f0efaf12b2 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -80,18 +80,27 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off, { struct iomap_folio_state *ifs = folio->private; unsigned long flags; - bool uptodate = true; + bool mark_uptodate = true; if (folio_test_uptodate(folio)) return; if (ifs) { spin_lock_irqsave(&ifs->state_lock, flags); - uptodate = ifs_set_range_uptodate(folio, ifs, off, len); + /* + * If a read with bytes pending is in progress, we must not call + * folio_mark_uptodate(). The read completion path + * (iomap_read_end()) will call folio_end_read(), which uses XOR + * semantics to set the uptodate bit. If we set it here, the XOR + * in folio_end_read() will clear it, leaving the folio not + * uptodate. + */ + mark_uptodate = ifs_set_range_uptodate(folio, ifs, off, len) && + !ifs->read_bytes_pending; spin_unlock_irqrestore(&ifs->state_lock, flags); } - if (uptodate) + if (mark_uptodate) folio_mark_uptodate(folio); } From ace7dcc8181373a0338efa1686c5e36eb121dff2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Wed, 25 Feb 2026 09:42:20 +0200 Subject: [PATCH 706/828] drm/dp: Add definition for Panel Replay full-line granularity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP specification is saying value 0xff 0xff in PANEL REPLAY SELECTIVE UPDATE X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means full-line granularity. Add definition for this. Cc: dri-devel@lists.freedesktop.org Signed-off-by: Jouni Högander Reviewed-by: Uma Shankar Acked-by: Maarten Lankhorst Link: https://patch.msgid.link/20260225074221.1744330-1-jouni.hogander@intel.com (cherry picked from commit b93311673263bb98a200ab1cb6304f969bdada5c) Signed-off-by: Joonas Lahtinen --- include/drm/display/drm_dp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index e4eebabab975..8b15d3eeb716 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -571,6 +571,8 @@ # define DP_PANEL_REPLAY_LINK_OFF_SUPPORTED_IN_PR_AFTER_ADAPTIVE_SYNC_SDP (1 << 7) #define DP_PANEL_REPLAY_CAP_X_GRANULARITY 0xb2 +# define DP_PANEL_REPLAY_FULL_LINE_GRANULARITY 0xffff + #define DP_PANEL_REPLAY_CAP_Y_GRANULARITY 0xb4 /* Link Configuration */ From a99cac460ddeb3705cb54a8421339f351586b25d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Wed, 25 Feb 2026 09:42:21 +0200 Subject: [PATCH 707/828] drm/i915/psr: Fix for Panel Replay X granularity DPCD register handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP specification is saying value 0xff 0xff in PANEL REPLAY SELECTIVE UPDATE X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means full-line granularity. Take this into account when handling Panel Replay X granularity informed by the panel. Fixes: 1cc854647450 ("drm/i915/psr: Use SU granularity information available in intel_connector") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7284 Tested-by: Mark Pearson Signed-off-by: Jouni Högander Reviewed-by: Uma Shankar Link: https://patch.msgid.link/20260225074221.1744330-2-jouni.hogander@intel.com (cherry picked from commit f5c8f824a495e849492f09a43bd965a8f4d86cb2) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_psr.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 62208ffc5101..4ce1173a2e91 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1307,9 +1307,14 @@ static bool psr2_granularity_check(struct intel_crtc_state *crtc_state, u16 sink_y_granularity = crtc_state->has_panel_replay ? connector->dp.panel_replay_caps.su_y_granularity : connector->dp.psr_caps.su_y_granularity; - u16 sink_w_granularity = crtc_state->has_panel_replay ? - connector->dp.panel_replay_caps.su_w_granularity : - connector->dp.psr_caps.su_w_granularity; + u16 sink_w_granularity; + + if (crtc_state->has_panel_replay) + sink_w_granularity = connector->dp.panel_replay_caps.su_w_granularity == + DP_PANEL_REPLAY_FULL_LINE_GRANULARITY ? + crtc_hdisplay : connector->dp.panel_replay_caps.su_w_granularity; + else + sink_w_granularity = connector->dp.psr_caps.su_w_granularity; /* PSR2 HW only send full lines so we only need to validate the width */ if (crtc_hdisplay % sink_w_granularity) From 531bb98a030cc1073bd7ed9a502c0a3a781e92ee Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 4 Mar 2026 12:37:43 +0000 Subject: [PATCH 708/828] io_uring/zcrx: use READ_ONCE with user shared RQEs Refill queue entries are shared with the user space, use READ_ONCE when reading them. Fixes: 34a3e60821ab9 ("io_uring/zcrx: implement zerocopy receive pp memory provider"); Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/zcrx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 19b287d21f4b..0461edebb042 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -927,11 +927,12 @@ static inline bool io_parse_rqe(struct io_uring_zcrx_rqe *rqe, struct io_zcrx_ifq *ifq, struct net_iov **ret_niov) { + __u64 off = READ_ONCE(rqe->off); unsigned niov_idx, area_idx; struct io_zcrx_area *area; - area_idx = rqe->off >> IORING_ZCRX_AREA_SHIFT; - niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift; + area_idx = off >> IORING_ZCRX_AREA_SHIFT; + niov_idx = (off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift; if (unlikely(rqe->__pad || area_idx)) return false; From d320f160aa5ff36cdf83c645cca52b615e866e32 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 2 Mar 2026 09:30:02 -0800 Subject: [PATCH 709/828] iomap: reject delalloc mappings during writeback Filesystems should never provide a delayed allocation mapping to writeback; they're supposed to allocate the space before replying. This can lead to weird IO errors and crashes in the block layer if the filesystem is being malicious, or if it hadn't set iomap->dev because it's a delalloc mapping. Fix this by failing writeback on delalloc mappings. Currently no filesystems actually misbehave in this manner, but we ought to be stricter about things like that. Cc: stable@vger.kernel.org # v5.5 Fixes: 598ecfbaa742ac ("iomap: lift the xfs writeback code to iomap") Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/20260302173002.GL13829@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Christian Brauner --- fs/iomap/ioend.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index 4d1ef8a2cee9..60546fa14dfe 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -215,17 +215,18 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, WARN_ON_ONCE(!folio->private && map_len < dirty_len); switch (wpc->iomap.type) { - case IOMAP_INLINE: - WARN_ON_ONCE(1); - return -EIO; + case IOMAP_UNWRITTEN: + ioend_flags |= IOMAP_IOEND_UNWRITTEN; + break; + case IOMAP_MAPPED: + break; case IOMAP_HOLE: return map_len; default: - break; + WARN_ON_ONCE(1); + return -EIO; } - if (wpc->iomap.type == IOMAP_UNWRITTEN) - ioend_flags |= IOMAP_IOEND_UNWRITTEN; if (wpc->iomap.flags & IOMAP_F_SHARED) ioend_flags |= IOMAP_IOEND_SHARED; if (folio_test_dropbehind(folio)) From a99d34e5ecb9a8f2212ee5a01140313bb115f9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 11 Feb 2026 11:41:59 +0100 Subject: [PATCH 710/828] Revert "drm/pagemap: Disable device-to-device migration" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With commit a69d1ab971a6 ("mm: Fix a hmm_range_fault() livelock / starvation problem") device-to-device migration is not functional again and the disabling can be reverted. Add the above commit as a Fixes: tag in order for the revert to not take place unless that commit is present. This reverts commit 10dd1eaa80a56d3cf6d7c36b5269c8fed617f001. Cc: Matthew Brost Fixes: b570f37a2ce4 ("mm: Fix a hmm_range_fault() livelock / starvation problem") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260211104159.114947-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 1a3c0049b3f56278c9caf2784c53f6ab435fd12c) Signed-off-by: Rodrigo Vivi [Rodrigo updated Fixes tag] --- drivers/gpu/drm/drm_pagemap.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index bdc79140875c..862675ac5bb2 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -480,18 +480,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, .start = start, .end = end, .pgmap_owner = pagemap->owner, - /* - * FIXME: MIGRATE_VMA_SELECT_DEVICE_PRIVATE intermittently - * causes 'xe_exec_system_allocator --r *race*no*' to trigger aa - * engine reset and a hard hang due to getting stuck on a folio - * lock. This should work and needs to be root-caused. The only - * downside of not selecting MIGRATE_VMA_SELECT_DEVICE_PRIVATE - * is that device-to-device migrations won’t work; instead, - * memory will bounce through system memory. This path should be - * rare and only occur when the madvise attributes of memory are - * changed or atomics are being used. - */ - .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT, + .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT | + MIGRATE_VMA_SELECT_DEVICE_PRIVATE, }; unsigned long i, npages = npages_in_range(start, end); unsigned long own_pages = 0, migrated_pages = 0; From b3368ecca9538b88ddf982ea99064860fd5add97 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Fri, 20 Feb 2026 17:53:08 -0500 Subject: [PATCH 711/828] drm/xe/gsc: Fix GSC proxy cleanup on early initialization failure xe_gsc_proxy_remove undoes what is done in both xe_gsc_proxy_init and xe_gsc_proxy_start; however, if we fail between those 2 calls, it is possible that the HW forcewake access hasn't been initialized yet and so we hit errors when the cleanup code tries to write GSC register. To avoid that, split the cleanup in 2 functions so that the HW cleanup is only called if the HW setup was completed successfully. Since the HW cleanup (interrupt disabling) is now removed from xe_gsc_proxy_remove, the cleanup on error paths in xe_gsc_proxy_start must be updated to disable interrupts before returning. Fixes: ff6cd29b690b ("drm/xe: Cleanup unwind of gt initialization") Signed-off-by: Zhanjun Dong Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260220225308.101469-1-zhanjun.dong@intel.com (cherry picked from commit 2b37c401b265c07b46408b5cb36a4b757c9b5060) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc_proxy.c | 43 +++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_gsc_types.h | 2 ++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 42438b21f235..707db650a2ae 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -435,15 +435,11 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) return 0; } -static void xe_gsc_proxy_remove(void *arg) +static void xe_gsc_proxy_stop(struct xe_gsc *gsc) { - struct xe_gsc *gsc = arg; struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); - if (!gsc->proxy.component_added) - return; - /* disable HECI2 IRQs */ scoped_guard(xe_pm_runtime, xe) { CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); @@ -455,6 +451,30 @@ static void xe_gsc_proxy_remove(void *arg) } xe_gsc_wait_for_worker_completion(gsc); + gsc->proxy.started = false; +} + +static void xe_gsc_proxy_remove(void *arg) +{ + struct xe_gsc *gsc = arg; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + + if (!gsc->proxy.component_added) + return; + + /* + * GSC proxy start is an async process that can be ongoing during + * Xe module load/unload. Using devm managed action to register + * xe_gsc_proxy_stop could cause issues if Xe module unload has + * already started when the action is registered, potentially leading + * to the cleanup being called at the wrong time. Therefore, instead + * of registering a separate devm action to undo what is done in + * proxy start, we call it from here, but only if the start has + * completed successfully (tracked with the 'started' flag). + */ + if (gsc->proxy.started) + xe_gsc_proxy_stop(gsc); component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); gsc->proxy.component_added = false; @@ -510,6 +530,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) */ int xe_gsc_proxy_start(struct xe_gsc *gsc) { + struct xe_gt *gt = gsc_to_gt(gsc); int err; /* enable the proxy interrupt in the GSC shim layer */ @@ -521,12 +542,18 @@ int xe_gsc_proxy_start(struct xe_gsc *gsc) */ err = xe_gsc_proxy_request_handler(gsc); if (err) - return err; + goto err_irq_disable; if (!xe_gsc_proxy_init_done(gsc)) { - xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); - return -EIO; + xe_gt_err(gt, "GSC FW reports proxy init not completed\n"); + err = -EIO; + goto err_irq_disable; } + gsc->proxy.started = true; return 0; + +err_irq_disable: + gsc_proxy_irq_toggle(gsc, false); + return err; } diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 97c056656df0..5aaa2a75861f 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -58,6 +58,8 @@ struct xe_gsc { struct mutex mutex; /** @proxy.component_added: whether the component has been added */ bool component_added; + /** @proxy.started: whether the proxy has been started */ + bool started; /** @proxy.bo: object to store message to and from the GSC */ struct xe_bo *bo; /** @proxy.to_gsc: map of the memory used to send messages to the GSC */ From 89865e6dc8487b627302bdced3f965cd0c406835 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 27 Feb 2026 08:43:41 -0800 Subject: [PATCH 712/828] drm/xe/xe2_hpg: Correct implementation of Wa_16025250150 Wa_16025250150 asks us to set five register fields of the register to 0x1 each. However we were just OR'ing this into the existing register value (which has a default of 0x4 for each nibble-sized field) resulting in final field values of 0x5 instead of the desired 0x1. Correct the RTP programming (use FIELD_SET instead of SET) to ensure each field is assigned to exactly the value we want. Cc: Aradhya Bhatia Cc: Tejas Upadhyay Cc: stable@vger.kernel.org # v6.16+ Fixes: 7654d51f1fd8 ("drm/xe/xe2hpg: Add Wa_16025250150") Reviewed-by: Ngai-Mint Kwan Link: https://patch.msgid.link/20260227164341.3600098-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit d139209ef88e48af1f6731cd45440421c757b6b5) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7b1bd79ab17..462c2fa712e0 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -241,12 +241,13 @@ static const struct xe_rtp_entry_sr gt_was[] = { { XE_RTP_NAME("16025250150"), XE_RTP_RULES(GRAPHICS_VERSION(2001)), - XE_RTP_ACTIONS(SET(LSN_VC_REG2, - LSN_LNI_WGT(1) | - LSN_LNE_WGT(1) | - LSN_DIM_X_WGT(1) | - LSN_DIM_Y_WGT(1) | - LSN_DIM_Z_WGT(1))) + XE_RTP_ACTIONS(FIELD_SET(LSN_VC_REG2, + LSN_LNI_WGT_MASK | LSN_LNE_WGT_MASK | + LSN_DIM_X_WGT_MASK | LSN_DIM_Y_WGT_MASK | + LSN_DIM_Z_WGT_MASK, + LSN_LNI_WGT(1) | LSN_LNE_WGT(1) | + LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) | + LSN_DIM_Z_WGT(1))) }, /* Xe2_HPM */ From 3091723785def05ebfe6a50866f87a044ae314ba Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 4 Feb 2026 17:28:11 +0000 Subject: [PATCH 713/828] drm/xe/reg_sr: Fix leak on xa_store failure Free the newly allocated entry when xa_store() fails to avoid a memory leak on the error path. v2: use goto fail_free. (Bala) Fixes: e5283bd4dfec ("drm/xe/reg_sr: Remove register pool") Cc: Balasubramani Vivekanandan Cc: Matt Roper Signed-off-by: Shuicheng Lin Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260204172810.1486719-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper (cherry picked from commit 6bc6fec71ac45f52db609af4e62bdb96b9f5fadb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 2e5c78940b41..a07be161cfa2 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -98,10 +98,12 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, *pentry = *e; ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); if (ret) - goto fail; + goto fail_free; return 0; +fail_free: + kfree(pentry); fail: xe_gt_err(gt, "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n", From 0cfe9c4838f1147713f6b5c02094cd4dc0c598fa Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 23 Feb 2026 23:21:45 +0530 Subject: [PATCH 714/828] drm/xe: Fix memory leak in xe_vm_madvise_ioctl When check_bo_args_are_sane() validation fails, jump to the new free_vmas cleanup label to properly free the allocated resources. This ensures proper cleanup in this error path. Fixes: 293032eec4ba ("drm/xe/bo: Update atomic_access attribute on madvise") Cc: stable@vger.kernel.org # v6.18+ Reviewed-by: Shuicheng Lin Signed-off-by: Varun Gupta Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260223175145.1532801-1-varun.gupta@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 29bd06faf727a4b76663e4be0f7d770e2d2a7965) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_madvise.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 95bf53cc29e3..bc39a9a9790c 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -453,7 +453,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil madvise_range.num_vmas, args->atomic.val)) { err = -EINVAL; - goto madv_fini; + goto free_vmas; } } @@ -490,6 +490,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil err_fini: if (madvise_range.has_bo_vmas) drm_exec_fini(&exec); +free_vmas: kfree(madvise_range.vmas); madvise_range.vmas = NULL; madv_fini: From 8da8df43124128e0478beadb58faa9cab56a3f13 Mon Sep 17 00:00:00 2001 From: Yang Xiuwei Date: Wed, 4 Mar 2026 12:51:19 +0800 Subject: [PATCH 715/828] block: use __bio_add_page in bio_copy_kern Since the bio is allocated with the exact number of pages needed via blk_rq_map_bio_alloc(), and the loop iterates exactly that many times, bio_add_page() cannot fail due to insufficient space. Switch to __bio_add_page() and remove the dead error handling code. Suggested-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Yang Xiuwei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-map.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 4533094d9458..c6fa2238d578 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -398,8 +398,7 @@ static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int le if (op_is_write(op)) memcpy(page_address(page), p, bytes); - if (bio_add_page(bio, page, bytes, 0) < bytes) - break; + __bio_add_page(bio, page, bytes, 0); len -= bytes; p += bytes; From 212dd8477653fe72c2a6a99143cd662f6430cf4f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 27 Feb 2026 15:51:42 +0000 Subject: [PATCH 716/828] arm64: Silence sparse warnings caused by the type casting in (cmp)xchg The arm64 xchg/cmpxchg() wrappers cast the arguments to (unsigned long) prior to invoking the static inline functions implementing the operation. Some restrictive type annotations (e.g. __bitwise) lead to sparse warnings like below: sparse warnings: (new ones prefixed by >>) fs/crypto/bio.c:67:17: sparse: sparse: cast from restricted blk_status_t >> fs/crypto/bio.c:67:17: sparse: sparse: cast to restricted blk_status_t Force the casting in the arm64 xchg/cmpxchg() wrappers to silence sparse. Signed-off-by: Catalin Marinas Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202602230947.uNRsPyBn-lkp@intel.com/ Link: https://lore.kernel.org/r/202602230947.uNRsPyBn-lkp@intel.com/ Cc: Will Deacon Cc: Mark Rutland Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index d7a540736741..6cf3cd6873f5 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -91,8 +91,9 @@ __XCHG_GEN(_mb) #define __xchg_wrapper(sfx, ptr, x) \ ({ \ __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __arch_xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret = (__force __typeof__(*(ptr))) \ + __arch_xchg##sfx((__force unsigned long)(x), (ptr), \ + sizeof(*(ptr))); \ __ret; \ }) @@ -175,9 +176,10 @@ __CMPXCHG_GEN(_mb) #define __cmpxchg_wrapper(sfx, ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg##sfx((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr))); \ + __ret = (__force __typeof__(*(ptr))) \ + __cmpxchg##sfx((ptr), (__force unsigned long)(o), \ + (__force unsigned long)(n), \ + sizeof(*(ptr))); \ __ret; \ }) From 82169dace41cbaa951341b0f80f4570be3b2dec0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 18 Feb 2026 10:52:04 +0100 Subject: [PATCH 717/828] xenbus: add xenbus_device parameter to xenbus_read_driver_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to prepare checking the xenbus device status in xenbus_read_driver_state(), add the pointer to struct xenbus_device as a parameter. Tested-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Acked-by: "Martin K. Petersen" # SCSI Acked-by: Jakub Kicinski Acked-by: Bjorn Helgaas # drivers/pci/xen-pcifront.c Signed-off-by: Juergen Gross Message-ID: <20260218095205.453657-2-jgross@suse.com> --- drivers/net/xen-netfront.c | 34 +++++++++++----------- drivers/pci/xen-pcifront.c | 8 ++--- drivers/scsi/xen-scsifront.c | 2 +- drivers/xen/xen-pciback/xenbus.c | 10 +++---- drivers/xen/xenbus/xenbus_client.c | 4 ++- drivers/xen/xenbus/xenbus_probe.c | 6 ++-- drivers/xen/xenbus/xenbus_probe_frontend.c | 2 +- include/xen/xenbus.h | 3 +- 8 files changed, 36 insertions(+), 33 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7c2220366623..e2da977c9c50 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1646,7 +1646,7 @@ static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog, /* avoid the race with XDP headroom adjustment */ wait_event(module_wq, - xenbus_read_driver_state(np->xbdev->otherend) == + xenbus_read_driver_state(np->xbdev, np->xbdev->otherend) == XenbusStateReconfigured); np->netfront_xdp_enabled = true; @@ -1764,9 +1764,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) do { xenbus_switch_state(dev, XenbusStateInitialising); err = wait_event_timeout(module_wq, - xenbus_read_driver_state(dev->otherend) != + xenbus_read_driver_state(dev, dev->otherend) != XenbusStateClosed && - xenbus_read_driver_state(dev->otherend) != + xenbus_read_driver_state(dev, dev->otherend) != XenbusStateUnknown, XENNET_TIMEOUT); } while (!err); @@ -2627,31 +2627,31 @@ static void xennet_bus_close(struct xenbus_device *dev) { int ret; - if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed) + if (xenbus_read_driver_state(dev, dev->otherend) == XenbusStateClosed) return; do { xenbus_switch_state(dev, XenbusStateClosing); ret = wait_event_timeout(module_wq, - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosing || - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosed || - xenbus_read_driver_state(dev->otherend) == - XenbusStateUnknown, - XENNET_TIMEOUT); + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateClosing || + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateUnknown, + XENNET_TIMEOUT); } while (!ret); - if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed) + if (xenbus_read_driver_state(dev, dev->otherend) == XenbusStateClosed) return; do { xenbus_switch_state(dev, XenbusStateClosed); ret = wait_event_timeout(module_wq, - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosed || - xenbus_read_driver_state(dev->otherend) == - XenbusStateUnknown, - XENNET_TIMEOUT); + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateUnknown, + XENNET_TIMEOUT); } while (!ret); } diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 11636634ae51..cd22bf984024 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -856,7 +856,7 @@ static void pcifront_try_connect(struct pcifront_device *pdev) int err; /* Only connect once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != XenbusStateInitialised) return; @@ -876,7 +876,7 @@ static int pcifront_try_disconnect(struct pcifront_device *pdev) enum xenbus_state prev_state; - prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + prev_state = xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename); if (prev_state >= XenbusStateClosing) goto out; @@ -895,7 +895,7 @@ out: static void pcifront_attach_devices(struct pcifront_device *pdev) { - if (xenbus_read_driver_state(pdev->xdev->nodename) == + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) == XenbusStateReconfiguring) pcifront_connect(pdev); } @@ -909,7 +909,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev) struct pci_dev *pci_dev; char str[64]; - state = xenbus_read_driver_state(pdev->xdev->nodename); + state = xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename); if (state == XenbusStateInitialised) { dev_dbg(&pdev->xdev->dev, "Handle skipped connect.\n"); /* We missed Connected and need to initialize. */ diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 924025305753..ef74d4da5ab0 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -1175,7 +1175,7 @@ static void scsifront_backend_changed(struct xenbus_device *dev, return; } - if (xenbus_read_driver_state(dev->nodename) == + if (xenbus_read_driver_state(dev, dev->nodename) == XenbusStateInitialised) scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index b11e401f1b1e..4bd1c7a8957e 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -149,12 +149,12 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) mutex_lock(&pdev->dev_lock); /* Make sure we only do this setup once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != XenbusStateInitialised) goto out; /* Wait for frontend to state that it has published the configuration */ - if (xenbus_read_driver_state(pdev->xdev->otherend) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->otherend) != XenbusStateInitialised) goto out; @@ -374,7 +374,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev, dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); mutex_lock(&pdev->dev_lock); - if (xenbus_read_driver_state(pdev->xdev->nodename) != state) + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != state) goto out; err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", @@ -572,7 +572,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) /* It's possible we could get the call to setup twice, so make sure * we're not already connected. */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != XenbusStateInitWait) goto out; @@ -662,7 +662,7 @@ static void xen_pcibk_be_watch(struct xenbus_watch *watch, struct xen_pcibk_device *pdev = container_of(watch, struct xen_pcibk_device, be_watch); - switch (xenbus_read_driver_state(pdev->xdev->nodename)) { + switch (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename)) { case XenbusStateInitWait: xen_pcibk_setup_backend(pdev); break; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 2dc874fb5506..e502435145ae 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -931,12 +931,14 @@ static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr) /** * xenbus_read_driver_state - read state from a store path + * @dev: xenbus device pointer * @path: path for driver * * Returns: the state of the driver rooted at the given store path, or * XenbusStateUnknown if no state can be read. */ -enum xenbus_state xenbus_read_driver_state(const char *path) +enum xenbus_state xenbus_read_driver_state(const struct xenbus_device *dev, + const char *path) { enum xenbus_state result; int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 9f9011cd7447..2eed06ba5d38 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -191,7 +191,7 @@ void xenbus_otherend_changed(struct xenbus_watch *watch, return; } - state = xenbus_read_driver_state(dev->otherend); + state = xenbus_read_driver_state(dev, dev->otherend); dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", state, xenbus_strstate(state), dev->otherend_watch.node, path); @@ -364,7 +364,7 @@ void xenbus_dev_remove(struct device *_dev) * closed. */ if (!drv->allow_rebind || - xenbus_read_driver_state(dev->nodename) == XenbusStateClosing) + xenbus_read_driver_state(dev, dev->nodename) == XenbusStateClosing) xenbus_switch_state(dev, XenbusStateClosed); } EXPORT_SYMBOL_GPL(xenbus_dev_remove); @@ -514,7 +514,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, size_t stringlen; char *tmpstring; - enum xenbus_state state = xenbus_read_driver_state(nodename); + enum xenbus_state state = xenbus_read_driver_state(NULL, nodename); if (state != XenbusStateInitialising) { /* Device is not new, so ignore it. This can happen if a diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index f04707d1f667..ca04609730df 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -253,7 +253,7 @@ static int print_device_status(struct device *dev, void *data) } else if (xendev->state < XenbusStateConnected) { enum xenbus_state rstate = XenbusStateUnknown; if (xendev->otherend) - rstate = xenbus_read_driver_state(xendev->otherend); + rstate = xenbus_read_driver_state(xendev, xendev->otherend); pr_warn("Timeout connecting to device: %s (local state %d, remote state %d)\n", xendev->nodename, xendev->state, rstate); } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index c94caf852aea..15319da65b7f 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -228,7 +228,8 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port); int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port); -enum xenbus_state xenbus_read_driver_state(const char *path); +enum xenbus_state xenbus_read_driver_state(const struct xenbus_device *dev, + const char *path); __printf(3, 4) void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); From e2dcf9065536ab4a1b00828ff0d19f7d282dfecc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 18 Feb 2026 10:52:05 +0100 Subject: [PATCH 718/828] xen/xenbus: better handle backend crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the backend domain crashes, coordinated device cleanup is not possible (as it involves waiting for the backend state change). In that case, toolstack forcefully removes frontend xenstore entries. xenbus_dev_changed() handles this case, and triggers device cleanup. It's possible that toolstack manages to connect new device in that place, before xenbus_dev_changed() notices the old one is missing. If that happens, new one won't be probed and will forever remain in XenbusStateInitialising. Fix this by checking the frontend's state in Xenstore. In case it has been reset to XenbusStateInitialising by Xen tools, consider this being the result of an unplug+plug operation. It's important that cleanup on such unplug doesn't modify Xenstore entries (especially the "state" key) as it belong to the new device to be probed - changing it would derail establishing connection to the new backend (most likely, closing the device before it was even connected). Handle this case by setting new xenbus_device->vanished flag to true, and check it before changing state entry. And even if xenbus_dev_changed() correctly detects the device was forcefully removed, the cleanup handling is still racy. Since this whole handling doesn't happened in a single Xenstore transaction, it's possible that toolstack might put a new device there already. Avoid re-creating the state key (which in the case of loosing the race would actually close newly attached device). The problem does not apply to frontend domain crash, as this case involves coordinated cleanup. Problem originally reported at https://lore.kernel.org/xen-devel/aOZvivyZ9YhVWDLN@mail-itl/T/#t, including reproduction steps. Based-on-patch-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Message-ID: <20260218095205.453657-3-jgross@suse.com> --- drivers/xen/xenbus/xenbus_client.c | 13 +++++++++-- drivers/xen/xenbus/xenbus_probe.c | 36 ++++++++++++++++++++++++++++++ include/xen/xenbus.h | 1 + 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index e502435145ae..19672b08a680 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -226,8 +226,9 @@ __xenbus_switch_state(struct xenbus_device *dev, struct xenbus_transaction xbt; int current_state; int err, abort; + bool vanished = false; - if (state == dev->state) + if (state == dev->state || dev->vanished) return 0; again: @@ -242,6 +243,10 @@ again: err = xenbus_scanf(xbt, dev->nodename, "state", "%d", ¤t_state); if (err != 1) goto abort; + if (current_state != dev->state && current_state == XenbusStateInitialising) { + vanished = true; + goto abort; + } err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); if (err) { @@ -256,7 +261,7 @@ abort: if (err == -EAGAIN && !abort) goto again; xenbus_switch_fatal(dev, depth, err, "ending transaction"); - } else + } else if (!vanished) dev->state = state; return 0; @@ -941,6 +946,10 @@ enum xenbus_state xenbus_read_driver_state(const struct xenbus_device *dev, const char *path) { enum xenbus_state result; + + if (dev && dev->vanished) + return XenbusStateUnknown; + int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); if (err) result = XenbusStateUnknown; diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 2eed06ba5d38..eb260eceb4d2 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -444,6 +444,9 @@ static void xenbus_cleanup_devices(const char *path, struct bus_type *bus) info.dev = NULL; bus_for_each_dev(bus, NULL, &info, cleanup_dev); if (info.dev) { + dev_warn(&info.dev->dev, + "device forcefully removed from xenstore\n"); + info.dev->vanished = true; device_unregister(&info.dev->dev); put_device(&info.dev->dev); } @@ -659,6 +662,39 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) return; dev = xenbus_device_find(root, &bus->bus); + /* + * Backend domain crash results in not coordinated frontend removal, + * without going through XenbusStateClosing. If this is a new instance + * of the same device Xen tools will have reset the state to + * XenbusStateInitializing. + * It might be that the backend crashed early during the init phase of + * device setup, in which case the known state would have been + * XenbusStateInitializing. So test the backend domid to match the + * saved one. In case the new backend happens to have the same domid as + * the old one, we can just carry on, as there is no inconsistency + * resulting in this case. + */ + if (dev && !strcmp(bus->root, "device")) { + enum xenbus_state state = xenbus_read_driver_state(dev, dev->nodename); + unsigned int backend = xenbus_read_unsigned(root, "backend-id", + dev->otherend_id); + + if (state == XenbusStateInitialising && + (state != dev->state || backend != dev->otherend_id)) { + /* + * State has been reset, assume the old one vanished + * and new one needs to be probed. + */ + dev_warn(&dev->dev, + "state reset occurred, reconnecting\n"); + dev->vanished = true; + } + if (dev->vanished) { + device_unregister(&dev->dev); + put_device(&dev->dev); + dev = NULL; + } + } if (!dev) xenbus_probe_node(bus, type, root); else diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 15319da65b7f..8ca15743af7f 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -80,6 +80,7 @@ struct xenbus_device { const char *devicetype; const char *nodename; const char *otherend; + bool vanished; int otherend_id; struct xenbus_watch otherend_watch; struct device dev; From c25c4aa3f79a488cc270507935a29c07dc6bddfc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 27 Feb 2026 18:53:06 +0000 Subject: [PATCH 719/828] arm64: mm: Add PTE_DIRTY back to PAGE_KERNEL* to fix kexec/hibernation Commit 143937ca51cc ("arm64, mm: avoid always making PTE dirty in pte_mkwrite()") changed pte_mkwrite_novma() to only clear PTE_RDONLY when PTE_DIRTY is set. This was to allow writable-clean PTEs for swap pages that haven't actually been written. However, this broke kexec and hibernation for some platforms. Both go through trans_pgd_create_copy() -> _copy_pte(), which calls pte_mkwrite_novma() to make the temporary linear-map copy fully writable. With the updated pte_mkwrite_novma(), read-only kernel pages (without PTE_DIRTY) remain read-only in the temporary mapping. While such behaviour is fine for user pages where hardware DBM or trapping will make them writeable, subsequent in-kernel writes by the kexec relocation code will fault. Add PTE_DIRTY back to all _PAGE_KERNEL* protection definitions. This was the case prior to 5.4, commit aa57157be69f ("arm64: Ensure VM_WRITE|VM_SHARED ptes are clean by default"). With the kernel linear-map PTEs always having PTE_DIRTY set, pte_mkwrite_novma() correctly clears PTE_RDONLY. Fixes: 143937ca51cc ("arm64, mm: avoid always making PTE dirty in pte_mkwrite()") Signed-off-by: Catalin Marinas Cc: stable@vger.kernel.org Reported-by: Jianpeng Chang Link: https://lore.kernel.org/r/20251204062722.3367201-1-jianpeng.chang.cn@windriver.com Cc: Will Deacon Cc: Huang, Ying Cc: Guenter Roeck Reviewed-by: Huang Ying Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2b32639160de..f560e6420267 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -50,11 +50,11 @@ #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) -#define _PAGE_KERNEL (PROT_NORMAL) -#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) -#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) -#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN) -#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) +#define _PAGE_KERNEL (PROT_NORMAL | PTE_DIRTY) +#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY | PTE_DIRTY) +#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY | PTE_DIRTY) +#define _PAGE_KERNEL_EXEC ((PROT_NORMAL & ~PTE_PXN) | PTE_DIRTY) +#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT | PTE_DIRTY) #define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) From c3320153769f05fd7fe9d840cb555dd3080ae424 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Fri, 27 Feb 2026 19:19:28 -0500 Subject: [PATCH 720/828] nvme: fix memory allocation in nvme_pr_read_keys() nvme_pr_read_keys() takes num_keys from userspace and uses it to calculate the allocation size for rse via struct_size(). The upper limit is PR_KEYS_MAX (64K). A malicious or buggy userspace can pass a large num_keys value that results in a 4MB allocation attempt at most, causing a warning in the page allocator when the order exceeds MAX_PAGE_ORDER. To fix this, use kvzalloc() instead of kzalloc(). This bug has the same reasoning and fix with the patch below: https://lore.kernel.org/linux-block/20251212013510.3576091-1-kartikey406@gmail.com/ Warning log: WARNING: mm/page_alloc.c:5216 at __alloc_frozen_pages_noprof+0x5aa/0x2300 mm/page_alloc.c:5216, CPU#1: syz-executor117/272 Modules linked in: CPU: 1 UID: 0 PID: 272 Comm: syz-executor117 Not tainted 6.19.0 #1 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 RIP: 0010:__alloc_frozen_pages_noprof+0x5aa/0x2300 mm/page_alloc.c:5216 Code: ff 83 bd a8 fe ff ff 0a 0f 86 69 fb ff ff 0f b6 1d f9 f9 c4 04 80 fb 01 0f 87 3b 76 30 ff 83 e3 01 75 09 c6 05 e4 f9 c4 04 01 <0f> 0b 48 c7 85 70 fe ff ff 00 00 00 00 e9 8f fd ff ff 31 c0 e9 0d RSP: 0018:ffffc90000fcf450 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 1ffff920001f9ea0 RDX: 0000000000000000 RSI: 000000000000000b RDI: 0000000000040dc0 RBP: ffffc90000fcf648 R08: ffff88800b6c3380 R09: 0000000000000001 R10: ffffc90000fcf840 R11: ffff88807ffad280 R12: 0000000000000000 R13: 0000000000040dc0 R14: 0000000000000001 R15: ffffc90000fcf620 FS: 0000555565db33c0(0000) GS:ffff8880be26c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000002000000c CR3: 0000000003b72000 CR4: 00000000000006f0 Call Trace: alloc_pages_mpol+0x236/0x4d0 mm/mempolicy.c:2486 alloc_frozen_pages_noprof+0x149/0x180 mm/mempolicy.c:2557 ___kmalloc_large_node+0x10c/0x140 mm/slub.c:5598 __kmalloc_large_node_noprof+0x25/0xc0 mm/slub.c:5629 __do_kmalloc_node mm/slub.c:5645 [inline] __kmalloc_noprof+0x483/0x6f0 mm/slub.c:5669 kmalloc_noprof include/linux/slab.h:961 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] nvme_pr_read_keys+0x8f/0x4c0 drivers/nvme/host/pr.c:245 blkdev_pr_read_keys block/ioctl.c:456 [inline] blkdev_common_ioctl+0x1b71/0x29b0 block/ioctl.c:730 blkdev_ioctl+0x299/0x700 block/ioctl.c:786 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl fs/ioctl.c:583 [inline] __x64_sys_ioctl+0x1bf/0x220 fs/ioctl.c:583 x64_sys_call+0x1280/0x21b0 mnt/fuzznvme_1/fuzznvme/linux-build/v6.19/./arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x71/0x330 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fb893d3108d Code: 28 c3 e8 46 1e 00 00 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffff61f2f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffff61f3138 RCX: 00007fb893d3108d RDX: 0000000020000040 RSI: 00000000c01070ce RDI: 0000000000000003 RBP: 0000000000000001 R08: 0000000000000000 R09: 00007ffff61f3138 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 00007ffff61f3128 R14: 00007fb893dae530 R15: 0000000000000001 Fixes: 5fd96a4e15de (nvme: Add pr_ops read_keys support) Acked-by: Chao Shi Acked-by: Weidong Zhu Acked-by: Dave Tian Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Sungwoo Kim Signed-off-by: Keith Busch --- drivers/nvme/host/pr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index ad2ecc2f49a9..fe7dbe264815 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -242,7 +242,7 @@ static int nvme_pr_read_keys(struct block_device *bdev, if (rse_len > U32_MAX) return -EINVAL; - rse = kzalloc(rse_len, GFP_KERNEL); + rse = kvzalloc(rse_len, GFP_KERNEL); if (!rse) return -ENOMEM; @@ -267,7 +267,7 @@ static int nvme_pr_read_keys(struct block_device *bdev, } free_rse: - kfree(rse); + kvfree(rse); return ret; } From 48084cc153a5b0fbf0aa98d47670d3be0b9f64d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Mar 2026 11:55:40 +0100 Subject: [PATCH 721/828] x86/numa: Store extra copy of numa_nodes_parsed The topology setup code needs to know the total number of physical nodes enumerated in SRAT; however NUMA_EMU can cause the existing numa_nodes_parsed bitmap to be fictitious. Therefore, keep a copy of the bitmap specifically to retain the physical node count. Suggested-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Tested-by: K Prateek Nayak Tested-by: Zhang Rui Tested-by: Chen Yu Tested-by: Kyle Meyer Link: https://patch.msgid.link/20260303110059.889884023@infradead.org --- arch/x86/include/asm/numa.h | 6 ++++++ arch/x86/mm/numa.c | 8 ++++++++ arch/x86/mm/srat.c | 2 ++ 3 files changed, 16 insertions(+) diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 53ba39ce010c..a9063f332fa6 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -22,6 +22,7 @@ extern int numa_off; */ extern s16 __apicid_to_node[MAX_LOCAL_APIC]; extern nodemask_t numa_nodes_parsed __initdata; +extern nodemask_t numa_phys_nodes_parsed __initdata; static inline void set_apicid_to_node(int apicid, s16 node) { @@ -48,6 +49,7 @@ extern void __init init_cpu_to_node(void); extern void numa_add_cpu(unsigned int cpu); extern void numa_remove_cpu(unsigned int cpu); extern void init_gi_nodes(void); +extern int num_phys_nodes(void); #else /* CONFIG_NUMA */ static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } @@ -55,6 +57,10 @@ static inline void init_cpu_to_node(void) { } static inline void numa_add_cpu(unsigned int cpu) { } static inline void numa_remove_cpu(unsigned int cpu) { } static inline void init_gi_nodes(void) { } +static inline int num_phys_nodes(void) +{ + return 1; +} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DEBUG_PER_CPU_MAPS diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 7a97327140df..99d0a9332c14 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -48,6 +48,8 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; +nodemask_t numa_phys_nodes_parsed __initdata; + int numa_cpu_node(int cpu) { u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); @@ -57,6 +59,11 @@ int numa_cpu_node(int cpu) return NUMA_NO_NODE; } +int __init num_phys_nodes(void) +{ + return bitmap_weight(numa_phys_nodes_parsed.bits, MAX_NUMNODES); +} + cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); @@ -210,6 +217,7 @@ static int __init dummy_numa_init(void) 0LLU, PFN_PHYS(max_pfn) - 1); node_set(0, numa_nodes_parsed); + node_set(0, numa_phys_nodes_parsed); numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); return 0; diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 6f8e0f21c710..44ca66651756 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -57,6 +57,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) } set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); + node_set(node, numa_phys_nodes_parsed); pr_debug("SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -97,6 +98,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); + node_set(node, numa_phys_nodes_parsed); pr_debug("SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } From ae6730ff42b3a13d94b405edeb5e40108b6d21b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Mar 2026 11:55:41 +0100 Subject: [PATCH 722/828] x86/topo: Add topology_num_nodes_per_package() Use the MADT and SRAT table data to compute __num_nodes_per_package. Specifically, SRAT has already been parsed in x86_numa_init(), which is called before acpi_boot_init() which parses MADT. So both are available in topology_init_possible_cpus(). This number is useful to divinate the various Intel CoD/SNC and AMD NPS modes, since the platforms are failing to provide this otherwise. Doing it this way is independent of the number of online CPUs and other such shenanigans. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Tested-by: Tony Luck Tested-by: K Prateek Nayak Tested-by: Zhang Rui Tested-by: Chen Yu Tested-by: Kyle Meyer Link: https://patch.msgid.link/20260303110100.004091624@infradead.org --- arch/x86/include/asm/topology.h | 6 ++++++ arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/topology.c | 13 +++++++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 1fadf0cf520c..0ba9bdb99871 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -155,6 +155,7 @@ extern unsigned int __max_logical_packages; extern unsigned int __max_threads_per_core; extern unsigned int __num_threads_per_package; extern unsigned int __num_cores_per_package; +extern unsigned int __num_nodes_per_package; const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c); @@ -179,6 +180,11 @@ static inline unsigned int topology_num_threads_per_package(void) return __num_threads_per_package; } +static inline unsigned int topology_num_nodes_per_package(void) +{ + return __num_nodes_per_package; +} + #ifdef CONFIG_X86_LOCAL_APIC int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); #else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1c3261cae40c..a8ff4376c286 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -95,6 +95,9 @@ EXPORT_SYMBOL(__max_dies_per_package); unsigned int __max_logical_packages __ro_after_init = 1; EXPORT_SYMBOL(__max_logical_packages); +unsigned int __num_nodes_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_nodes_per_package); + unsigned int __num_cores_per_package __ro_after_init = 1; EXPORT_SYMBOL(__num_cores_per_package); diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 23190a786d31..eafcb1fc185a 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "cpu.h" @@ -492,11 +493,19 @@ void __init topology_init_possible_cpus(void) set_nr_cpu_ids(allowed); cnta = domain_weight(TOPO_PKG_DOMAIN); - cntb = domain_weight(TOPO_DIE_DOMAIN); __max_logical_packages = cnta; + + pr_info("Max. logical packages: %3u\n", __max_logical_packages); + + cntb = num_phys_nodes(); + __num_nodes_per_package = DIV_ROUND_UP(cntb, cnta); + + pr_info("Max. logical nodes: %3u\n", cntb); + pr_info("Num. nodes per package:%3u\n", __num_nodes_per_package); + + cntb = domain_weight(TOPO_DIE_DOMAIN); __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta)); - pr_info("Max. logical packages: %3u\n", cnta); pr_info("Max. logical dies: %3u\n", cntb); pr_info("Max. dies per package: %3u\n", __max_dies_per_package); From 717b64d58cff6fb97f97be07e382ed7641167a56 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Mar 2026 11:55:42 +0100 Subject: [PATCH 723/828] x86/topo: Replace x86_has_numa_in_package .. with the brand spanking new topology_num_nodes_per_package(). Having the topology setup determine this value during MADT/SRAT parsing before SMP bringup avoids having to detect this situation when building the SMP topology masks. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Tested-by: Tony Luck Tested-by: K Prateek Nayak Tested-by: Zhang Rui Tested-by: Chen Yu Tested-by: Kyle Meyer Link: https://patch.msgid.link/20260303110100.123701837@infradead.org --- arch/x86/kernel/smpboot.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5cd6950ab672..db3e481cdbb2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -468,13 +468,6 @@ static int x86_cluster_flags(void) } #endif -/* - * Set if a package/die has multiple NUMA nodes inside. - * AMD Magny-Cours, Intel Cluster-on-Die, and Intel - * Sub-NUMA Clustering have this. - */ -static bool x86_has_numa_in_package; - static struct sched_domain_topology_level x86_topology[] = { SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT), #ifdef CONFIG_SCHED_CLUSTER @@ -496,7 +489,7 @@ static void __init build_sched_topology(void) * PKG domain since the NUMA domains will auto-magically create the * right spanning domains based on the SLIT. */ - if (x86_has_numa_in_package) { + if (topology_num_nodes_per_package() > 1) { unsigned int pkgdom = ARRAY_SIZE(x86_topology) - 2; memset(&x86_topology[pkgdom], 0, sizeof(x86_topology[pkgdom])); @@ -550,7 +543,7 @@ int arch_sched_node_distance(int from, int to) case INTEL_GRANITERAPIDS_X: case INTEL_ATOM_DARKMONT_X: - if (!x86_has_numa_in_package || topology_max_packages() == 1 || + if (topology_max_packages() == 1 || topology_num_nodes_per_package() == 1 || d < REMOTE_DISTANCE) return d; @@ -606,7 +599,7 @@ void set_cpu_sibling_map(int cpu) o = &cpu_data(i); if (match_pkg(c, o) && !topology_same_node(c, o)) - x86_has_numa_in_package = true; + WARN_ON_ONCE(topology_num_nodes_per_package() == 1); if ((i == cpu) || (has_smt && match_smt(c, o))) link_mask(topology_sibling_cpumask, cpu, i); From 528d89a4707e5bfd86e30823c45dbb66877df900 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Mar 2026 11:55:43 +0100 Subject: [PATCH 724/828] x86/topo: Fix SNC topology mess Per 4d6dd05d07d0 ("sched/topology: Fix sched domain build error for GNR, CWF in SNC-3 mode"), the original crazy SNC-3 SLIT table was: node distances: node 0 1 2 3 4 5 0: 10 15 17 21 28 26 1: 15 10 15 23 26 23 2: 17 15 10 26 23 21 3: 21 28 26 10 15 17 4: 23 26 23 15 10 15 5: 26 23 21 17 15 10 And per: https://lore.kernel.org/lkml/20250825075642.GQ3245006@noisy.programming.kicks-ass.net/ The suggestion was to average the off-trace clusters to restore sanity. However, 4d6dd05d07d0 implements this under various assumptions: - anything GNR/CWF with numa_in_package; - there will never be more than 2 packages; - the off-trace cluster will have distance >20 And then HPE shows up with a machine that matches the Vendor-Family-Model checks but looks like this: Here's an 8 socket (2 chassis) HPE system with SNC enabled: node 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0: 10 12 16 16 16 16 18 18 40 40 40 40 40 40 40 40 1: 12 10 16 16 16 16 18 18 40 40 40 40 40 40 40 40 2: 16 16 10 12 18 18 16 16 40 40 40 40 40 40 40 40 3: 16 16 12 10 18 18 16 16 40 40 40 40 40 40 40 40 4: 16 16 18 18 10 12 16 16 40 40 40 40 40 40 40 40 5: 16 16 18 18 12 10 16 16 40 40 40 40 40 40 40 40 6: 18 18 16 16 16 16 10 12 40 40 40 40 40 40 40 40 7: 18 18 16 16 16 16 12 10 40 40 40 40 40 40 40 40 8: 40 40 40 40 40 40 40 40 10 12 16 16 16 16 18 18 9: 40 40 40 40 40 40 40 40 12 10 16 16 16 16 18 18 10: 40 40 40 40 40 40 40 40 16 16 10 12 18 18 16 16 11: 40 40 40 40 40 40 40 40 16 16 12 10 18 18 16 16 12: 40 40 40 40 40 40 40 40 16 16 18 18 10 12 16 16 13: 40 40 40 40 40 40 40 40 16 16 18 18 12 10 16 16 14: 40 40 40 40 40 40 40 40 18 18 16 16 16 16 10 12 15: 40 40 40 40 40 40 40 40 18 18 16 16 16 16 12 10 10 = Same chassis and socket 12 = Same chassis and socket (SNC) 16 = Same chassis and adjacent socket 18 = Same chassis and non-adjacent socket 40 = Different chassis Turns out, the 'max 2 packages' thing is only relevant to the SNC-3 parts, the smaller parts do 8 sockets (like usual). The above SLIT table is sane, but violates the previous assumptions and trips a WARN. Now that the topology code has a sensible measure of nodes-per-package, we can use that to divinate the SNC mode at hand, and only fix up SNC-3 topologies. There is a 'healthy' amount of paranoia code validating the assumptions on the SLIT table, a simple pr_err(FW_BUG) print on failure and a fallback to using the regular table. Lets see how long this lasts :-) Fixes: 4d6dd05d07d0 ("sched/topology: Fix sched domain build error for GNR, CWF in SNC-3 mode") Reported-by: Kyle Meyer Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Tested-by: K Prateek Nayak Tested-by: Zhang Rui Tested-by: Chen Yu Tested-by: Kyle Meyer Link: https://patch.msgid.link/20260303110100.238361290@infradead.org --- arch/x86/kernel/smpboot.c | 188 ++++++++++++++++++++++++++++---------- 1 file changed, 142 insertions(+), 46 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index db3e481cdbb2..294a8ea60298 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -506,33 +506,149 @@ static void __init build_sched_topology(void) } #ifdef CONFIG_NUMA -static int sched_avg_remote_distance; -static int avg_remote_numa_distance(void) +/* + * Test if the on-trace cluster at (N,N) is symmetric. + * Uses upper triangle iteration to avoid obvious duplicates. + */ +static bool slit_cluster_symmetric(int N) { - int i, j; - int distance, nr_remote, total_distance; + int u = topology_num_nodes_per_package(); - if (sched_avg_remote_distance > 0) - return sched_avg_remote_distance; - - nr_remote = 0; - total_distance = 0; - for_each_node_state(i, N_CPU) { - for_each_node_state(j, N_CPU) { - distance = node_distance(i, j); - - if (distance >= REMOTE_DISTANCE) { - nr_remote++; - total_distance += distance; - } + for (int k = 0; k < u; k++) { + for (int l = k; l < u; l++) { + if (node_distance(N + k, N + l) != + node_distance(N + l, N + k)) + return false; } } - if (nr_remote) - sched_avg_remote_distance = total_distance / nr_remote; - else - sched_avg_remote_distance = REMOTE_DISTANCE; - return sched_avg_remote_distance; + return true; +} + +/* + * Return the package-id of the cluster, or ~0 if indeterminate. + * Each node in the on-trace cluster should have the same package-id. + */ +static u32 slit_cluster_package(int N) +{ + int u = topology_num_nodes_per_package(); + u32 pkg_id = ~0; + + for (int n = 0; n < u; n++) { + const struct cpumask *cpus = cpumask_of_node(N + n); + int cpu; + + for_each_cpu(cpu, cpus) { + u32 id = topology_logical_package_id(cpu); + + if (pkg_id == ~0) + pkg_id = id; + if (pkg_id != id) + return ~0; + } + } + + return pkg_id; +} + +/* + * Validate the SLIT table is of the form expected for SNC, specifically: + * + * - each on-trace cluster should be symmetric, + * - each on-trace cluster should have a unique package-id. + * + * If you NUMA_EMU on top of SNC, you get to keep the pieces. + */ +static bool slit_validate(void) +{ + int u = topology_num_nodes_per_package(); + u32 pkg_id, prev_pkg_id = ~0; + + for (int pkg = 0; pkg < topology_max_packages(); pkg++) { + int n = pkg * u; + + /* + * Ensure the on-trace cluster is symmetric and each cluster + * has a different package id. + */ + if (!slit_cluster_symmetric(n)) + return false; + pkg_id = slit_cluster_package(n); + if (pkg_id == ~0) + return false; + if (pkg && pkg_id == prev_pkg_id) + return false; + + prev_pkg_id = pkg_id; + } + + return true; +} + +/* + * Compute a sanitized SLIT table for SNC; notably SNC-3 can end up with + * asymmetric off-trace clusters, reflecting physical assymmetries. However + * this leads to 'unfortunate' sched_domain configurations. + * + * For example dual socket GNR with SNC-3: + * + * node distances: + * node 0 1 2 3 4 5 + * 0: 10 15 17 21 28 26 + * 1: 15 10 15 23 26 23 + * 2: 17 15 10 26 23 21 + * 3: 21 28 26 10 15 17 + * 4: 23 26 23 15 10 15 + * 5: 26 23 21 17 15 10 + * + * Fix things up by averaging out the off-trace clusters; resulting in: + * + * node 0 1 2 3 4 5 + * 0: 10 15 17 24 24 24 + * 1: 15 10 15 24 24 24 + * 2: 17 15 10 24 24 24 + * 3: 24 24 24 10 15 17 + * 4: 24 24 24 15 10 15 + * 5: 24 24 24 17 15 10 + */ +static int slit_cluster_distance(int i, int j) +{ + static int slit_valid = -1; + int u = topology_num_nodes_per_package(); + long d = 0; + int x, y; + + if (slit_valid < 0) { + slit_valid = slit_validate(); + if (!slit_valid) + pr_err(FW_BUG "SLIT table doesn't have the expected form for SNC -- fixup disabled!\n"); + else + pr_info("Fixing up SNC SLIT table.\n"); + } + + /* + * Is this a unit cluster on the trace? + */ + if ((i / u) == (j / u) || !slit_valid) + return node_distance(i, j); + + /* + * Off-trace cluster. + * + * Notably average out the symmetric pair of off-trace clusters to + * ensure the resulting SLIT table is symmetric. + */ + x = i - (i % u); + y = j - (j % u); + + for (i = x; i < x + u; i++) { + for (j = y; j < y + u; j++) { + d += node_distance(i, j); + d += node_distance(j, i); + } + } + + return d / (2*u*u); } int arch_sched_node_distance(int from, int to) @@ -542,34 +658,14 @@ int arch_sched_node_distance(int from, int to) switch (boot_cpu_data.x86_vfm) { case INTEL_GRANITERAPIDS_X: case INTEL_ATOM_DARKMONT_X: - - if (topology_max_packages() == 1 || topology_num_nodes_per_package() == 1 || - d < REMOTE_DISTANCE) + if (topology_max_packages() == 1 || + topology_num_nodes_per_package() < 3) return d; /* - * With SNC enabled, there could be too many levels of remote - * NUMA node distances, creating NUMA domain levels - * including local nodes and partial remote nodes. - * - * Trim finer distance tuning for NUMA nodes in remote package - * for the purpose of building sched domains. Group NUMA nodes - * in the remote package in the same sched group. - * Simplify NUMA domains and avoid extra NUMA levels including - * different remote NUMA nodes and local nodes. - * - * GNR and CWF don't expect systems with more than 2 packages - * and more than 2 hops between packages. Single average remote - * distance won't be appropriate if there are more than 2 - * packages as average distance to different remote packages - * could be different. + * Handle SNC-3 asymmetries. */ - WARN_ONCE(topology_max_packages() > 2, - "sched: Expect only up to 2 packages for GNR or CWF, " - "but saw %d packages when building sched domains.", - topology_max_packages()); - - d = avg_remote_numa_distance(); + return slit_cluster_distance(from, to); } return d; } From 59674fc9d0bfd96ce8a776680ee1cf22c28c9ac7 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 3 Mar 2026 11:55:44 +0100 Subject: [PATCH 725/828] x86/resctrl: Fix SNC detection Now that the x86 topology code has a sensible nodes-per-package measure, that does not depend on the online status of CPUs, use this to divinate the SNC mode. Note that when Cluster on Die (CoD) is configured on older systems this will also show multiple NUMA nodes per package. Intel Resource Director Technology is incomaptible with CoD. Print a warning and do not use the fixup MSR_RMID_SNC_CONFIG. Signed-off-by: Tony Luck Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Tested-by: Zhang Rui Tested-by: Chen Yu Link: https://patch.msgid.link/aaCxbbgjL6OZ6VMd@agluck-desk3 Link: https://patch.msgid.link/20260303110100.367976706@infradead.org --- arch/x86/kernel/cpu/resctrl/monitor.c | 36 ++++----------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index e6a154240b8d..9bd87bae4983 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -364,7 +364,7 @@ void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d) msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); } -/* CPU models that support MSR_RMID_SNC_CONFIG */ +/* CPU models that support SNC and MSR_RMID_SNC_CONFIG */ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_ICELAKE_X, 0), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0), @@ -375,40 +375,14 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { {} }; -/* - * There isn't a simple hardware bit that indicates whether a CPU is running - * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the - * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in - * the same NUMA node as CPU0. - * It is not possible to accurately determine SNC state if the system is - * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes - * to L3 caches. It will be OK if system is booted with hyperthreading - * disabled (since this doesn't affect the ratio). - */ static __init int snc_get_config(void) { - struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); - const cpumask_t *node0_cpumask; - int cpus_per_node, cpus_per_l3; - int ret; + int ret = topology_num_nodes_per_package(); - if (!x86_match_cpu(snc_cpu_ids) || !ci) + if (ret > 1 && !x86_match_cpu(snc_cpu_ids)) { + pr_warn("CoD enabled system? Resctrl not supported\n"); return 1; - - cpus_read_lock(); - if (num_online_cpus() != num_present_cpus()) - pr_warn("Some CPUs offline, SNC detection may be incorrect\n"); - cpus_read_unlock(); - - node0_cpumask = cpumask_of_node(cpu_to_node(0)); - - cpus_per_node = cpumask_weight(node0_cpumask); - cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map); - - if (!cpus_per_node || !cpus_per_l3) - return 1; - - ret = cpus_per_l3 / cpus_per_node; + } /* sanity check: Only valid results are 1, 2, 3, 4, 6 */ switch (ret) { From d658686a1331db3bb108ca079d76deb3208ed949 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 2 Mar 2026 16:45:40 +0100 Subject: [PATCH 726/828] sched/deadline: Fix missing ENQUEUE_REPLENISH during PI de-boosting Running stress-ng --schedpolicy 0 on an RT kernel on a big machine might lead to the following WARNINGs (edited). sched: DL de-boosted task PID 22725: REPLENISH flag missing WARNING: CPU: 93 PID: 0 at kernel/sched/deadline.c:239 dequeue_task_dl+0x15c/0x1f8 ... (running_bw underflow) Call trace: dequeue_task_dl+0x15c/0x1f8 (P) dequeue_task+0x80/0x168 deactivate_task+0x24/0x50 push_dl_task+0x264/0x2e0 dl_task_timer+0x1b0/0x228 __hrtimer_run_queues+0x188/0x378 hrtimer_interrupt+0xfc/0x260 ... The problem is that when a SCHED_DEADLINE task (lock holder) is changed to a lower priority class via sched_setscheduler(), it may fail to properly inherit the parameters of potential DEADLINE donors if it didn't already inherit them in the past (shorter deadline than donor's at that time). This might lead to bandwidth accounting corruption, as enqueue_task_dl() won't recognize the lock holder as boosted. The scenario occurs when: 1. A DEADLINE task (donor) blocks on a PI mutex held by another DEADLINE task (holder), but the holder doesn't inherit parameters (e.g., it already has a shorter deadline) 2. sched_setscheduler() changes the holder from DEADLINE to a lower class while still holding the mutex 3. The holder should now inherit DEADLINE parameters from the donor and be enqueued with ENQUEUE_REPLENISH, but this doesn't happen Fix the issue by introducing __setscheduler_dl_pi(), which detects when a DEADLINE (proper or boosted) task gets setscheduled to a lower priority class. In case, the function makes the task inherit DEADLINE parameters of the donoer (pi_se) and sets ENQUEUE_REPLENISH flag to ensure proper bandwidth accounting during the next enqueue operation. Fixes: 2279f540ea7d ("sched/deadline: Fix priority inheritance with multiple scheduling classes") Reported-by: Bruno Goncalves Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260302-upstream-fix-deadline-piboost-b4-v3-1-6ba32184a9e0@redhat.com --- kernel/sched/syscalls.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 6f10db3646e7..cadb0e9fe19b 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -284,6 +284,35 @@ static bool check_same_owner(struct task_struct *p) uid_eq(cred->euid, pcred->uid)); } +#ifdef CONFIG_RT_MUTEXES +static inline void __setscheduler_dl_pi(int newprio, int policy, + struct task_struct *p, + struct sched_change_ctx *scope) +{ + /* + * In case a DEADLINE task (either proper or boosted) gets + * setscheduled to a lower priority class, check if it neeeds to + * inherit parameters from a potential pi_task. In that case make + * sure replenishment happens with the next enqueue. + */ + + if (dl_prio(newprio) && !dl_policy(policy)) { + struct task_struct *pi_task = rt_mutex_get_top_task(p); + + if (pi_task) { + p->dl.pi_se = pi_task->dl.pi_se; + scope->flags |= ENQUEUE_REPLENISH; + } + } +} +#else /* !CONFIG_RT_MUTEXES */ +static inline void __setscheduler_dl_pi(int newprio, int policy, + struct task_struct *p, + struct sched_change_ctx *scope) +{ +} +#endif /* !CONFIG_RT_MUTEXES */ + #ifdef CONFIG_UCLAMP_TASK static int uclamp_validate(struct task_struct *p, @@ -655,6 +684,7 @@ change: __setscheduler_params(p, attr); p->sched_class = next_class; p->prio = newprio; + __setscheduler_dl_pi(newprio, policy, p, scope); } __setscheduler_uclamp(p, attr); From b5ef09a77d0b5213268300eedd8a7d28b4e92d47 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 26 Feb 2026 17:03:07 -0800 Subject: [PATCH 727/828] x86/entry/vdso32: Work around libgcc unwinder bug The unwinder code in libgcc has a long standing bug which causes it to fail to pick up the signal frame CFI flag. This is a generic bug across all platforms. It affects the __kernel_sigreturn and __kernel_rt_sigreturn vdso entry points on i386. The x86-64 kernel doesn't provide a sigreturn stub, and so there is no kernel-provided code that is affected on x86-64. libgcc does have a legacy fallback path which happens to work as long as the bytes immediately before each of the sigreturn functions fall outside any function. This patch adds a nop before the ALIGN to each of the sigreturn stubs to ensure that this is, indeed, the case. The rest of the patch is just a comment which documents the invariants that need to be maintained for this legacy path to work correctly. This is a manifest bug: in the current vdso, __kernel_vsyscall is a multiple of 16 bytes long and thus __kernel_sigreturn does not have any padding in front of it. Closes: https://lore.kernel.org/lkml/f3412cc3e8f66d1853cc9d572c0f2fab076872b1.camel@xry111.site Fixes: 884961618ee5 ("x86/entry/vdso32: Remove open-coded DWARF in sigreturn.S") Reported-by: Xi Ruoyao Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Peter Zijlstra (Intel) Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124050 Link: https://patch.msgid.link/20260227010308.310342-1-hpa@zytor.com --- arch/x86/entry/vdso/vdso32/sigreturn.S | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index b433353bc8e3..b33fcc501ba3 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -35,9 +35,38 @@ #endif .endm +/* + * WARNING: + * + * A bug in the libgcc unwinder as of at least gcc 15.2 (2026) means that + * the unwinder fails to recognize the signal frame flag. + * + * There is a hacky legacy fallback path in libgcc which ends up + * getting invoked instead. It happens to work as long as BOTH of the + * following conditions are true: + * + * 1. There is at least one byte before the each of the sigreturn + * functions which falls outside any function. This is enforced by + * an explicit nop instruction before the ALIGN. + * 2. The code sequences between the entry point up to and including + * the int $0x80 below need to match EXACTLY. Do not change them + * in any way. The exact byte sequences are: + * + * __kernel_sigreturn: + * 0: 58 pop %eax + * 1: b8 77 00 00 00 mov $0x77,%eax + * 6: cd 80 int $0x80 + * + * __kernel_rt_sigreturn: + * 0: b8 ad 00 00 00 mov $0xad,%eax + * 5: cd 80 int $0x80 + * + * For details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124050 + */ .text .globl __kernel_sigreturn .type __kernel_sigreturn,@function + nop /* libgcc hack: see comment above */ ALIGN __kernel_sigreturn: STARTPROC_SIGNAL_FRAME IA32_SIGFRAME_sigcontext @@ -52,6 +81,7 @@ SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL) .globl __kernel_rt_sigreturn .type __kernel_rt_sigreturn,@function + nop /* libgcc hack: see comment above */ ALIGN __kernel_rt_sigreturn: STARTPROC_SIGNAL_FRAME IA32_RT_SIGFRAME_sigcontext From 340cea84f691c5206561bb2e0147158fe02070be Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 4 Mar 2026 18:15:53 +0530 Subject: [PATCH 728/828] cifs: open files should not hold ref on superblock Today whenever we deal with a file, in addition to holding a reference on the dentry, we also get a reference on the superblock. This happens in two cases: 1. when a new cinode is allocated 2. when an oplock break is being processed The reasoning for holding the superblock ref was to make sure that when umount happens, if there are users of inodes and dentries, it does not try to clean them up and wait for the last ref to superblock to be dropped by last of such users. But the side effect of doing that is that umount silently drops a ref on the superblock and we could have deferred closes and lease breaks still holding these refs. Ideally, we should ensure that all of these users of inodes and dentries are cleaned up at the time of umount, which is what this code is doing. This code change allows these code paths to use a ref on the dentry (and hence the inode). That way, umount is ensured to clean up SMB client resources when it's the last ref on the superblock (For ex: when same objects are shared). The code change also moves the call to close all the files in deferred close list to the umount code path. It also waits for oplock_break workers to be flushed before calling kill_anon_super (which eventually frees up those objects). Fixes: 24261fc23db9 ("cifs: delay super block destruction until all cifsFileInfo objects are gone") Fixes: 705c79101ccf ("smb: client: fix use-after-free in cifs_oplock_break") Cc: Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 7 +++++-- fs/smb/client/cifsproto.h | 1 + fs/smb/client/file.c | 11 ---------- fs/smb/client/misc.c | 42 +++++++++++++++++++++++++++++++++++++++ fs/smb/client/trace.h | 2 ++ 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 427558404aa5..b6e3db993cc6 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -332,10 +332,14 @@ static void cifs_kill_sb(struct super_block *sb) /* * We need to release all dentries for the cached directories - * before we kill the sb. + * and close all deferred file handles before we kill the sb. */ if (cifs_sb->root) { close_all_cached_dirs(cifs_sb); + cifs_close_all_deferred_files_sb(cifs_sb); + + /* Wait for all pending oplock breaks to complete */ + flush_workqueue(cifsoplockd_wq); /* finally release root dentry */ dput(cifs_sb->root); @@ -868,7 +872,6 @@ static void cifs_umount_begin(struct super_block *sb) spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); - cifs_close_all_deferred_files(tcon); /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ if (tcon->ses && tcon->ses->server) { diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 96d6b5325aa3..800a7e418c32 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -261,6 +261,7 @@ void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode); void cifs_close_all_deferred_files(struct cifs_tcon *tcon); +void cifs_close_all_deferred_files_sb(struct cifs_sb_info *cifs_sb); void cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, struct dentry *dentry); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index f3ddcdf406c8..cffcf82c1b69 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -711,8 +711,6 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); - cifs_sb_active(inode->i_sb); - /* * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. @@ -767,7 +765,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) struct inode *inode = d_inode(cifs_file->dentry); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsLockInfo *li, *tmp; - struct super_block *sb = inode->i_sb; /* * Delete any outstanding lock records. We'll lose them when the file @@ -785,7 +782,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) cifs_put_tlink(cifs_file->tlink); dput(cifs_file->dentry); - cifs_sb_deactive(sb); kfree(cifs_file->symlink_target); kfree(cifs_file); } @@ -3163,12 +3159,6 @@ void cifs_oplock_break(struct work_struct *work) __u64 persistent_fid, volatile_fid; __u16 net_fid; - /* - * Hold a reference to the superblock to prevent it and its inodes from - * being freed while we are accessing cinode. Otherwise, _cifsFileInfo_put() - * may release the last reference to the sb and trigger inode eviction. - */ - cifs_sb_active(sb); wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, TASK_UNINTERRUPTIBLE); @@ -3253,7 +3243,6 @@ oplock_break_ack: cifs_put_tlink(tlink); out: cifs_done_oplock_break(cinode); - cifs_sb_deactive(sb); } static int cifs_swap_activate(struct swap_info_struct *sis, diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index bc24c92b8b95..2aff1cab6c31 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -28,6 +28,11 @@ #include "fs_context.h" #include "cached_dir.h" +struct tcon_list { + struct list_head entry; + struct cifs_tcon *tcon; +}; + /* The xid serves as a useful identifier for each incoming vfs request, in a similar way to the mid which is useful to track each sent smb, and CurrentXid can also provide a running counter (although it @@ -554,6 +559,43 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) } } +void cifs_close_all_deferred_files_sb(struct cifs_sb_info *cifs_sb) +{ + struct rb_root *root = &cifs_sb->tlink_tree; + struct rb_node *node; + struct cifs_tcon *tcon; + struct tcon_link *tlink; + struct tcon_list *tmp_list, *q; + LIST_HEAD(tcon_head); + + spin_lock(&cifs_sb->tlink_tree_lock); + for (node = rb_first(root); node; node = rb_next(node)) { + tlink = rb_entry(node, struct tcon_link, tl_rbnode); + tcon = tlink_tcon(tlink); + if (IS_ERR(tcon)) + continue; + tmp_list = kmalloc_obj(struct tcon_list, GFP_ATOMIC); + if (tmp_list == NULL) + break; + tmp_list->tcon = tcon; + /* Take a reference on tcon to prevent it from being freed */ + spin_lock(&tcon->tc_lock); + ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_close_defer_files); + spin_unlock(&tcon->tc_lock); + list_add_tail(&tmp_list->entry, &tcon_head); + } + spin_unlock(&cifs_sb->tlink_tree_lock); + + list_for_each_entry_safe(tmp_list, q, &tcon_head, entry) { + cifs_close_all_deferred_files(tmp_list->tcon); + list_del(&tmp_list->entry); + cifs_put_tcon(tmp_list->tcon, netfs_trace_tcon_ref_put_close_defer_files); + kfree(tmp_list); + } +} + void cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, struct dentry *dentry) { diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 9228f95cae2b..acfbb63086ea 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -176,6 +176,7 @@ EM(netfs_trace_tcon_ref_get_cached_laundromat, "GET Ch-Lau") \ EM(netfs_trace_tcon_ref_get_cached_lease_break, "GET Ch-Lea") \ EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \ + EM(netfs_trace_tcon_ref_get_close_defer_files, "GET Cl-Def") \ EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \ EM(netfs_trace_tcon_ref_get_find, "GET Find ") \ EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \ @@ -187,6 +188,7 @@ EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \ EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \ EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \ + EM(netfs_trace_tcon_ref_put_close_defer_files, "PUT Cl-Def") \ EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \ EM(netfs_trace_tcon_ref_put_dfs_refer, "PUT DfsRfr") \ EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \ From 0100e495cdf0436bd4ed2dc034d385b44cb5993c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 21 Feb 2026 10:38:47 +0800 Subject: [PATCH 729/828] arm64: make runtime const not usable by modules Similar as commit 284922f4c563 ("x86: uaccess: don't use runtime-const rewriting in modules") does, make arm64's runtime const not usable by modules too, to "make sure this doesn't get forgotten the next time somebody wants to do runtime constant optimizations". The reason is well explained in the above commit: "The runtime-const infrastructure was never designed to handle the modular case, because the constant fixup is only done at boot time for core kernel code." Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon --- arch/arm64/include/asm/runtime-const.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h index be5915669d23..c3dbd3ae68f6 100644 --- a/arch/arm64/include/asm/runtime-const.h +++ b/arch/arm64/include/asm/runtime-const.h @@ -2,6 +2,10 @@ #ifndef _ASM_RUNTIME_CONST_H #define _ASM_RUNTIME_CONST_H +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + #include /* Sigh. You can still run arm64 in BE mode */ From 68785c5e79e0fc1eacf63026fbba32be3867f410 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 25 Feb 2026 22:51:06 -0500 Subject: [PATCH 730/828] drm/amd/pm: remove invalid gpu_metrics.energy_accumulator on smu v13.0.x v1: The metrics->EnergyAccumulator field has been deprecated on newer pmfw. v2: add smu 13.0.0/13.0.7/13.0.10 support. Signed-off-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 8de9edb35976fa56565dc8fbb5d1310e8e10187c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 +++++++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e030f1e186cb..b32c053950c9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2034,6 +2034,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, smu, SMU_DRIVER_TABLE_GPU_METRICS); SmuMetricsExternal_t metrics_ext; SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + uint32_t mp1_ver = amdgpu_ip_version(smu->adev, MP1_HWIP, 0); int ret = 0; ret = smu_cmn_get_metrics_table(smu, @@ -2058,7 +2059,12 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, metrics->Vcn1ActivityPercentage); gpu_metrics->average_socket_power = metrics->AverageSocketPower; - gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if ((mp1_ver == IP_VERSION(13, 0, 0) && smu->smc_fw_version <= 0x004e1e00) || + (mp1_ver == IP_VERSION(13, 0, 10) && smu->smc_fw_version <= 0x00500800)) + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + else + gpu_metrics->energy_accumulator = UINT_MAX; if (metrics->AverageGfxActivity <= SMU_13_0_0_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index af0482c9caa7..f08cfa510a8a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2065,7 +2065,8 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu, metrics->Vcn1ActivityPercentage); gpu_metrics->average_socket_power = metrics->AverageSocketPower; - gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + gpu_metrics->energy_accumulator = smu->smc_fw_version <= 0x00521400 ? + metrics->EnergyAccumulator : UINT_MAX; if (metrics->AverageGfxActivity <= SMU_13_0_7_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; From 2c1030f2e84885cc58bffef6af67d5b9d2e7098f Mon Sep 17 00:00:00 2001 From: Alysa Liu Date: Thu, 5 Feb 2026 11:21:45 -0500 Subject: [PATCH 731/828] drm/amdgpu: Fix use-after-free race in VM acquire Replace non-atomic vm->process_info assignment with cmpxchg() to prevent race when parent/child processes sharing a drm_file both try to acquire the same VM after fork(). Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alysa Liu Signed-off-by: Alex Deucher (cherry picked from commit c7c573275ec20db05be769288a3e3bb2250ec618) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 06c1913d5a3f..29b400cdd6d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1439,7 +1439,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, *process_info = info; } - vm->process_info = *process_info; + if (cmpxchg(&vm->process_info, NULL, *process_info) != NULL) { + ret = -EINVAL; + goto already_acquired; + } /* Validate page directory and attach eviction fence */ ret = amdgpu_bo_reserve(vm->root.bo, true); @@ -1479,6 +1482,7 @@ validate_pd_fail: amdgpu_bo_unreserve(vm->root.bo); reserve_pd_fail: vm->process_info = NULL; +already_acquired: if (info) { dma_fence_put(&info->eviction_fence->base); *process_info = NULL; From a145bbff6f53ab80757a15eba5ad2ba8e3bdc9dc Mon Sep 17 00:00:00 2001 From: sguttula Date: Wed, 25 Feb 2026 13:57:01 +0530 Subject: [PATCH 732/828] drm/amdgpu/psp: Use Indirect access address for GFX to PSP mailbox The reason the RAP is not granting access to 0x58200 is that a dedicated RSMU slot would have to be spent for this address range, and MPASP is close to running out of RSMU slots. This will help to fix PSP TOC load failure during secureboot. GFX Driver Need to use indirect access for SMN address regs. Signed-off-by: sguttula Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 9b822e26eea3899003aa8a89d5e2c4408e066e20) --- drivers/gpu/drm/amd/amdgpu/psp_v15_0.c | 20 +++++++++---------- .../include/asic_reg/mp/mp_15_0_0_offset.h | 18 +++++++++++++++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c index 723ddae17644..73a709773e85 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c @@ -69,12 +69,12 @@ static int psp_v15_0_0_ring_stop(struct psp_context *psp, 0x80000000, 0x80000000, false); } else { /* Write the ring destroy command*/ - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x80000000, false); } @@ -116,7 +116,7 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp, } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x80000000, false); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); @@ -125,23 +125,23 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp, /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_69, psp_ring_reg); /* Write high address of the ring to C2PMSG_70 */ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_70, psp_ring_reg); /* Write size of ring to C2PMSG_71 */ psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_71, psp_ring_reg); /* Write the ring initialization command to C2PMSG_64 */ psp_ring_reg = ring_type; psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64, psp_ring_reg); /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x8000FFFF, false); } @@ -174,7 +174,7 @@ static uint32_t psp_v15_0_0_ring_get_wptr(struct psp_context *psp) if (amdgpu_sriov_vf(adev)) data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); else - data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + data = RREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67); return data; } @@ -188,7 +188,7 @@ static void psp_v15_0_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); } else - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67, value); } static const struct psp_funcs psp_v15_0_0_funcs = { diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h index 0e4c195297a4..fe97943b9b97 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h @@ -82,6 +82,24 @@ #define regMPASP_SMN_IH_SW_INT_CTRL 0x0142 #define regMPASP_SMN_IH_SW_INT_CTRL_BASE_IDX 0 +// addressBlock: mp_SmuMpASPPub_PcruDec +// base address: 0x3800000 +#define regMPASP_PCRU1_MPASP_C2PMSG_64 0x4280 +#define regMPASP_PCRU1_MPASP_C2PMSG_64_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_65 0x4281 +#define regMPASP_PCRU1_MPASP_C2PMSG_65_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_66 0x4282 +#define regMPASP_PCRU1_MPASP_C2PMSG_66_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_67 0x4283 +#define regMPASP_PCRU1_MPASP_C2PMSG_67_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_68 0x4284 +#define regMPASP_PCRU1_MPASP_C2PMSG_68_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_69 0x4285 +#define regMPASP_PCRU1_MPASP_C2PMSG_69_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_70 0x4286 +#define regMPASP_PCRU1_MPASP_C2PMSG_70_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_71 0x4287 +#define regMPASP_PCRU1_MPASP_C2PMSG_71_BASE_IDX 3 // addressBlock: mp_SmuMp1_SmnDec // base address: 0x0 From 048c1c4e51715ffddd4189745c07f530f34fbe37 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Feb 2026 12:41:32 +0000 Subject: [PATCH 733/828] drm/amdgpu/userq: Consolidate wait ioctl exit path If we gate the fence destruction with a check telling us whether there are valid pointers in there we can eliminate the need for dual, basically identical, exit paths. Reviewed-by: Alex Deucher Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher (cherry picked from commit bea29bb0dd29012949cd44fdb122465a9fd5cf91) --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 28 ++++--------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 7e9cf1868cc9..77969a6017a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -983,32 +983,14 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, r = -EFAULT; goto free_fences; } - - kfree(fences); - kfree(fence_info); } - drm_exec_fini(&exec); - for (i = 0; i < num_read_bo_handles; i++) - drm_gem_object_put(gobj_read[i]); - kfree(gobj_read); - - for (i = 0; i < num_write_bo_handles; i++) - drm_gem_object_put(gobj_write[i]); - kfree(gobj_write); - - kfree(timeline_points); - kfree(timeline_handles); - kfree(syncobj_handles); - kfree(bo_handles_write); - kfree(bo_handles_read); - - return 0; - free_fences: - while (num_fences-- > 0) - dma_fence_put(fences[num_fences]); - kfree(fences); + if (fences) { + while (num_fences-- > 0) + dma_fence_put(fences[num_fences]); + kfree(fences); + } free_fence_info: kfree(fence_info); exec_fini: From 65b5c326ce4103620c977b8dcb1699bdac4da143 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 2 Mar 2026 18:50:46 +0530 Subject: [PATCH 734/828] drm/amdgpu/userq: refcount userqueues to avoid any race conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid race condition and avoid UAF cases, implement kref based queues and protect the below operations using xa lock a. Getting a queue from xarray b. Increment/Decrement it's refcount Every time some one want to access a queue, always get via amdgpu_userq_get to make sure we have locks in place and get the object if active. A userqueue is destroyed on the last refcount is dropped which typically would be via IOCTL or during fini. v2: Add the missing drop in one the condition in the signal ioclt [Alex] v3: remove the queue from the xarray first in the free queue ioctl path [Christian] - Pass queue to the amdgpu_userq_put directly. - make amdgpu_userq_put xa_lock free since we are doing put for each get only and final put is done via destroy and we remove the queue from xa with lock. - use userq_put in fini too so cleanup is done fully. v4: Use xa_erase directly rather than doing load and erase in free ioctl. Also remove some of the error logs which could be exploited by the user to flood the logs [Christian] Signed-off-by: Sunil Khatri Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 4952189b284d4d847f92636bb42dd747747129c0) Cc: # 048c1c4e5171: drm/amdgpu/userq: Consolidate wait ioctl exit path Cc: --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 116 ++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 4 + .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 14 ++- 3 files changed, 95 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 9d67b770bcc2..7c450350847d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -446,8 +446,7 @@ static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) return ret; } -static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, - int queue_id) +static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; @@ -461,7 +460,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, uq_funcs->mqd_destroy(queue); amdgpu_userq_fence_driver_free(queue); /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ - xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id); xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); queue->userq_mgr = NULL; list_del(&queue->userq_va_list); @@ -470,12 +468,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, up_read(&adev->reset_domain->sem); } -static struct amdgpu_usermode_queue * -amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) -{ - return xa_load(&uq_mgr->userq_xa, qid); -} - void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr) @@ -625,22 +617,13 @@ unref_bo: } static int -amdgpu_userq_destroy(struct drm_file *filp, int queue_id) +amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { - struct amdgpu_fpriv *fpriv = filp->driver_priv; - struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_usermode_queue *queue; int r = 0; cancel_delayed_work_sync(&uq_mgr->resume_work); mutex_lock(&uq_mgr->userq_mutex); - queue = amdgpu_userq_find(uq_mgr, queue_id); - if (!queue) { - drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n"); - mutex_unlock(&uq_mgr->userq_mutex); - return -EINVAL; - } amdgpu_userq_wait_for_last_fence(queue); /* Cancel any pending hang detection work and cleanup */ if (queue->hang_detect_fence) { @@ -672,7 +655,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); queue->state = AMDGPU_USERQ_STATE_HUNG; } - amdgpu_userq_cleanup(queue, queue_id); + amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -680,6 +663,37 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) return r; } +static void amdgpu_userq_kref_destroy(struct kref *kref) +{ + int r; + struct amdgpu_usermode_queue *queue = + container_of(kref, struct amdgpu_usermode_queue, refcount); + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; + + r = amdgpu_userq_destroy(uq_mgr, queue); + if (r) + drm_file_err(uq_mgr->file, "Failed to destroy usermode queue %d\n", r); +} + +struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid) +{ + struct amdgpu_usermode_queue *queue; + + xa_lock(&uq_mgr->userq_xa); + queue = xa_load(&uq_mgr->userq_xa, qid); + if (queue) + kref_get(&queue->refcount); + xa_unlock(&uq_mgr->userq_xa); + + return queue; +} + +void amdgpu_userq_put(struct amdgpu_usermode_queue *queue) +{ + if (queue) + kref_put(&queue->refcount, amdgpu_userq_kref_destroy); +} + static int amdgpu_userq_priority_permit(struct drm_file *filp, int priority) { @@ -834,6 +848,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } + /* drop this refcount during queue destroy */ + kref_init(&queue->refcount); + /* Wait for mode-1 reset to complete */ down_read(&adev->reset_domain->sem); r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); @@ -985,7 +1002,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { union drm_amdgpu_userq *args = data; - int r; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_usermode_queue *queue; + int r = 0; if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; @@ -1000,11 +1019,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, drm_file_err(filp, "Failed to create usermode queue\n"); break; - case AMDGPU_USERQ_OP_FREE: - r = amdgpu_userq_destroy(filp, args->in.queue_id); - if (r) - drm_file_err(filp, "Failed to destroy usermode queue\n"); + case AMDGPU_USERQ_OP_FREE: { + xa_lock(&fpriv->userq_mgr.userq_xa); + queue = __xa_erase(&fpriv->userq_mgr.userq_xa, args->in.queue_id); + xa_unlock(&fpriv->userq_mgr.userq_xa); + if (!queue) + return -ENOENT; + + amdgpu_userq_put(queue); break; + } default: drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op); @@ -1023,16 +1047,23 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; + if (!amdgpu_userq_buffer_vas_mapped(queue)) { drm_file_err(uq_mgr->file, "trying restore queue without va mapping\n"); queue->state = AMDGPU_USERQ_STATE_INVALID_VA; + amdgpu_userq_put(queue); continue; } r = amdgpu_userq_restore_helper(queue); if (r) ret = r; + + amdgpu_userq_put(queue); } if (ret) @@ -1266,9 +1297,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; + amdgpu_userq_put(queue); } if (ret) @@ -1301,16 +1336,24 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) int ret; xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; + struct dma_fence *f = queue->last_fence; - if (!f || dma_fence_is_signaled(f)) + if (!f || dma_fence_is_signaled(f)) { + amdgpu_userq_put(queue); continue; + } ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); if (ret <= 0) { drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", f->context, f->seqno); + amdgpu_userq_put(queue); return -ETIMEDOUT; } + amdgpu_userq_put(queue); } return 0; @@ -1361,20 +1404,23 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { struct amdgpu_usermode_queue *queue; - unsigned long queue_id; + unsigned long queue_id = 0; - cancel_delayed_work_sync(&userq_mgr->resume_work); + for (;;) { + xa_lock(&userq_mgr->userq_xa); + queue = xa_find(&userq_mgr->userq_xa, &queue_id, ULONG_MAX, + XA_PRESENT); + if (queue) + __xa_erase(&userq_mgr->userq_xa, queue_id); + xa_unlock(&userq_mgr->userq_xa); - mutex_lock(&userq_mgr->userq_mutex); - amdgpu_userq_detect_and_reset_queues(userq_mgr); - xa_for_each(&userq_mgr->userq_xa, queue_id, queue) { - amdgpu_userq_wait_for_last_fence(queue); - amdgpu_userq_unmap_helper(queue); - amdgpu_userq_cleanup(queue, queue_id); + if (!queue) + break; + + amdgpu_userq_put(queue); } xa_destroy(&userq_mgr->userq_xa); - mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 5845d8959034..736c1d38297c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -74,6 +74,7 @@ struct amdgpu_usermode_queue { struct dentry *debugfs_queue; struct delayed_work hang_detect_work; struct dma_fence *hang_detect_fence; + struct kref refcount; struct list_head userq_va_list; }; @@ -112,6 +113,9 @@ struct amdgpu_db_info { struct amdgpu_userq_obj *db_obj; }; +struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid); +void amdgpu_userq_put(struct amdgpu_usermode_queue *queue); + int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 77969a6017a4..5239b06b9ab0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -466,7 +466,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_amdgpu_userq_signal *args = data; struct drm_gem_object **gobj_write = NULL; struct drm_gem_object **gobj_read = NULL; - struct amdgpu_usermode_queue *queue; + struct amdgpu_usermode_queue *queue = NULL; struct amdgpu_userq_fence *userq_fence; struct drm_syncobj **syncobj = NULL; u32 *bo_handles_write, num_write_bo_handles; @@ -553,7 +553,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } /* Retrieve the user queue */ - queue = xa_load(&userq_mgr->userq_xa, args->queue_id); + queue = amdgpu_userq_get(userq_mgr, args->queue_id); if (!queue) { r = -ENOENT; goto put_gobj_write; @@ -648,6 +648,9 @@ free_syncobj: free_syncobj_handles: kfree(syncobj_handles); + if (queue) + amdgpu_userq_put(queue); + return r; } @@ -660,7 +663,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_amdgpu_userq_wait *wait_info = data; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; - struct amdgpu_usermode_queue *waitq; + struct amdgpu_usermode_queue *waitq = NULL; struct drm_gem_object **gobj_write; struct drm_gem_object **gobj_read; struct dma_fence **fences = NULL; @@ -926,7 +929,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, */ num_fences = dma_fence_dedup_array(fences, num_fences); - waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id); + waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); if (!waitq) { r = -EINVAL; goto free_fences; @@ -1014,5 +1017,8 @@ free_bo_handles_write: free_bo_handles_read: kfree(bo_handles_read); + if (waitq) + amdgpu_userq_put(waitq); + return r; } From e48a869957a70cc39b4090cd27c36a86f8db9b92 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Wed, 25 Feb 2026 09:51:35 +0100 Subject: [PATCH 735/828] timekeeping: Fix timex status validation for auxiliary clocks The timekeeping_validate_timex() function validates the timex status of an auxiliary system clock even when the status is not to be changed, which causes unexpected errors for applications that make read-only clock_adjtime() calls, or set some other timex fields, but without clearing the status field. Do the AUX-specific status validation only when the modes field contains ADJ_STATUS, i.e. the application is actually trying to change the status. This makes the AUX-specific clock_adjtime() behavior consistent with CLOCK_REALTIME. Fixes: 4eca49d0b621 ("timekeeping: Prepare do_adtimex() for auxiliary clocks") Signed-off-by: Miroslav Lichvar Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260225085231.276751-1-mlichvar@redhat.com --- kernel/time/timekeeping.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 91fa2003351c..c07e562ee4c1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2653,7 +2653,8 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux if (aux_clock) { /* Auxiliary clocks are similar to TAI and do not have leap seconds */ - if (txc->status & (STA_INS | STA_DEL)) + if (txc->modes & ADJ_STATUS && + txc->status & (STA_INS | STA_DEL)) return -EINVAL; /* No TAI offset setting */ @@ -2661,7 +2662,8 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux return -EINVAL; /* No PPS support either */ - if (txc->status & (STA_PPSFREQ | STA_PPSTIME)) + if (txc->modes & ADJ_STATUS && + txc->status & (STA_PPSFREQ | STA_PPSTIME)) return -EINVAL; } From f82859c84a9862e242c904e72fb0625fef9b24e7 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 4 Mar 2026 11:50:12 -0800 Subject: [PATCH 736/828] accel/amdxdna: Fix major version check on NPU1 platform Add the missing major number in npu1_fw_feature_table. Without the major version specified, the firmware feature check fails, preventing new firmware commands from being enabled on the NPU1 platform. With the correct major version populated, the driver properly detects firmware support and enables the new command. Fixes: f1eac46fe5f7 ("accel/amdxdna: Update firmware version check for latest firmware") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260304195012.3616908-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/npu1_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index 6e3d3ca69c04..1320e924e548 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -67,7 +67,7 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = { static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { { .major = 5, .min_minor = 7 }, - { .features = BIT_U64(AIE2_NPU_COMMAND), .min_minor = 8 }, + { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 5, .min_minor = 8 }, { 0 } }; From 8f3c6f08ababad2e3bdd239728cf66a9949446b4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 24 Feb 2026 13:17:50 +1000 Subject: [PATCH 737/828] nouveau/dpcd: return EBUSY for aux xfer if the device is asleep If we have runtime suspended, and userspace wants to use /dev/drm_dp_* then just tell it the device is busy instead of crashing in the GSP code. WARNING: CPU: 2 PID: 565741 at drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c:164 r535_gsp_msgq_wait+0x9a/0xb0 [nouveau] CPU: 2 UID: 0 PID: 565741 Comm: fwupd Not tainted 6.18.10-200.fc43.x86_64 #1 PREEMPT(lazy) Hardware name: LENOVO 20QTS0PQ00/20QTS0PQ00, BIOS N2OET65W (1.52 ) 08/05/2024 RIP: 0010:r535_gsp_msgq_wait+0x9a/0xb0 [nouveau] This is a simple fix to get backported. We should probably engineer a proper power domain solution to wake up devices and keep them awake while fw updates are happening. Cc: stable@vger.kernel.org Fixes: 8894f4919bc4 ("drm/nouveau: register a drm_dp_aux channel for each dp connector") Reviewed-by: Lyude Paul Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20260224031750.791621-1-airlied@gmail.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_connector.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 00d4530aea71..cc239492c7f0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1230,6 +1230,9 @@ nouveau_connector_aux_xfer(struct drm_dp_aux *obj, struct drm_dp_aux_msg *msg) u8 size = msg->size; int ret; + if (pm_runtime_suspended(nv_connector->base.dev->dev)) + return -EBUSY; + nv_encoder = find_encoder(&nv_connector->base, DCB_OUTPUT_DP); if (!nv_encoder) return -ENODEV; From 170a4b21f49b3dcff3115b4c90758f0a0d77375a Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Wed, 4 Mar 2026 22:20:39 +0100 Subject: [PATCH 738/828] hwmon: (max6639) fix inverted polarity According to MAX6639 documentation: D1: PWM Output Polarity. PWM output is low at 100% duty cycle when this bit is set to zero. PWM output is high at 100% duty cycle when this bit is set to 1. Up to commit 0f33272b60ed ("hwmon: (max6639) : Update hwmon init using info structure"), the polarity was set to high (0x2) when no platform data was set. After the patch, the polarity register wasn't set anymore if no platform data was specified. Nowadays, since commit 7506ebcd662b ("hwmon: (max6639) : Configure based on DT property"), it is always set to low which doesn't match with the comment above and change the behavior compared to versions prior 0f33272b60ed. Fixes: 0f33272b60ed ("hwmon: (max6639) : Update hwmon init using info structure") Signed-off-by: Olivier Sobrie Link: https://lore.kernel.org/r/20260304212039.570274-1-olivier@sobrie.be Signed-off-by: Guenter Roeck --- drivers/hwmon/max6639.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c index a0a1dbbda887..9a3c515efe2e 100644 --- a/drivers/hwmon/max6639.c +++ b/drivers/hwmon/max6639.c @@ -607,7 +607,7 @@ static int max6639_init_client(struct i2c_client *client, return err; /* Fans PWM polarity high by default */ - err = regmap_write(data->regmap, MAX6639_REG_FAN_CONFIG2a(i), 0x00); + err = regmap_write(data->regmap, MAX6639_REG_FAN_CONFIG2a(i), 0x02); if (err) return err; From 126fe7ef12ffe42fdc600fe22df733e96fa418ec Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Mon, 2 Mar 2026 18:42:01 -0800 Subject: [PATCH 739/828] mailmap: Add entry for Joe Damato My Fastly email address is no longer used. Add a mailmap entry to reflect that. Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260303024202.2526604-1-joe@dama.to Signed-off-by: Jakub Kicinski --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index e1cf6bb85d33..a02e9aa9aed4 100644 --- a/.mailmap +++ b/.mailmap @@ -396,6 +396,7 @@ Jiri Slaby Jisheng Zhang Jisheng Zhang Jishnu Prakash +Joe Damato Joel Granados Johan Hovold Johan Hovold From 18b43bec5437914e3b51bb08dc6c6e7b0a2df4d1 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 3 Mar 2026 11:37:20 +0800 Subject: [PATCH 740/828] mailmap: reflect my gmail as default Use my gmail instead so that I can easily handle those emails that CC me. Signed-off-by: Jason Xing Link: https://patch.msgid.link/20260303033720.84108-1-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index a02e9aa9aed4..7fefbcc82d1a 100644 --- a/.mailmap +++ b/.mailmap @@ -348,6 +348,7 @@ Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe +Jason Xing Javi Merino Jayachandran C From 25dd70a03b1f5f3aa71e1a5091ecd9cd2a13ee43 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Wed, 4 Mar 2026 15:51:17 -0800 Subject: [PATCH 741/828] hwmon: (pmbus/q54sj108a2) fix stack overflow in debugfs read The q54sj108a2_debugfs_read function suffers from a stack buffer overflow due to incorrect arguments passed to bin2hex(). The function currently passes 'data' as the destination and 'data_char' as the source. Because bin2hex() converts each input byte into two hex characters, a 32-byte block read results in 64 bytes of output. Since 'data' is only 34 bytes (I2C_SMBUS_BLOCK_MAX + 2), this writes 30 bytes past the end of the buffer onto the stack. Additionally, the arguments were swapped: it was reading from the zero-initialized 'data_char' and writing to 'data', resulting in all-zero output regardless of the actual I2C read. Fix this by: 1. Expanding 'data_char' to 66 bytes to safely hold the hex output. 2. Correcting the bin2hex() argument order and using the actual read count. 3. Using a pointer to select the correct output buffer for the final simple_read_from_buffer call. Fixes: d014538aa385 ("hwmon: (pmbus) Driver for Delta power supplies Q54SJ108A2") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260304235116.1045-1-sanman.p211993@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/q54sj108a2.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/q54sj108a2.c b/drivers/hwmon/pmbus/q54sj108a2.c index fc030ca34480..d5d60a9af8c5 100644 --- a/drivers/hwmon/pmbus/q54sj108a2.c +++ b/drivers/hwmon/pmbus/q54sj108a2.c @@ -79,7 +79,8 @@ static ssize_t q54sj108a2_debugfs_read(struct file *file, char __user *buf, int idx = *idxp; struct q54sj108a2_data *psu = to_psu(idxp, idx); char data[I2C_SMBUS_BLOCK_MAX + 2] = { 0 }; - char data_char[I2C_SMBUS_BLOCK_MAX + 2] = { 0 }; + char data_char[I2C_SMBUS_BLOCK_MAX * 2 + 2] = { 0 }; + char *out = data; char *res; switch (idx) { @@ -150,27 +151,27 @@ static ssize_t q54sj108a2_debugfs_read(struct file *file, char __user *buf, if (rc < 0) return rc; - res = bin2hex(data, data_char, 32); - rc = res - data; - + res = bin2hex(data_char, data, rc); + rc = res - data_char; + out = data_char; break; case Q54SJ108A2_DEBUGFS_FLASH_KEY: rc = i2c_smbus_read_block_data(psu->client, PMBUS_FLASH_KEY_WRITE, data); if (rc < 0) return rc; - res = bin2hex(data, data_char, 4); - rc = res - data; - + res = bin2hex(data_char, data, rc); + rc = res - data_char; + out = data_char; break; default: return -EINVAL; } - data[rc] = '\n'; + out[rc] = '\n'; rc += 2; - return simple_read_from_buffer(buf, count, ppos, data, rc); + return simple_read_from_buffer(buf, count, ppos, out, rc); } static ssize_t q54sj108a2_debugfs_write(struct file *file, const char __user *buf, From 7f083faf59d14c04e01ec05a7507f036c965acf8 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Sat, 28 Feb 2026 23:53:07 +0900 Subject: [PATCH 742/828] net: sched: avoid qdisc_reset_all_tx_gt() vs dequeue race for lockless qdiscs When shrinking the number of real tx queues, netif_set_real_num_tx_queues() calls qdisc_reset_all_tx_gt() to flush qdiscs for queues which will no longer be used. qdisc_reset_all_tx_gt() currently serializes qdisc_reset() with qdisc_lock(). However, for lockless qdiscs, the dequeue path is serialized by qdisc_run_begin/end() using qdisc->seqlock instead, so qdisc_reset() can run concurrently with __qdisc_run() and free skbs while they are still being dequeued, leading to UAF. This can easily be reproduced on e.g. virtio-net by imposing heavy traffic while frequently changing the number of queue pairs: iperf3 -ub0 -c $peer -t 0 & while :; do ethtool -L eth0 combined 1 ethtool -L eth0 combined 2 done With KASAN enabled, this leads to reports like: BUG: KASAN: slab-use-after-free in __qdisc_run+0x133f/0x1760 ... Call Trace: ... __qdisc_run+0x133f/0x1760 __dev_queue_xmit+0x248f/0x3550 ip_finish_output2+0xa42/0x2110 ip_output+0x1a7/0x410 ip_send_skb+0x2e6/0x480 udp_send_skb+0xb0a/0x1590 udp_sendmsg+0x13c9/0x1fc0 ... Allocated by task 1270 on cpu 5 at 44.558414s: ... alloc_skb_with_frags+0x84/0x7c0 sock_alloc_send_pskb+0x69a/0x830 __ip_append_data+0x1b86/0x48c0 ip_make_skb+0x1e8/0x2b0 udp_sendmsg+0x13a6/0x1fc0 ... Freed by task 1306 on cpu 3 at 44.558445s: ... kmem_cache_free+0x117/0x5e0 pfifo_fast_reset+0x14d/0x580 qdisc_reset+0x9e/0x5f0 netif_set_real_num_tx_queues+0x303/0x840 virtnet_set_channels+0x1bf/0x260 [virtio_net] ethnl_set_channels+0x684/0xae0 ethnl_default_set_doit+0x31a/0x890 ... Serialize qdisc_reset_all_tx_gt() against the lockless dequeue path by taking qdisc->seqlock for TCQ_F_NOLOCK qdiscs, matching the serialization model already used by dev_reset_queue(). Additionally clear QDISC_STATE_NON_EMPTY after reset so the qdisc state reflects an empty queue, avoiding needless re-scheduling. Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Signed-off-by: Koichiro Den Link: https://patch.msgid.link/20260228145307.3955532-1-den@valinux.co.jp Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c3a7268b567e..d5d55cb21686 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -778,13 +778,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb) static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; + bool nolock; for (; i < dev->num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } } } } From 165573e41f2f66ef98940cf65f838b2cb575d9d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 20:55:27 +0000 Subject: [PATCH 743/828] tcp: secure_seq: add back ports to TS offset This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets") tcp_tw_recycle went away in 2017. Zhouyan Deng reported off-path TCP source port leakage via SYN cookie side-channel that can be fixed in multiple ways. One of them is to bring back TCP ports in TS offset randomization. As a bonus, we perform a single siphash() computation to provide both an ISN and a TS offset. Fixes: 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets") Reported-by: Zhouyan Deng Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Acked-by: Florian Westphal Link: https://patch.msgid.link/20260302205527.1982836-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/secure_seq.h | 45 ++++++++++++++++++---- include/net/tcp.h | 6 ++- net/core/secure_seq.c | 80 +++++++++++++++------------------------- net/ipv4/syncookies.c | 11 ++++-- net/ipv4/tcp_input.c | 8 +++- net/ipv4/tcp_ipv4.c | 37 +++++++++---------- net/ipv6/syncookies.c | 11 ++++-- net/ipv6/tcp_ipv6.c | 37 +++++++++---------- 8 files changed, 127 insertions(+), 108 deletions(-) diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index cddebafb9f77..6f996229167b 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -5,16 +5,47 @@ #include struct net; +extern struct net init_net; + +union tcp_seq_and_ts_off { + struct { + u32 seq; + u32 ts_off; + }; + u64 hash64; +}; u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); -u32 secure_tcpv6_ts_off(const struct net *net, - const __be32 *saddr, const __be32 *daddr); +union tcp_seq_and_ts_off +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + + return ts.seq; +} + +union tcp_seq_and_ts_off +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, + const __be32 *daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + + return ts.seq; +} #endif /* _NET_SECURE_SEQ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index eb8bf63fdafc..978eea2d5df0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -2464,8 +2465,9 @@ struct tcp_request_sock_ops { struct flowi *fl, struct request_sock *req, u32 tw_isn); - u32 (*init_seq)(const struct sk_buff *skb); - u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); + union tcp_seq_and_ts_off (*init_seq_and_ts_off)( + const struct net *net, + const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 9a3965680451..6a6f2cda5aae 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -20,7 +20,6 @@ #include static siphash_aligned_key_t net_secret; -static siphash_aligned_key_t ts_secret; #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) @@ -28,11 +27,6 @@ static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); } - -static __always_inline void ts_secret_init(void) -{ - net_get_random_once(&ts_secret, sizeof(ts_secret)); -} #endif #ifdef CONFIG_INET @@ -53,28 +47,9 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -u32 secure_tcpv6_ts_off(const struct net *net, - const __be32 *saddr, const __be32 *daddr) -{ - const struct { - struct in6_addr saddr; - struct in6_addr daddr; - } __aligned(SIPHASH_ALIGNMENT) combined = { - .saddr = *(struct in6_addr *)saddr, - .daddr = *(struct in6_addr *)daddr, - }; - - if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) - return 0; - - ts_secret_init(); - return siphash(&combined, offsetofend(typeof(combined), daddr), - &ts_secret); -} -EXPORT_IPV6_MOD(secure_tcpv6_ts_off); - -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport) +union tcp_seq_and_ts_off +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, + const __be32 *daddr, __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; @@ -87,14 +62,20 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, .sport = sport, .dport = dport }; - u32 hash; + union tcp_seq_and_ts_off st; net_secret_init(); - hash = siphash(&combined, offsetofend(typeof(combined), dport), - &net_secret); - return seq_scale(hash); + + st.hash64 = siphash(&combined, offsetofend(typeof(combined), dport), + &net_secret); + + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) + st.ts_off = 0; + + st.seq = seq_scale(st.seq); + return st; } -EXPORT_SYMBOL(secure_tcpv6_seq); +EXPORT_SYMBOL(secure_tcpv6_seq_and_ts_off); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) @@ -118,33 +99,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) -{ - if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) - return 0; - - ts_secret_init(); - return siphash_2u32((__force u32)saddr, (__force u32)daddr, - &ts_secret); -} - /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ -u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) +union tcp_seq_and_ts_off +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) { - u32 hash; + u32 ports = (__force u32)sport << 16 | (__force u32)dport; + union tcp_seq_and_ts_off st; net_secret_init(); - hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, - (__force u32)sport << 16 | (__force u32)dport, - &net_secret); - return seq_scale(hash); + + st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr, + ports, &net_secret); + + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) + st.ts_off = 0; + + st.seq = seq_scale(st.seq); + return st; } -EXPORT_SYMBOL_GPL(secure_tcp_seq); +EXPORT_SYMBOL_GPL(secure_tcp_seq_and_ts_off); u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 061751aabc8e..fc3affd9c801 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -378,9 +378,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(net, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr); + union tcp_seq_and_ts_off st; + + st = secure_tcp_seq_and_ts_off(net, + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); + tsoff = st.ts_off; tcp_opt.rcv_tsecr -= tsoff; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7b03f2460751..cba89733d121 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7646,6 +7646,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; + union tcp_seq_and_ts_off st; struct request_sock *req; bool want_cookie = false; struct dst_entry *dst; @@ -7715,9 +7716,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; + if (tmp_opt.tstamp_ok || (!want_cookie && !isn)) + st = af_ops->init_seq_and_ts_off(net, skb); + if (tmp_opt.tstamp_ok) { tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); - tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); + tcp_rsk(req)->ts_off = st.ts_off; } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); @@ -7739,7 +7743,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq(skb); + isn = st.seq; } tcp_ecn_create_request(req, skb, sk, dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 910c25cb24e1..c7b2463c2e25 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -105,17 +105,14 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { static DEFINE_MUTEX(tcp_exit_batch_mutex); -static u32 tcp_v4_init_seq(const struct sk_buff *skb) +static union tcp_seq_and_ts_off +tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcp_seq(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); -} - -static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) -{ - return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + return secure_tcp_seq_and_ts_off(net, + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -327,15 +324,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len rt = NULL; if (likely(!tp->repair)) { + union tcp_seq_and_ts_off st; + + st = secure_tcp_seq_and_ts_off(net, + inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port); if (!tp->write_seq) - WRITE_ONCE(tp->write_seq, - secure_tcp_seq(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port)); - WRITE_ONCE(tp->tsoffset, - secure_tcp_ts_off(net, inet->inet_saddr, - inet->inet_daddr)); + WRITE_ONCE(tp->write_seq, st.seq); + WRITE_ONCE(tp->tsoffset, st.ts_off); } atomic_set(&inet->inet_id, get_random_u16()); @@ -1677,8 +1675,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq = tcp_v4_init_seq, - .init_ts_off = tcp_v4_init_ts_off, + .init_seq_and_ts_off = tcp_v4_init_seq_and_ts_off, .send_synack = tcp_v4_send_synack, }; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7e007f013ec8..4f6f0d751d6c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -151,9 +151,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(net, - ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32); + union tcp_seq_and_ts_off st; + + st = secure_tcpv6_seq_and_ts_off(net, + ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); + tsoff = st.ts_off; tcp_opt.rcv_tsecr -= tsoff; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5195a46b951e..bb09d5ccf599 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -105,18 +105,14 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static u32 tcp_v6_init_seq(const struct sk_buff *skb) +static union tcp_seq_and_ts_off +tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); -} - -static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) -{ - return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32); + return secure_tcpv6_seq_and_ts_off(net, + ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); } static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, @@ -320,14 +316,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, sk_set_txhash(sk); if (likely(!tp->repair)) { + union tcp_seq_and_ts_off st; + + st = secure_tcpv6_seq_and_ts_off(net, + np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport); if (!tp->write_seq) - WRITE_ONCE(tp->write_seq, - secure_tcpv6_seq(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport)); - tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32); + WRITE_ONCE(tp->write_seq, st.seq); + tp->tsoffset = st.ts_off; } if (tcp_fastopen_defer_connect(sk, &err)) @@ -817,8 +815,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, - .init_seq = tcp_v6_init_seq, - .init_ts_off = tcp_v6_init_ts_off, + .init_seq_and_ts_off = tcp_v6_init_seq_and_ts_off, .send_synack = tcp_v6_send_synack, }; From f7d92f11bd33a6eb49c7c812255ef4ab13681f0f Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Mon, 2 Mar 2026 18:32:37 +0200 Subject: [PATCH 744/828] net: nfc: nci: Fix zero-length proprietary notifications NCI NFC controllers may have proprietary OIDs with zero-length payload. One example is: drivers/nfc/nxp-nci/core.c, NXP_NCI_RF_TXLDO_ERROR_NTF. Allow a zero length payload in proprietary notifications *only*. Before: -- >8 -- kernel: nci: nci_recv_frame: len 3 -- >8 -- After: -- >8 -- kernel: nci: nci_recv_frame: len 3 kernel: nci: nci_ntf_packet: NCI RX: MT=ntf, PBF=0, GID=0x1, OID=0x23, plen=0 kernel: nci: nci_ntf_packet: unknown ntf opcode 0x123 kernel: nfc nfc0: NFC: RF transmitter couldn't start. Bad power and/or configuration? -- >8 -- After fixing the hardware: -- >8 -- kernel: nci: nci_recv_frame: len 27 kernel: nci: nci_ntf_packet: NCI RX: MT=ntf, PBF=0, GID=0x1, OID=0x5, plen=24 kernel: nci: nci_rf_intf_activated_ntf_packet: rf_discovery_id 1 -- >8 -- Fixes: d24b03535e5e ("nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet") Signed-off-by: Ian Ray Link: https://patch.msgid.link/20260302163238.140576-1-ian.ray@gehealthcare.com Signed-off-by: Jakub Kicinski --- net/nfc/nci/core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6e9b76e2cc56..e95eef85971f 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1482,10 +1482,20 @@ static bool nci_valid_size(struct sk_buff *skb) unsigned int hdr_size = NCI_CTRL_HDR_SIZE; if (skb->len < hdr_size || - !nci_plen(skb->data) || skb->len < hdr_size + nci_plen(skb->data)) { return false; } + + if (!nci_plen(skb->data)) { + /* Allow zero length in proprietary notifications (0x20 - 0x3F). */ + if (nci_opcode_oid(nci_opcode(skb->data)) >= 0x20 && + nci_mt(skb->data) == NCI_MT_NTF_PKT) + return true; + + /* Disallow zero length otherwise. */ + return false; + } + return true; } From a4c2b8be2e5329e7fac6e8f64ddcb8958155cfcb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Mar 2026 01:56:40 +0000 Subject: [PATCH 745/828] net_sched: sch_fq: clear q->band_pkt_count[] in fq_reset() When/if a NIC resets, queues are deactivated by dev_deactivate_many(), then reactivated when the reset operation completes. fq_reset() removes all the skbs from various queues. If we do not clear q->band_pkt_count[], these counters keep growing and can eventually reach sch->limit, preventing new packets to be queued. Many thanks to Praveen for discovering the root cause. Fixes: 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") Diagnosed-by: Praveen Kaligineedi Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260304015640.961780-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 80235e85f844..05084c9af48e 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -827,6 +827,7 @@ static void fq_reset(struct Qdisc *sch) for (idx = 0; idx < FQ_BANDS; idx++) { q->band_flows[idx].new_flows.first = NULL; q->band_flows[idx].old_flows.first = NULL; + q->band_pkt_count[idx] = 0; } q->delayed = RB_ROOT; q->flows = 0; From 40bf00ec2ee271df5ba67593991760adf8b5d0ed Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 2 Mar 2026 16:32:56 -0800 Subject: [PATCH 746/828] net: devmem: use READ_ONCE/WRITE_ONCE on binding->dev binding->dev is protected on the write-side in mp_dmabuf_devmem_uninstall() against concurrent writes, but due to the concurrent bare reads in net_devmem_get_binding() and validate_xmit_unreadable_skb() it should be wrapped in a READ_ONCE/WRITE_ONCE pair to make sure no compiler optimizations play with the underlying register in unforeseen ways. Doesn't present a critical bug because the known compiler optimizations don't result in bad behavior. There is no tearing on u64, and load omissions/invented loads would only break if additional binding->dev references were inlined together (they aren't right now). This just more strictly follows the linux memory model (i.e., "Lock-Protected Writes With Lockless Reads" in tools/memory-model/Documentation/access-marking.txt). Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260302-devmem-membar-fix-v2-1-5b33c9cbc28b@meta.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 +- net/core/devmem.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4af4cf2d63a4..20610a192ec7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3987,7 +3987,7 @@ static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb, if (shinfo->nr_frags > 0) { niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0])); if (net_is_devmem_iov(niov) && - net_devmem_iov_binding(niov)->dev != dev) + READ_ONCE(net_devmem_iov_binding(niov)->dev) != dev) goto out_free; } diff --git a/net/core/devmem.c b/net/core/devmem.c index 8c9aad776bf4..69d79aee07ef 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -396,7 +396,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, * net_device. */ dst_dev = dst_dev_rcu(dst); - if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) { + if (unlikely(!dst_dev) || + unlikely(dst_dev != READ_ONCE(binding->dev))) { err = -ENODEV; goto out_unlock; } @@ -513,7 +514,8 @@ static void mp_dmabuf_devmem_uninstall(void *mp_priv, xa_erase(&binding->bound_rxqs, xa_idx); if (xa_empty(&binding->bound_rxqs)) { mutex_lock(&binding->lock); - binding->dev = NULL; + ASSERT_EXCLUSIVE_WRITER(binding->dev); + WRITE_ONCE(binding->dev, NULL); mutex_unlock(&binding->lock); } break; From 7bd4b0c4779f978a6528c9b7937d2ca18e936e2c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:23:41 -0800 Subject: [PATCH 747/828] nfc: nci: free skb on nci_transceive early error paths nci_transceive() takes ownership of the skb passed by the caller, but the -EPROTO, -EINVAL, and -EBUSY error paths return without freeing it. Due to issues clearing NCI_DATA_EXCHANGE fixed by subsequent changes the nci/nci_dev selftest hits the error path occasionally in NIPA, and kmemleak detects leaks: unreferenced object 0xff11000015ce6a40 (size 640): comm "nci_dev", pid 3954, jiffies 4295441246 hex dump (first 32 bytes): 6b 6b 6b 6b 00 a4 00 0c 02 e1 03 6b 6b 6b 6b 6b kkkk.......kkkkk 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk backtrace (crc 7c40cc2a): kmem_cache_alloc_node_noprof+0x492/0x630 __alloc_skb+0x11e/0x5f0 alloc_skb_with_frags+0xc6/0x8f0 sock_alloc_send_pskb+0x326/0x3f0 nfc_alloc_send_skb+0x94/0x1d0 rawsock_sendmsg+0x162/0x4c0 do_syscall_64+0x117/0xfc0 Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260303162346.2071888-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/nfc/nci/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index e95eef85971f..e859b834c0f1 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1035,18 +1035,23 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct nci_conn_info *conn_info; conn_info = ndev->rf_conn_info; - if (!conn_info) + if (!conn_info) { + kfree_skb(skb); return -EPROTO; + } pr_debug("target_idx %d, len %d\n", target->idx, skb->len); if (!ndev->target_active_prot) { pr_err("unable to exchange data, no active target\n"); + kfree_skb(skb); return -EINVAL; } - if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) { + kfree_skb(skb); return -EBUSY; + } /* store cb and context to be used on receiving data */ conn_info->data_exchange_cb = cb; From d42449d2c17cdf06d1f63268557450bd3f051e9a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:23:42 -0800 Subject: [PATCH 748/828] nfc: digital: free skb on digital_in_send error paths digital_in_send() takes ownership of the skb passed by the caller (nfc_data_exchange), make sure it's freed on all error paths. Found looking around the real driver for similar bugs to the one just fixed in nci. Fixes: 2c66daecc409 ("NFC Digital: Add NFC-A technology support") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260303162346.2071888-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/nfc/digital_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 3670bb33732e..7cb1e6aaae90 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -707,8 +707,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, int rc; data_exch = kzalloc_obj(*data_exch); - if (!data_exch) + if (!data_exch) { + kfree_skb(skb); return -ENOMEM; + } data_exch->cb = cb; data_exch->cb_context = cb_context; @@ -731,8 +733,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, data_exch); exit: - if (rc) + if (rc) { + kfree_skb(skb); kfree(data_exch); + } return rc; } From 66083581945bd5b8e99fe49b5aeb83d03f62d053 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:23:43 -0800 Subject: [PATCH 749/828] nfc: nci: complete pending data exchange on device close In nci_close_device(), complete any pending data exchange before closing. The data exchange callback (e.g. rawsock_data_exchange_complete) holds a socket reference. NIPA occasionally hits this leak: unreferenced object 0xff1100000f435000 (size 2048): comm "nci_dev", pid 3954, jiffies 4295441245 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 27 00 01 40 00 00 00 00 00 00 00 00 00 00 00 00 '..@............ backtrace (crc ec2b3c5): __kmalloc_noprof+0x4db/0x730 sk_prot_alloc.isra.0+0xe4/0x1d0 sk_alloc+0x36/0x760 rawsock_create+0xd1/0x540 nfc_sock_create+0x11f/0x280 __sock_create+0x22d/0x630 __sys_socket+0x115/0x1d0 __x64_sys_socket+0x72/0xd0 do_syscall_64+0x117/0xfc0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: 38f04c6b1b68 ("NFC: protect nci_data_exchange transactions") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260303162346.2071888-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/nfc/nci/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index e859b834c0f1..43d871525dbc 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -567,6 +567,10 @@ static int nci_close_device(struct nci_dev *ndev) flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); timer_delete_sync(&ndev->data_timer); + if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + nci_data_exchange_complete(ndev, NULL, + ndev->cur_conn_id, + -ENODEV); mutex_unlock(&ndev->req_lock); return 0; } @@ -598,6 +602,11 @@ static int nci_close_device(struct nci_dev *ndev) flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); + timer_delete_sync(&ndev->data_timer); + + if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id, + -ENODEV); /* Clear flags except NCI_UNREG */ ndev->flags &= BIT(NCI_UNREG); From 0efdc02f4f6d52f8ca5d5889560f325a836ce0a8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:23:44 -0800 Subject: [PATCH 750/828] nfc: nci: clear NCI_DATA_EXCHANGE before calling completion callback Move clear_bit(NCI_DATA_EXCHANGE) before invoking the data exchange callback in nci_data_exchange_complete(). The callback (e.g. rawsock_data_exchange_complete) may immediately schedule another data exchange via schedule_work(tx_work). On a multi-CPU system, tx_work can run and reach nci_transceive() before the current nci_data_exchange_complete() clears the flag, causing test_and_set_bit(NCI_DATA_EXCHANGE) to return -EBUSY and the new transfer to fail. This causes intermittent flakes in nci/nci_dev in NIPA: # # RUN NCI.NCI1_0.t4t_tag_read ... # # t4t_tag_read: Test terminated by timeout # # FAIL NCI.NCI1_0.t4t_tag_read # not ok 3 NCI.NCI1_0.t4t_tag_read Fixes: 38f04c6b1b68 ("NFC: protect nci_data_exchange transactions") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260303162346.2071888-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/nfc/nci/data.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 78f4131af3cf..5f98c73db5af 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -33,7 +33,8 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) { kfree_skb(skb); - goto exit; + clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); + return; } cb = conn_info->data_exchange_cb; @@ -45,6 +46,12 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, timer_delete_sync(&ndev->data_timer); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); + /* Mark the exchange as done before calling the callback. + * The callback (e.g. rawsock_data_exchange_complete) may + * want to immediately queue another data exchange. + */ + clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); + if (cb) { /* forward skb to nfc core */ cb(cb_context, skb, err); @@ -54,9 +61,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, /* no waiting callback, free skb */ kfree_skb(skb); } - -exit: - clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); } /* ----------------- NCI TX Data ----------------- */ From d793458c45df2aed498d7f74145eab7ee22d25aa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:23:45 -0800 Subject: [PATCH 751/828] nfc: rawsock: cancel tx_work before socket teardown In rawsock_release(), cancel any pending tx_work and purge the write queue before orphaning the socket. rawsock_tx_work runs on the system workqueue and calls nfc_data_exchange which dereferences the NCI device. Without synchronization, tx_work can race with socket and device teardown when a process is killed (e.g. by SIGKILL), leading to use-after-free or leaked references. Set SEND_SHUTDOWN first so that if tx_work is already running it will see the flag and skip transmitting, then use cancel_work_sync to wait for any in-progress execution to finish, and finally purge any remaining queued skbs. Fixes: 23b7869c0fd0 ("NFC: add the NFC socket raw protocol") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260303162346.2071888-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/nfc/rawsock.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index b049022399ae..f7d7a599fade 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -67,6 +67,17 @@ static int rawsock_release(struct socket *sock) if (sock->type == SOCK_RAW) nfc_sock_unlink(&raw_sk_list, sk); + if (sk->sk_state == TCP_ESTABLISHED) { + /* Prevent rawsock_tx_work from starting new transmits and + * wait for any in-progress work to finish. This must happen + * before the socket is orphaned to avoid a race where + * rawsock_tx_work runs after the NCI device has been freed. + */ + sk->sk_shutdown |= SEND_SHUTDOWN; + cancel_work_sync(&nfc_rawsock(sk)->tx_work); + rawsock_write_queue_purge(sk); + } + sock_orphan(sk); sock_put(sk); From 8c09412e584d9bcc0e71d758ec1008d1c8d1a326 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 3 Mar 2026 11:56:02 +0100 Subject: [PATCH 752/828] selftests: mptcp: more stable simult_flows tests By default, the netem qdisc can keep up to 1000 packets under its belly to deal with the configured rate and delay. The simult flows test-case simulates very low speed links, to avoid problems due to slow CPUs and the TCP stack tend to transmit at a slightly higher rate than the (virtual) link constraints. All the above causes a relatively large amount of packets being enqueued in the netem qdiscs - the longer the transfer, the longer the queue - producing increasingly high TCP RTT samples and consequently increasingly larger receive buffer size due to DRS. When the receive buffer size becomes considerably larger than the needed size, the tests results can flake, i.e. because minimal inaccuracy in the pacing rate can lead to a single subflow usage towards the end of the connection for a considerable amount of data. Address the issue explicitly setting netem limits suitable for the configured link speeds and unflake all the affected tests. Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-1-4b5462b6f016@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 806aaa7d2d61..d11a8b949aab 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -237,10 +237,13 @@ run_test() for dev in ns2eth1 ns2eth2; do tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1 done - tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 - tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 - tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 - tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 + + # keep the queued pkts number low, or the RTT estimator will see + # increasing latency over time. + tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50 + tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50 + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50 # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) From fb8d0bccb221080630efcd9660c9f9349e53cc9e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 3 Mar 2026 11:56:03 +0100 Subject: [PATCH 753/828] mptcp: pm: avoid sending RM_ADDR over same subflow RM_ADDR are sent over an active subflow, the first one in the subflows list. There is then a high chance the initial subflow is picked. With the in-kernel PM, when an endpoint is removed, a RM_ADDR is sent, then linked subflows are closed. This is done for each active MPTCP connection. MPTCP endpoints are likely removed because the attached network is no longer available or usable. In this case, it is better to avoid sending this RM_ADDR over the subflow that is going to be removed, but prefer sending it over another active and non stale subflow, if any. This modification avoids situations where the other end is not notified when a subflow is no longer usable: typically when the endpoint linked to the initial subflow is removed, especially on the server side. Fixes: 8dd5efb1f91b ("mptcp: send ack for rm_addr") Cc: stable@vger.kernel.org Reported-by: Frank Lorenz Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/612 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-2-4b5462b6f016@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 55 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 7298836469b3..57a456690406 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); } -void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) +static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow, + const struct mptcp_rm_list *rm_list) { - struct mptcp_subflow_context *subflow, *alt = NULL; + u8 i, id = subflow_get_local_id(subflow); + + for (i = 0; i < rm_list->nr; i++) { + if (rm_list->ids[i] == id) + return true; + } + + return false; +} + +static void +mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) +{ + struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); @@ -224,19 +239,35 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) return; mptcp_for_each_subflow(msk, subflow) { - if (__mptcp_subflow_active(subflow)) { - if (!subflow->stale) { - mptcp_pm_send_ack(msk, subflow, false, false); - return; - } + if (!__mptcp_subflow_active(subflow)) + continue; - if (!alt) - alt = subflow; + if (unlikely(subflow->stale)) { + if (!stale) + stale = subflow; + } else if (unlikely(rm_list && + subflow_in_rm_list(subflow, rm_list))) { + if (!same_id) + same_id = subflow; + } else { + goto send_ack; } } - if (alt) - mptcp_pm_send_ack(msk, alt, false, false); + if (same_id) + subflow = same_id; + else if (stale) + subflow = stale; + else + return; + +send_ack: + mptcp_pm_send_ack(msk, subflow, false, false); +} + +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) +{ + mptcp_pm_addr_send_ack_avoid_list(msk, NULL); } int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, @@ -470,7 +501,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); - mptcp_pm_addr_send_ack(msk); + mptcp_pm_addr_send_ack_avoid_list(msk, rm_list); return 0; } From 560edd99b5f58b2d4bbe3c8e51e1eed68d887b0e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 3 Mar 2026 11:56:04 +0100 Subject: [PATCH 754/828] selftests: mptcp: join: check RM_ADDR not sent over same subflow This validates the previous commit: RM_ADDR were sent over the first found active subflow which could be the same as the one being removed. It is more likely to loose this notification. For this check, RM_ADDR are explicitly dropped when trying to send them over the initial subflow, when removing the endpoint attached to it. If it is dropped, the test will complain because some RM_ADDR have not been received. Note that only the RM_ADDR are dropped, to allow the linked subflow to be quickly and cleanly closed. To only drop those RM_ADDR, a cBPF byte code is used. If the IPTables commands fail, that's OK, the tests will continue to pass, but not validate this part. This can be ignored: another subtest fully depends on such command, and will be marked as skipped. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 8dd5efb1f91b ("mptcp: send ack for rm_addr") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-3-4b5462b6f016@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index dc1f200aaa81..058ad5a13d24 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -104,6 +104,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, 6 0 0 65535, 6 0 0 0" +# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption +# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c && +# (ip[66] & 0xf0) == 0x40'" +CBPF_MPTCP_SUBOPTION_RM_ADDR="13, + 48 0 0 0, + 84 0 0 240, + 21 0 9 64, + 48 0 0 32, + 84 0 0 240, + 21 0 6 192, + 48 0 0 53, + 21 0 4 12, + 48 0 0 66, + 84 0 0 240, + 21 0 1 64, + 6 0 0 65535, + 6 0 0 0" + init_partial() { capout=$(mktemp) @@ -4217,6 +4235,14 @@ endpoint_tests() chk_subflow_nr "after no reject" 3 chk_mptcp_info subflows 2 subflows 2 + # To make sure RM_ADDR are sent over a different subflow, but + # allow the rest to quickly and cleanly close the subflow + local ipt=1 + ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \ + -p tcp -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \ + -j DROP || ipt=0 local i for i in $(seq 3); do pm_nl_del_endpoint $ns2 1 10.0.1.2 @@ -4229,6 +4255,7 @@ endpoint_tests() chk_subflow_nr "after re-add id 0 ($i)" 3 chk_mptcp_info subflows 3 subflows 3 done + [ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1 mptcp_lib_kill_group_wait $tests_pid @@ -4288,11 +4315,20 @@ endpoint_tests() chk_mptcp_info subflows 2 subflows 2 chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + # To make sure RM_ADDR are sent over a different subflow, but + # allow the rest to quickly and cleanly close the subflow + local ipt=1 + ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \ + -p tcp -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \ + -j DROP || ipt=0 pm_nl_del_endpoint $ns1 42 10.0.1.1 sleep 0.5 chk_subflow_nr "after delete ID 0" 2 chk_mptcp_info subflows 2 subflows 2 chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + [ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1 pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal wait_mpj 4 From 579a752464a64cb5f9139102f0e6b90a1f595ceb Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 3 Mar 2026 11:56:05 +0100 Subject: [PATCH 755/828] mptcp: pm: in-kernel: always mark signal+subflow endp as used Syzkaller managed to find a combination of actions that was generating this warning: msk->pm.local_addr_used == 0 WARNING: net/mptcp/pm_kernel.c:1071 at __mark_subflow_endp_available net/mptcp/pm_kernel.c:1071 [inline], CPU#1: syz.2.17/961 WARNING: net/mptcp/pm_kernel.c:1071 at mptcp_nl_remove_subflow_and_signal_addr net/mptcp/pm_kernel.c:1103 [inline], CPU#1: syz.2.17/961 WARNING: net/mptcp/pm_kernel.c:1071 at mptcp_pm_nl_del_addr_doit+0x81d/0x8f0 net/mptcp/pm_kernel.c:1210, CPU#1: syz.2.17/961 Modules linked in: CPU: 1 UID: 0 PID: 961 Comm: syz.2.17 Not tainted 6.19.0-08368-gfafda3b4b06b #22 PREEMPT(full) Hardware name: QEMU Ubuntu 25.10 PC v2 (i440FX + PIIX, + 10.1 machine, 1996), BIOS 1.17.0-debian-1.17.0-1build1 04/01/2014 RIP: 0010:__mark_subflow_endp_available net/mptcp/pm_kernel.c:1071 [inline] RIP: 0010:mptcp_nl_remove_subflow_and_signal_addr net/mptcp/pm_kernel.c:1103 [inline] RIP: 0010:mptcp_pm_nl_del_addr_doit+0x81d/0x8f0 net/mptcp/pm_kernel.c:1210 Code: 89 c5 e8 46 30 6f fe e9 21 fd ff ff 49 83 ed 80 e8 38 30 6f fe 4c 89 ef be 03 00 00 00 e8 db 49 df fe eb ac e8 24 30 6f fe 90 <0f> 0b 90 e9 1d ff ff ff e8 16 30 6f fe eb 05 e8 0f 30 6f fe e8 9a RSP: 0018:ffffc90001663880 EFLAGS: 00010293 RAX: ffffffff82de1a6c RBX: 0000000000000000 RCX: ffff88800722b500 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff8880158b22d0 R08: 0000000000010425 R09: ffffffffffffffff R10: ffffffff82de18ba R11: 0000000000000000 R12: ffff88800641a640 R13: ffff8880158b1880 R14: ffff88801ec3c900 R15: ffff88800641a650 FS: 00005555722c3500(0000) GS:ffff8880f909d000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f66346e0f60 CR3: 000000001607c000 CR4: 0000000000350ef0 Call Trace: genl_family_rcv_msg_doit+0x117/0x180 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x3a8/0x3f0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x16d/0x240 net/netlink/af_netlink.c:2550 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] netlink_unicast+0x3e9/0x4c0 net/netlink/af_netlink.c:1344 netlink_sendmsg+0x4aa/0x5b0 net/netlink/af_netlink.c:1894 sock_sendmsg_nosec net/socket.c:727 [inline] __sock_sendmsg+0xc9/0xf0 net/socket.c:742 ____sys_sendmsg+0x272/0x3b0 net/socket.c:2592 ___sys_sendmsg+0x2de/0x320 net/socket.c:2646 __sys_sendmsg net/socket.c:2678 [inline] __do_sys_sendmsg net/socket.c:2683 [inline] __se_sys_sendmsg net/socket.c:2681 [inline] __x64_sys_sendmsg+0x110/0x1a0 net/socket.c:2681 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x143/0x440 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f66346f826d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc83d8bdc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f6634985fa0 RCX: 00007f66346f826d RDX: 00000000040000b0 RSI: 0000200000000740 RDI: 0000000000000007 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f6634985fa8 R13: 00007f6634985fac R14: 0000000000000000 R15: 0000000000001770 The actions that caused that seem to be: - Set the MPTCP subflows limit to 0 - Create an MPTCP endpoint with both the 'signal' and 'subflow' flags - Create a new MPTCP connection from a different address: an ADD_ADDR linked to the MPTCP endpoint will be sent ('signal' flag), but no subflows is initiated ('subflow' flag) - Remove the MPTCP endpoint In this case, msk->pm.local_addr_used has been kept to 0 -- because no subflows have been created -- but the corresponding bit in msk->pm.id_avail_bitmap has been cleared when the ADD_ADDR has been sent. This later causes a splat when removing the MPTCP endpoint because msk->pm.local_addr_used has been kept to 0. Now, if an endpoint has both the signal and subflow flags, but it is not possible to create subflows because of the limits or the c-flag case, then the local endpoint counter is still incremented: the endpoint is used at the end. This avoids issues later when removing the endpoint and calling __mark_subflow_endp_available(), which expects msk->pm.local_addr_used to have been previously incremented if the endpoint was marked as used according to msk->pm.id_avail_bitmap. Note that signal_and_subflow variable is reset to false when the limits and the c-flag case allows subflows creation. Also, local_addr_used is only incremented for non ID0 subflows. Fixes: 85df533a787b ("mptcp: pm: do not ignore 'subflow' if 'signal' flag is also set") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/613 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-4-4b5462b6f016@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index b5316a6c7d1b..b2b9df43960e 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -418,6 +418,15 @@ subflow: } exit: + /* If an endpoint has both the signal and subflow flags, but it is not + * possible to create subflows -- the 'while' loop body above never + * executed -- then still mark the endp as used, which is somehow the + * case. This avoids issues later when removing the endpoint and calling + * __mark_subflow_endp_available(), which expects the increment here. + */ + if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id) + msk->pm.local_addr_used++; + mptcp_pm_nl_check_work_pending(msk); } From 1777f349ff41b62dfe27454b69c27b0bc99ffca5 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 3 Mar 2026 11:56:06 +0100 Subject: [PATCH 756/828] selftests: mptcp: join: check removing signal+subflow endp This validates the previous commit: endpoints with both the signal and subflow flags should always be marked as used even if it was not possible to create new subflows due to the MPTCP PM limits. For this test, an extra endpoint is created with both the signal and the subflow flags, and limits are set not to create extra subflows. In this case, an ADD_ADDR is sent, but no subflows are created. Still, the local endpoint is marked as used, and no warning is fired when removing the endpoint, after having sent a RM_ADDR. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 85df533a787b ("mptcp: pm: do not ignore 'subflow' if 'signal' flag is also set") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-5-4b5462b6f016@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 058ad5a13d24..a3144d7298a5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2626,6 +2626,19 @@ remove_tests() chk_rst_nr 0 0 fi + # signal+subflow with limits, remove + if reset "remove signal+subflow with limits"; then + pm_nl_set_limits $ns1 0 0 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow + pm_nl_set_limits $ns2 0 0 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 1 1 + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 + fi + # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 From 35dfedce442c4060cfe5b98368bc9643fb995716 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Tue, 3 Mar 2026 14:58:25 +0000 Subject: [PATCH 757/828] net: stmmac: Fix error handling in VLAN add and delete paths stmmac_vlan_rx_add_vid() updates active_vlans and the VLAN hash register before writing the HW filter entry. If the filter write fails, it leaves a stale VID in active_vlans and the hash register. stmmac_vlan_rx_kill_vid() has the reverse problem: it clears active_vlans before removing the HW filter. On failure, the VID is gone from active_vlans but still present in the HW filter table. To fix this, reorder the operations to update the hash table first, then attempt the HW filter operation. If the HW filter fails, roll back both the active_vlans bitmap and the hash table by calling stmmac_vlan_update() again. Fixes: ed64639bc1e0 ("net: stmmac: Add support for VLAN Rx filtering") Signed-off-by: Ovidiu Panait Link: https://patch.msgid.link/20260303145828.7845-2-ovidiu.panait.rb@renesas.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e00aa42a1961..b6a127316ab5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6794,9 +6794,13 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid if (priv->hw->num_vlan) { ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); - if (ret) + if (ret) { + clear_bit(vid, priv->active_vlans); + stmmac_vlan_update(priv, is_double); goto err_pm_put; + } } + err_pm_put: pm_runtime_put(priv->device); @@ -6820,15 +6824,21 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi is_double = true; clear_bit(vid, priv->active_vlans); + ret = stmmac_vlan_update(priv, is_double); + if (ret) { + set_bit(vid, priv->active_vlans); + goto del_vlan_error; + } if (priv->hw->num_vlan) { ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); - if (ret) + if (ret) { + set_bit(vid, priv->active_vlans); + stmmac_vlan_update(priv, is_double); goto del_vlan_error; + } } - ret = stmmac_vlan_update(priv, is_double); - del_vlan_error: pm_runtime_put(priv->device); From e38200e361cbe331806dc454c76c11c7cd95e1b9 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Tue, 3 Mar 2026 14:58:26 +0000 Subject: [PATCH 758/828] net: stmmac: Improve double VLAN handling The double VLAN bits (EDVLP, ESVL, DOVLTC) are handled inconsistently between the two vlan_update_hash() implementations: - dwxgmac2_update_vlan_hash() explicitly clears the double VLAN bits when is_double is false, meaning that adding a 802.1Q VLAN will disable double VLAN mode: $ ip link add link eth0 name eth0.200 type vlan id 200 protocol 802.1ad $ ip link add link eth0 name eth0.100 type vlan id 100 # Double VLAN bits no longer set - vlan_update_hash() sets these bits and only clears them when the last VLAN has been removed, so double VLAN mode remains enabled even after all 802.1AD VLANs are removed. Address both issues by tracking the number of active 802.1AD VLANs in priv->num_double_vlans. Pass this count to stmmac_vlan_update() so both implementations correctly set the double VLAN bits when any 802.1AD VLAN is active, and clear them only when none remain. Also update vlan_update_hash() to explicitly clear the double VLAN bits when is_double is false, matching the dwxgmac2 behavior. Signed-off-by: Ovidiu Panait Link: https://patch.msgid.link/20260303145828.7845-3-ovidiu.panait.rb@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++++++++---- .../net/ethernet/stmicro/stmmac/stmmac_vlan.c | 8 ++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 51c96a738151..33667a26708c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -323,6 +323,7 @@ struct stmmac_priv { void __iomem *ptpaddr; void __iomem *estaddr; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + unsigned int num_double_vlans; int sfty_irq; int sfty_ce_irq; int sfty_ue_irq; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b6a127316ab5..01d181c2c68e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6775,6 +6775,7 @@ static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double) static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct stmmac_priv *priv = netdev_priv(ndev); + unsigned int num_double_vlans; bool is_double = false; int ret; @@ -6786,7 +6787,8 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid is_double = true; set_bit(vid, priv->active_vlans); - ret = stmmac_vlan_update(priv, is_double); + num_double_vlans = priv->num_double_vlans + is_double; + ret = stmmac_vlan_update(priv, num_double_vlans); if (ret) { clear_bit(vid, priv->active_vlans); goto err_pm_put; @@ -6796,11 +6798,13 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); if (ret) { clear_bit(vid, priv->active_vlans); - stmmac_vlan_update(priv, is_double); + stmmac_vlan_update(priv, priv->num_double_vlans); goto err_pm_put; } } + priv->num_double_vlans = num_double_vlans; + err_pm_put: pm_runtime_put(priv->device); @@ -6813,6 +6817,7 @@ err_pm_put: static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct stmmac_priv *priv = netdev_priv(ndev); + unsigned int num_double_vlans; bool is_double = false; int ret; @@ -6824,7 +6829,8 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi is_double = true; clear_bit(vid, priv->active_vlans); - ret = stmmac_vlan_update(priv, is_double); + num_double_vlans = priv->num_double_vlans - is_double; + ret = stmmac_vlan_update(priv, num_double_vlans); if (ret) { set_bit(vid, priv->active_vlans); goto del_vlan_error; @@ -6834,11 +6840,13 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); if (ret) { set_bit(vid, priv->active_vlans); - stmmac_vlan_update(priv, is_double); + stmmac_vlan_update(priv, priv->num_double_vlans); goto del_vlan_error; } } + priv->num_double_vlans = num_double_vlans; + del_vlan_error: pm_runtime_put(priv->device); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index b18404dd5a8b..de1a70e1c86e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -183,6 +183,10 @@ static void vlan_update_hash(struct mac_device_info *hw, u32 hash, value |= VLAN_EDVLP; value |= VLAN_ESVL; value |= VLAN_DOVLTC; + } else { + value &= ~VLAN_EDVLP; + value &= ~VLAN_ESVL; + value &= ~VLAN_DOVLTC; } writel(value, ioaddr + VLAN_TAG); @@ -193,6 +197,10 @@ static void vlan_update_hash(struct mac_device_info *hw, u32 hash, value |= VLAN_EDVLP; value |= VLAN_ESVL; value |= VLAN_DOVLTC; + } else { + value &= ~VLAN_EDVLP; + value &= ~VLAN_ESVL; + value &= ~VLAN_DOVLTC; } writel(value | perfect_match, ioaddr + VLAN_TAG); From bd7ad51253a76fb35886d01cfe9a37f0e4ed6709 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Tue, 3 Mar 2026 14:58:27 +0000 Subject: [PATCH 759/828] net: stmmac: Fix VLAN HW state restore When the network interface is opened or resumed, a DMA reset is performed, which resets all hardware state, including VLAN state. Currently, only the resume path is restoring the VLAN state via stmmac_restore_hw_vlan_rx_fltr(), but that is incomplete: the VLAN hash table and the VLAN_TAG control bits are not restored. Therefore, add stmmac_vlan_restore(), which restores the full VLAN state by updating both the HW filter entries and the hash table, and call it from both the open and resume paths. The VLAN restore is moved outside of phylink_rx_clk_stop_block/unblock in the resume path because receive clock stop is already disabled when stmmac supports VLAN. Also, remove the hash readback code in vlan_restore_hw_rx_fltr() that attempts to restore VTHM by reading VLAN_HASH_TABLE, as it always reads zero after DMA reset, making it dead code. Fixes: 3cd1cfcba26e ("net: stmmac: Implement VLAN Hash Filtering in XGMAC") Fixes: ed64639bc1e0 ("net: stmmac: Add support for VLAN Rx filtering") Signed-off-by: Ovidiu Panait Link: https://patch.msgid.link/20260303145828.7845-4-ovidiu.panait.rb@renesas.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 24 +++++++++++++++++-- .../net/ethernet/stmicro/stmmac/stmmac_vlan.c | 10 -------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 01d181c2c68e..6d4ce35e990f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -156,6 +156,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue); static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue); static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan); +static int stmmac_vlan_restore(struct stmmac_priv *priv); #ifdef CONFIG_DEBUG_FS static const struct net_device_ops stmmac_netdev_ops; @@ -4107,6 +4108,8 @@ static int __stmmac_open(struct net_device *dev, phylink_start(priv->phylink); + stmmac_vlan_restore(priv); + ret = stmmac_request_irq(dev); if (ret) goto irq_error; @@ -6853,6 +6856,23 @@ del_vlan_error: return ret; } +static int stmmac_vlan_restore(struct stmmac_priv *priv) +{ + int ret; + + if (!(priv->dev->features & NETIF_F_VLAN_FEATURES)) + return 0; + + if (priv->hw->num_vlan) + stmmac_restore_hw_vlan_rx_fltr(priv, priv->dev, priv->hw); + + ret = stmmac_vlan_update(priv, priv->num_double_vlans); + if (ret) + netdev_err(priv->dev, "Failed to restore VLANs\n"); + + return ret; +} + static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf) { struct stmmac_priv *priv = netdev_priv(dev); @@ -8277,10 +8297,10 @@ int stmmac_resume(struct device *dev) stmmac_init_coalesce(priv); phylink_rx_clk_stop_block(priv->phylink); stmmac_set_rx_mode(ndev); - - stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw); phylink_rx_clk_stop_unblock(priv->phylink); + stmmac_vlan_restore(priv); + stmmac_enable_all_queues(priv); stmmac_enable_all_dma_irq(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index de1a70e1c86e..fcc34867405e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -139,9 +139,6 @@ static int vlan_del_hw_rx_fltr(struct net_device *dev, static void vlan_restore_hw_rx_fltr(struct net_device *dev, struct mac_device_info *hw) { - void __iomem *ioaddr = hw->pcsr; - u32 value; - u32 hash; u32 val; int i; @@ -158,13 +155,6 @@ static void vlan_restore_hw_rx_fltr(struct net_device *dev, vlan_write_filter(dev, hw, i, val); } } - - hash = readl(ioaddr + VLAN_HASH_TABLE); - if (hash & VLAN_VLHT) { - value = readl(ioaddr + VLAN_TAG); - value |= VLAN_VTHM; - writel(value, ioaddr + VLAN_TAG); - } } static void vlan_update_hash(struct mac_device_info *hw, u32 hash, From 2cd70e3968f505996d5fefdf7ca684f0f4575734 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Tue, 3 Mar 2026 14:58:28 +0000 Subject: [PATCH 760/828] net: stmmac: Defer VLAN HW configuration when interface is down VLAN register accesses on the MAC side require the PHY RX clock to be active. When the network interface is down, the PHY is suspended and the RX clock is unavailable, causing VLAN operations to fail with timeouts. The VLAN core automatically removes VID 0 after the interface goes down and re-adds it when it comes back up, so these timeouts happen during normal interface down/up: # ip link set end1 down renesas-gbeth 15c40000.ethernet end1: Timeout accessing MAC_VLAN_Tag_Filter renesas-gbeth 15c40000.ethernet end1: failed to kill vid 0081/0 Adding VLANs while the interface is down also fails: # ip link add link end1 name end1.10 type vlan id 10 renesas-gbeth 15c40000.ethernet end1: Timeout accessing MAC_VLAN_Tag_Filter RTNETLINK answers: Device or resource busy To fix this, check if the interface is up before accessing VLAN registers. The software state is always kept up to date regardless of interface state. When the interface is brought up, stmmac_vlan_restore() is called to write the VLAN state to hardware. Fixes: ed64639bc1e0 ("net: stmmac: Add support for VLAN Rx filtering") Signed-off-by: Ovidiu Panait Link: https://patch.msgid.link/20260303145828.7845-5-ovidiu.panait.rb@renesas.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++ .../net/ethernet/stmicro/stmmac/stmmac_vlan.c | 42 ++++++++++--------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6d4ce35e990f..6827c99bde8c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6769,6 +6769,9 @@ static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double) hash = 0; } + if (!netif_running(priv->dev)) + return 0; + return stmmac_update_vlan_hash(priv, priv->hw, hash, pmatch, is_double); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index fcc34867405e..e24efe3bfedb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -76,7 +76,9 @@ static int vlan_add_hw_rx_fltr(struct net_device *dev, } hw->vlan_filter[0] = vid; - vlan_write_single(dev, vid); + + if (netif_running(dev)) + vlan_write_single(dev, vid); return 0; } @@ -97,12 +99,15 @@ static int vlan_add_hw_rx_fltr(struct net_device *dev, return -EPERM; } - ret = vlan_write_filter(dev, hw, index, val); + if (netif_running(dev)) { + ret = vlan_write_filter(dev, hw, index, val); + if (ret) + return ret; + } - if (!ret) - hw->vlan_filter[index] = val; + hw->vlan_filter[index] = val; - return ret; + return 0; } static int vlan_del_hw_rx_fltr(struct net_device *dev, @@ -115,7 +120,9 @@ static int vlan_del_hw_rx_fltr(struct net_device *dev, if (hw->num_vlan == 1) { if ((hw->vlan_filter[0] & VLAN_TAG_VID) == vid) { hw->vlan_filter[0] = 0; - vlan_write_single(dev, 0); + + if (netif_running(dev)) + vlan_write_single(dev, 0); } return 0; } @@ -124,22 +131,23 @@ static int vlan_del_hw_rx_fltr(struct net_device *dev, for (i = 0; i < hw->num_vlan; i++) { if ((hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) && ((hw->vlan_filter[i] & VLAN_TAG_DATA_VID) == vid)) { - ret = vlan_write_filter(dev, hw, i, 0); - if (!ret) - hw->vlan_filter[i] = 0; - else - return ret; + if (netif_running(dev)) { + ret = vlan_write_filter(dev, hw, i, 0); + if (ret) + return ret; + } + + hw->vlan_filter[i] = 0; } } - return ret; + return 0; } static void vlan_restore_hw_rx_fltr(struct net_device *dev, struct mac_device_info *hw) { - u32 val; int i; /* Single Rx VLAN Filter */ @@ -149,12 +157,8 @@ static void vlan_restore_hw_rx_fltr(struct net_device *dev, } /* Extended Rx VLAN Filter Enable */ - for (i = 0; i < hw->num_vlan; i++) { - if (hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) { - val = hw->vlan_filter[i]; - vlan_write_filter(dev, hw, i, val); - } - } + for (i = 0; i < hw->num_vlan; i++) + vlan_write_filter(dev, hw, i, hw->vlan_filter[i]); } static void vlan_update_hash(struct mac_device_info *hw, u32 hash, From 91d7e9df42598af28ca440b95b16a4e51a408771 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 4 Mar 2026 20:27:52 -0300 Subject: [PATCH 761/828] drm/ttm: Fix bo resource use-after-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When allocating a lot of buffers and putting the TTM under memory pressure, during swapout, it might crash the system with the stack trace below. It turns out that ttm_bo_swapout_cb might replace bo->resource when it moves it to system cached. When commit c06da4b3573a ("drm/ttm: Tidy usage of local variables a little bit") used a local variable for bo->resource, it used the freed resource later in the function, leading to a UAF. Move back to using bo->resource in all cases in that function instead of a local variable. [ 604.814275] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 604.814284] #PF: supervisor read access in kernel mode [ 604.814288] #PF: error_code(0x0000) - not-present page [ 604.814291] PGD 0 P4D 0 [ 604.814296] Oops: Oops: 0000 [#1] SMP NOPTI [ 604.814303] CPU: 2 UID: 0 PID: 4408 Comm: vulkan Tainted: G W 7.0.0-rc2-00001-gc50a051e6aca #21 PREEMPT(full) aef6eb0c02036a7c8a5e62e0c84a30c2be90688d [ 604.814309] Tainted: [W]=WARN [ 604.814311] Hardware name: Valve Jupiter/Jupiter, BIOS F7A0133 08/05/2024 [ 604.814314] RIP: 0010:ttm_resource_move_to_lru_tail+0x100/0x160 [ttm] [ 604.814329] Code: 5b 5d e9 83 b4 1b cb 48 63 d2 48 c1 e0 04 48 8b 4e 40 48 8d 7e 40 48 8b ac d3 d8 00 00 00 48 89 c3 48 8d 54 05 68 48 8b 46 48 <48> 3b 38 0f 85 b3 3b 00 00 48 3b 79 08 0f 85 a9 3b 00 00 48 89 41 [ 604.814332] RSP: 0018:ffffcfe54e3d7578 EFLAGS: 00010256 [ 604.814336] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8cf09eced300 [ 604.814339] RDX: 0000000000000068 RSI: ffff8cf1d4c1fc00 RDI: ffff8cf1d4c1fc40 [ 604.814341] RBP: 0000000000000000 R08: ffff8cf09eced300 R09: 0000000000000000 [ 604.814344] R10: 0000000000000000 R11: 0000000000000016 R12: ffff8cf1d4c1fc00 [ 604.814346] R13: 0000000000000400 R14: ffff8cf096289c00 R15: ffff8cf084c8f688 [ 604.814349] FS: 00007f00531b7780(0000) GS:ffff8cf4217a0000(0000) knlGS:0000000000000000 [ 604.814352] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 604.814355] CR2: 0000000000000000 CR3: 000000018e3df000 CR4: 0000000000350ef0 [ 604.814358] Call Trace: [ 604.814362] [ 604.814368] ttm_bo_swapout_cb+0x24c/0x280 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814380] ttm_lru_walk_for_evict+0xac/0x1d0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814394] ttm_bo_swapout+0x5b/0x80 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814405] ttm_global_swapout+0x63/0x100 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814415] ttm_tt_populate+0x82/0x130 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814424] ttm_bo_populate+0x37/0xa0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814433] ttm_bo_handle_move_mem+0x157/0x170 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814443] ttm_bo_validate+0xd9/0x180 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814453] ttm_bo_init_reserved+0xa0/0x1b0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.814461] ? srso_return_thunk+0x5/0x5f [ 604.814469] amdgpu_bo_create+0x1f5/0x500 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.814855] ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.815182] amdgpu_bo_create_user+0x3d/0x70 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.815504] amdgpu_gem_create_ioctl+0x16c/0x3b0 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.815830] ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.816155] ? __pfx_amdgpu_gem_create_ioctl+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.816478] drm_ioctl_kernel+0xae/0x100 [ 604.816486] drm_ioctl+0x283/0x510 [ 604.816491] ? __pfx_amdgpu_gem_create_ioctl+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.816819] amdgpu_drm_ioctl+0x4a/0x80 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.817135] __x64_sys_ioctl+0x96/0xe0 [ 604.817142] do_syscall_64+0x11b/0x7e0 [ 604.817148] ? srso_return_thunk+0x5/0x5f [ 604.817152] ? srso_return_thunk+0x5/0x5f [ 604.817156] ? walk_system_ram_range+0xb0/0x110 [ 604.817161] ? srso_return_thunk+0x5/0x5f [ 604.817165] ? __pte_offset_map+0x1b/0xb0 [ 604.817170] ? srso_return_thunk+0x5/0x5f [ 604.817174] ? pte_offset_map_lock+0x87/0xf0 [ 604.817179] ? srso_return_thunk+0x5/0x5f [ 604.817183] ? insert_pfn+0x9f/0x1f0 [ 604.817188] ? srso_return_thunk+0x5/0x5f [ 604.817192] ? vmf_insert_pfn_prot+0x97/0x190 [ 604.817197] ? srso_return_thunk+0x5/0x5f [ 604.817201] ? ttm_bo_vm_fault_reserved+0x1a6/0x3f0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302] [ 604.817213] ? srso_return_thunk+0x5/0x5f [ 604.817217] ? amdgpu_gem_fault+0xe2/0x100 [amdgpu 361516226706227f4403914dbfdd3f90996136ca] [ 604.817542] ? srso_return_thunk+0x5/0x5f [ 604.817546] ? __do_fault+0x33/0x180 [ 604.817550] ? srso_return_thunk+0x5/0x5f [ 604.817554] ? do_fault+0x178/0x610 [ 604.817559] ? srso_return_thunk+0x5/0x5f [ 604.817562] ? __handle_mm_fault+0x9be/0x1120 [ 604.817567] ? srso_return_thunk+0x5/0x5f [ 604.817574] ? srso_return_thunk+0x5/0x5f [ 604.817578] ? count_memcg_events+0xc4/0x160 [ 604.817583] ? srso_return_thunk+0x5/0x5f [ 604.817587] ? handle_mm_fault+0x1d7/0x2e0 [ 604.817593] ? srso_return_thunk+0x5/0x5f [ 604.817596] ? do_user_addr_fault+0x173/0x660 [ 604.817602] ? srso_return_thunk+0x5/0x5f [ 604.817607] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 604.817612] RIP: 0033:0x7f00532cef4d [ 604.817617] Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00 [ 604.817620] RSP: 002b:00007ffd69ab0650 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 604.817624] RAX: ffffffffffffffda RBX: 00007ffd69ab07d0 RCX: 00007f00532cef4d [ 604.817627] RDX: 00007ffd69ab0700 RSI: 00000000c0206440 RDI: 0000000000000005 [ 604.817629] RBP: 00007ffd69ab06a0 R08: 00007f00533a0ac0 R09: 0000000000000000 [ 604.817632] R10: 00007ffd69ab07c0 R11: 0000000000000246 R12: 00007ffd69ab0700 [ 604.817634] R13: 00000000c0206440 R14: 0000000000000005 R15: 0000000000000243 [ 604.817642] Cc: Tvrtko Ursulin Cc: Christian König Fixes: c06da4b3573a ("drm/ttm: Tidy usage of local variables a little bit") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20260304-ttm_bo_res_uaf-v1-1-43f20125b67f@igalia.com --- drivers/gpu/drm/ttm/ttm_bo.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index acb9197db879..0765d69423d2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1107,8 +1107,7 @@ struct ttm_bo_swapout_walk { static s64 ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) { - struct ttm_resource *res = bo->resource; - struct ttm_place place = { .mem_type = res->mem_type }; + struct ttm_place place = { .mem_type = bo->resource->mem_type }; struct ttm_bo_swapout_walk *swapout_walk = container_of(walk, typeof(*swapout_walk), walk); struct ttm_operation_ctx *ctx = walk->arg.ctx; @@ -1148,7 +1147,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) /* * Move to system cached */ - if (res->mem_type != TTM_PL_SYSTEM) { + if (bo->resource->mem_type != TTM_PL_SYSTEM) { struct ttm_resource *evict_mem; struct ttm_place hop; @@ -1180,15 +1179,15 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) if (ttm_tt_is_populated(tt)) { spin_lock(&bdev->lru_lock); - ttm_resource_del_bulk_move(res, bo); + ttm_resource_del_bulk_move(bo->resource, bo); spin_unlock(&bdev->lru_lock); ret = ttm_tt_swapout(bdev, tt, swapout_walk->gfp_flags); spin_lock(&bdev->lru_lock); if (ret) - ttm_resource_add_bulk_move(res, bo); - ttm_resource_move_to_lru_tail(res); + ttm_resource_add_bulk_move(bo->resource, bo); + ttm_resource_move_to_lru_tail(bo->resource); spin_unlock(&bdev->lru_lock); } From ce8ee8583ed83122405eabaa8fb351be4d9dc65c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 5 Mar 2026 11:15:50 +0800 Subject: [PATCH 762/828] block: use trylock to avoid lockdep circular dependency in sysfs Use trylock instead of blocking lock acquisition for update_nr_hwq_lock in queue_requests_store() and elv_iosched_store() to avoid circular lock dependency with kernfs active reference during concurrent disk deletion: update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) kn->active -> update_nr_hwq_lock (via sysfs write path) Return -EBUSY when the lock is not immediately available. Reported-and-tested-by: Yi Zhang Closes: https://lore.kernel.org/linux-block/CAHj4cs-em-4acsHabMdT=jJhXkCzjnprD-aQH1OgrZo4nTnmMw@mail.gmail.com/ Fixes: 626ff4f8ebcb ("blk-mq: convert to serialize updating nr_requests with update_nr_hwq_lock") Signed-off-by: Ming Lei Tested-by: Yi Zhang Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 8 +++++++- block/elevator.c | 12 +++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index f3b1968c80ce..55a1bbfef7d4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -78,8 +78,14 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count) /* * Serialize updating nr_requests with concurrent queue_requests_store() * and switching elevator. + * + * Use trylock to avoid circular lock dependency with kernfs active + * reference during concurrent disk deletion: + * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) + * kn->active -> update_nr_hwq_lock (via this sysfs write path) */ - down_write(&set->update_nr_hwq_lock); + if (!down_write_trylock(&set->update_nr_hwq_lock)) + return -EBUSY; if (nr == q->nr_requests) goto unlock; diff --git a/block/elevator.c b/block/elevator.c index ebe2a1fcf011..3bcd37c2aa34 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -807,7 +807,16 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, elv_iosched_load_module(ctx.name); ctx.type = elevator_find_get(ctx.name); - down_read(&set->update_nr_hwq_lock); + /* + * Use trylock to avoid circular lock dependency with kernfs active + * reference during concurrent disk deletion: + * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) + * kn->active -> update_nr_hwq_lock (via this sysfs write path) + */ + if (!down_read_trylock(&set->update_nr_hwq_lock)) { + ret = -EBUSY; + goto out; + } if (!blk_queue_no_elv_switch(q)) { ret = elevator_change(q, &ctx); if (!ret) @@ -817,6 +826,7 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, } up_read(&set->update_nr_hwq_lock); +out: if (ctx.type) elevator_put(ctx.type); return ret; From b824c3e16c1904bf80df489e293d1e3cbf98896d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 2 Mar 2026 17:26:31 +0100 Subject: [PATCH 763/828] net: Provide a PREEMPT_RT specific check for netdev_queue::_xmit_lock After acquiring netdev_queue::_xmit_lock the number of the CPU owning the lock is recorded in netdev_queue::xmit_lock_owner. This works as long as the BH context is not preemptible. On PREEMPT_RT the softirq context is preemptible and without the softirq-lock it is possible to have multiple user in __dev_queue_xmit() submitting a skb on the same CPU. This is fine in general but this means also that the current CPU is recorded as netdev_queue::xmit_lock_owner. This in turn leads to the recursion alert and the skb is dropped. Instead checking the for CPU number, that owns the lock, PREEMPT_RT can check if the lockowner matches the current task. Add netif_tx_owned() which returns true if the current context owns the lock by comparing the provided CPU number with the recorded number. This resembles the current check by negating the condition (the current check returns true if the lock is not owned). On PREEMPT_RT use rt_mutex_owner() to return the lock owner and compare the current task against it. Use the new helper in __dev_queue_xmit() and netif_local_xmit_active() which provides a similar check. Update comments regarding pairing READ_ONCE(). Reported-by: Bert Karwatzki Closes: https://lore.kernel.org/all/20260216134333.412332-1-spasswolf@web.de Fixes: 3253cb49cbad4 ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT") Signed-off-by: Sebastian Andrzej Siewior Reported-by: Bert Karwatzki Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20260302162631.uGUyIqDT@linutronix.de Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 27 ++++++++++++++++++++++----- net/core/dev.c | 5 +---- net/core/netpoll.c | 2 +- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4e6e00bb90a..67e25f6d15a4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4711,7 +4711,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, cpu); } @@ -4729,7 +4729,7 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } @@ -4738,7 +4738,7 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) bool ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } return ok; @@ -4746,14 +4746,14 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) static inline void __netif_tx_unlock(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } @@ -4846,6 +4846,23 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_enable(); } +#ifndef CONFIG_PREEMPT_RT +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + return READ_ONCE(txq->xmit_lock_owner) == cpu; +} + +#else +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + return rt_mutex_owner(&txq->_xmit_lock.lock) == current; +} + +#endif + static inline void netif_addr_lock(struct net_device *dev) { unsigned char nest_level = 0; diff --git a/net/core/dev.c b/net/core/dev.c index 20610a192ec7..14a83f2035b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4818,10 +4818,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - /* Other cpus might concurrently change txq->xmit_lock_owner - * to -1 or to their cpu id, but not to our id. - */ - if (READ_ONCE(txq->xmit_lock_owner) != cpu) { + if (!netif_tx_owned(txq, cpu)) { bool is_list = false; if (dev_xmit_recursion()) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a8558a52884f..cd74beffd209 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -132,7 +132,7 @@ static int netif_local_xmit_active(struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) + if (netif_tx_owned(txq, smp_processor_id())) return 1; } From def602e498a4f951da95c95b1b8ce8ae68aa733a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Mar 2026 23:12:37 +0100 Subject: [PATCH 764/828] netfilter: nf_tables: unconditionally bump set->nelems before insertion In case that the set is full, a new element gets published then removed without waiting for the RCU grace period, while RCU reader can be walking over it already. To address this issue, add the element transaction even if set is full, but toggle the set_full flag to report -ENFILE so the abort path safely unwinds the set to its previous state. As for element updates, decrement set->nelems to restore it. A simpler fix is to call synchronize_rcu() in the error path. However, with a large batch adding elements to already maxed-out set, this could cause noticeable slowdown of such batches. Fixes: 35d0ac9070ef ("netfilter: nf_tables: fix set->nelems counting with no NLM_F_EXCL") Reported-by: Inseo An Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fd7f7e4e2a43..df67932d3e09 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7170,6 +7170,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; + bool set_full = false; u64 expiration; u64 timeout; int err, i; @@ -7461,10 +7462,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_elem_free; + if (!(flags & NFT_SET_ELEM_CATCHALL)) { + unsigned int max = nft_set_maxsize(set), nelems; + + nelems = atomic_inc_return(&set->nelems); + if (nelems > max) + set_full = true; + } + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) { err = -ENOMEM; - goto err_elem_free; + goto err_set_size; } ext->genmask = nft_genmask_cur(ctx->net); @@ -7516,7 +7525,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ue->priv = elem_priv; nft_trans_commit_list_add_elem(ctx->net, trans); - goto err_elem_free; + goto err_set_size; } } } @@ -7534,23 +7543,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err_element_clash; } - if (!(flags & NFT_SET_ELEM_CATCHALL)) { - unsigned int max = nft_set_maxsize(set); - - if (!atomic_add_unless(&set->nelems, 1, max)) { - err = -ENFILE; - goto err_set_full; - } - } - nft_trans_container_elem(trans)->elems[0].priv = elem.priv; nft_trans_commit_list_add_elem(ctx->net, trans); - return 0; -err_set_full: - nft_setelem_remove(ctx->net, set, elem.priv); + return set_full ? -ENFILE : 0; + err_element_clash: kfree(trans); +err_set_size: + if (!(flags & NFT_SET_ELEM_CATCHALL)) + atomic_dec(&set->nelems); err_elem_free: nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: From fb7fb4016300ac622c964069e286dc83166a5d52 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Mar 2026 23:28:15 +0100 Subject: [PATCH 765/828] netfilter: nf_tables: clone set on flush only Syzbot with fault injection triggered a failing memory allocation with GFP_KERNEL which results in a WARN splat: iter.err WARNING: net/netfilter/nf_tables_api.c:845 at nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845, CPU#0: syz.0.17/5992 Modules linked in: CPU: 0 UID: 0 PID: 5992 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2026 RIP: 0010:nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845 Code: 8b 05 86 5a 4e 09 48 3b 84 24 a0 00 00 00 75 62 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc cc e8 63 6d fa f7 90 <0f> 0b 90 43 +80 7c 35 00 00 0f 85 23 fe ff ff e9 26 fe ff ff 89 d9 RSP: 0018:ffffc900045af780 EFLAGS: 00010293 RAX: ffffffff89ca45bd RBX: 00000000fffffff4 RCX: ffff888028111e40 RDX: 0000000000000000 RSI: 00000000fffffff4 RDI: 0000000000000000 RBP: ffffc900045af870 R08: 0000000000400dc0 R09: 00000000ffffffff R10: dffffc0000000000 R11: fffffbfff1d141db R12: ffffc900045af7e0 R13: 1ffff920008b5f24 R14: dffffc0000000000 R15: ffffc900045af920 FS: 000055557a6a5500(0000) GS:ffff888125496000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb5ea271fc0 CR3: 000000003269e000 CR4: 00000000003526f0 Call Trace: __nft_release_table+0xceb/0x11f0 net/netfilter/nf_tables_api.c:12115 nft_rcv_nl_event+0xc25/0xdb0 net/netfilter/nf_tables_api.c:12187 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85 blocking_notifier_call_chain+0x6a/0x90 kernel/notifier.c:380 netlink_release+0x123b/0x1ad0 net/netlink/af_netlink.c:761 __sock_release net/socket.c:662 [inline] sock_close+0xc3/0x240 net/socket.c:1455 Restrict set clone to the flush set command in the preparation phase. Add NFT_ITER_UPDATE_CLONE and use it for this purpose, update the rbtree and pipapo backends to only clone the set when this iteration type is used. As for the existing NFT_ITER_UPDATE type, update the pipapo backend to use the existing set clone if available, otherwise use the existing set representation. After this update, there is no need to clone a set that is being deleted, this includes bound anonymous set. An alternative approach to NFT_ITER_UPDATE_CLONE is to add a .clone interface and call it from the flush set path. Reported-by: syzbot+4924a0edc148e8b4b342@syzkaller.appspotmail.com Fixes: 3f1d886cc7c3 ("netfilter: nft_set_pipapo: move cloning of match info to insert/removal path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 10 +++++++++- net/netfilter/nft_set_hash.c | 1 + net/netfilter/nft_set_pipapo.c | 11 +++++++++-- net/netfilter/nft_set_rbtree.c | 8 +++++--- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 426534a711b0..ea6f29ad7888 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -320,11 +320,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state + * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element */ enum nft_iter_type { NFT_ITER_UNSPEC, NFT_ITER_READ, NFT_ITER_UPDATE, + NFT_ITER_UPDATE_CLONE, }; struct nft_set; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index df67932d3e09..058f7004cb2b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -833,6 +833,11 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, } } +/* Use NFT_ITER_UPDATE iterator even if this may be called from the preparation + * phase, the set clone might already exist from a previous command, or it might + * be a set that is going away and does not require a clone. The netns and + * netlink release paths also need to work on the live set. + */ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_iter iter = { @@ -7903,9 +7908,12 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask) { + /* The set backend might need to clone the set, do it now from the + * preparation phase, use NFT_ITER_UPDATE_CLONE iterator type. + */ struct nft_set_iter iter = { .genmask = genmask, - .type = NFT_ITER_UPDATE, + .type = NFT_ITER_UPDATE_CLONE, .fn = nft_setelem_flush, }; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 739b992bde59..b0e571c8e3f3 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -374,6 +374,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, { switch (iter->type) { case NFT_ITER_UPDATE: + case NFT_ITER_UPDATE_CLONE: /* only relevant for netlink dumps which use READ type */ WARN_ON_ONCE(iter->skip != 0); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7ef4b44471d3..c091898df710 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2144,13 +2144,20 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_pipapo_match *m; switch (iter->type) { - case NFT_ITER_UPDATE: + case NFT_ITER_UPDATE_CLONE: m = pipapo_maybe_clone(set); if (!m) { iter->err = -ENOMEM; return; } - + nft_pipapo_do_walk(ctx, set, m, iter); + break; + case NFT_ITER_UPDATE: + if (priv->clone) + m = priv->clone; + else + m = rcu_dereference_protected(priv->match, + nft_pipapo_transaction_mutex_held(set)); nft_pipapo_do_walk(ctx, set, m, iter); break; case NFT_ITER_READ: diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 3f02e4478216..ee3d4f5b9ff7 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -861,13 +861,15 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree *priv = nft_set_priv(set); switch (iter->type) { - case NFT_ITER_UPDATE: - lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); - + case NFT_ITER_UPDATE_CLONE: if (nft_array_may_resize(set) < 0) { iter->err = -ENOMEM; break; } + fallthrough; + case NFT_ITER_UPDATE: + lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); + nft_rbtree_do_walk(ctx, set, iter); break; case NFT_ITER_READ: From 9df95785d3d8302f7c066050117b04cd3c2048c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 Mar 2026 16:31:32 +0100 Subject: [PATCH 766/828] netfilter: nft_set_pipapo: split gc into unlink and reclaim phase Yiming Qian reports Use-after-free in the pipapo set type: Under a large number of expired elements, commit-time GC can run for a very long time in a non-preemptible context, triggering soft lockup warnings and RCU stall reports (local denial of service). We must split GC in an unlink and a reclaim phase. We cannot queue elements for freeing until pointers have been swapped. Expired elements are still exposed to both the packet path and userspace dumpers via the live copy of the data structure. call_rcu() does not protect us: dump operations or element lookups starting after call_rcu has fired can still observe the free'd element, unless the commit phase has made enough progress to swap the clone and live pointers before any new reader has picked up the old version. This a similar approach as done recently for the rbtree backend in commit 35f83a75529a ("netfilter: nft_set_rbtree: don't gc elements on insert"). Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Yiming Qian Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 5 +++ net/netfilter/nf_tables_api.c | 5 --- net/netfilter/nft_set_pipapo.c | 51 ++++++++++++++++++++++++++----- net/netfilter/nft_set_pipapo.h | 2 ++ 4 files changed, 50 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ea6f29ad7888..e2d2bfc1f989 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1863,6 +1863,11 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline int nft_trans_gc_space(const struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + static inline void nft_ctx_update(struct nft_ctx *ctx, const struct nft_trans *trans) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 058f7004cb2b..1862bd7fe804 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10493,11 +10493,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) schedule_work(&trans_gc_work); } -static int nft_trans_gc_space(struct nft_trans_gc *trans) -{ - return NFT_TRANS_GC_BATCHCOUNT - trans->count; -} - struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index c091898df710..a34632ae6048 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1680,11 +1680,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, } /** - * pipapo_gc() - Drop expired entries from set, destroy start and end elements + * pipapo_gc_scan() - Drop expired entries from set and link them to gc list * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) +static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); @@ -1697,6 +1697,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) if (!gc) return; + list_add(&gc->list, &priv->gc_head); + while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const struct nft_pipapo_field *f; @@ -1724,9 +1726,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) * NFT_SET_ELEM_DEAD_BIT. */ if (__nft_set_elem_expired(&e->ext, tstamp)) { - gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); - if (!gc) - return; + if (!nft_trans_gc_space(gc)) { + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + if (!gc) + return; + + list_add(&gc->list, &priv->gc_head); + } nft_pipapo_gc_deactivate(net, set, e); pipapo_drop(m, rulemap); @@ -1740,10 +1746,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) } } - gc = nft_trans_gc_catchall_sync(gc); + priv->last_gc = jiffies; +} + +/** + * pipapo_gc_queue() - Free expired elements + * @set: nftables API set representation + */ +static void pipapo_gc_queue(struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_trans_gc *gc, *next; + + /* always do a catchall cycle: */ + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); if (gc) { + gc = nft_trans_gc_catchall_sync(gc); + if (gc) + nft_trans_gc_queue_sync_done(gc); + } + + /* always purge queued gc elements. */ + list_for_each_entry_safe(gc, next, &priv->gc_head, list) { + list_del(&gc->list); nft_trans_gc_queue_sync_done(gc); - priv->last_gc = jiffies; } } @@ -1797,6 +1823,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * * We also need to create a new working copy for subsequent insertions and * deletions. + * + * After the live copy has been replaced by the clone, we can safely queue + * expired elements that have been collected by pipapo_gc_scan() for + * memory reclaim. */ static void nft_pipapo_commit(struct nft_set *set) { @@ -1807,7 +1837,7 @@ static void nft_pipapo_commit(struct nft_set *set) return; if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) - pipapo_gc(set, priv->clone); + pipapo_gc_scan(set, priv->clone); old = rcu_replace_pointer(priv->match, priv->clone, nft_pipapo_transaction_mutex_held(set)); @@ -1815,6 +1845,8 @@ static void nft_pipapo_commit(struct nft_set *set) if (old) call_rcu(&old->rcu, pipapo_reclaim_match); + + pipapo_gc_queue(set); } static void nft_pipapo_abort(const struct nft_set *set) @@ -2279,6 +2311,7 @@ static int nft_pipapo_init(const struct nft_set *set, f->mt = NULL; } + INIT_LIST_HEAD(&priv->gc_head); rcu_assign_pointer(priv->match, m); return 0; @@ -2328,6 +2361,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; + WARN_ON_ONCE(!list_empty(&priv->gc_head)); + m = rcu_dereference_protected(priv->match, true); if (priv->clone) { diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index eaab422aa56a..9aee9a9eaeb7 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -156,12 +156,14 @@ struct nft_pipapo_match { * @clone: Copy where pending insertions and deletions are kept * @width: Total bytes to be matched for one packet, including padding * @last_gc: Timestamp of last garbage collection run, jiffies + * @gc_head: list of nft_trans_gc to queue up for mem reclaim */ struct nft_pipapo { struct nft_pipapo_match __rcu *match; struct nft_pipapo_match *clone; int width; unsigned long last_gc; + struct list_head gc_head; }; struct nft_pipapo_elem; From 3d543d9515928e4754a741c338dbcdf68ac47e39 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 5 Mar 2026 12:18:10 +0100 Subject: [PATCH 767/828] ALSA: us122l: drop redundant interface references Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Similarly, USB core holds a reference to all interfaces in the active configuration so there is no need for a driver to take a reference to a sibling interface only to release it at disconnect either. Drop the redundant references to reduce cargo culting, make it easier to spot drivers where extra references are needed, and reduce the risk of memory leaks when drivers fail to release them. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260305111810.18688-1-johan@kernel.org Signed-off-by: Takashi Iwai --- sound/usb/usx2y/us122l.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index 011ea96e9779..f00b53346abd 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c @@ -520,8 +520,6 @@ static int us122l_usb_probe(struct usb_interface *intf, return err; } - usb_get_intf(usb_ifnum_to_if(device, 0)); - usb_get_dev(device); *cardp = card; return 0; } @@ -542,11 +540,9 @@ static int snd_us122l_probe(struct usb_interface *intf, if (intf->cur_altsetting->desc.bInterfaceNumber != 1) return 0; - err = us122l_usb_probe(usb_get_intf(intf), id, &card); - if (err < 0) { - usb_put_intf(intf); + err = us122l_usb_probe(intf, id, &card); + if (err < 0) return err; - } usb_set_intfdata(intf, card); return 0; @@ -574,10 +570,6 @@ static void snd_us122l_disconnect(struct usb_interface *intf) snd_usbmidi_disconnect(p); } - usb_put_intf(usb_ifnum_to_if(us122l->dev, 0)); - usb_put_intf(usb_ifnum_to_if(us122l->dev, 1)); - usb_put_dev(us122l->dev); - snd_card_free_when_closed(card); } From ee8ade4d9678a456bb5ea675c270738b250eda68 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 5 Mar 2026 12:37:34 +0100 Subject: [PATCH 768/828] Revert "drm/syncobj: Fix handle <-> fd ioctls with dirty stack" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2e3649e237237258a08d75afef96648dd2b379f7. The problem occurs when userspace is compiled against new headers with new members, but don't correctly initialise those new members. This is not a kernel problem, and should be fixed in userspace by correctly zero'ing all members. Cc: Rob Clark Cc: Julian Orth Cc: Christian König Cc: Michel Dänzer Reviewed-by: Christian König Acked-by: Julian Orth Link: https://patch.msgid.link/20260305113734.1309238-1-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/drm_syncobj.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 49eccb43ce63..250734dee928 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -875,7 +875,7 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, return drm_syncobj_export_sync_file(file_private, args->handle, point, &args->fd); - if (point) + if (args->point) return -EINVAL; return drm_syncobj_handle_to_fd(file_private, args->handle, @@ -909,7 +909,7 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, args->handle, point); - if (point) + if (args->point) return -EINVAL; return drm_syncobj_fd_to_handle(file_private, args->fd, From 0abc73c8a40fd64ac1739c90bb4f42c418d27a5e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 3 Mar 2026 18:56:39 +0100 Subject: [PATCH 769/828] net: ethernet: mtk_eth_soc: Reset prog ptr to old_prog in case of error in mtk_xdp_setup() Reset eBPF program pointer to old_prog and do not decrease its ref-count if mtk_open routine in mtk_xdp_setup() fails. Fixes: 7c26c20da5d42 ("net: ethernet: mtk_eth_soc: add basic XDP support") Suggested-by: Paolo Valerio Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260303-mtk-xdp-prog-ptr-fix-v2-1-97b6dbbe240f@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e5e2ffa9c542..ddc321a02fda 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3748,12 +3748,21 @@ static int mtk_xdp_setup(struct net_device *dev, struct bpf_prog *prog, mtk_stop(dev); old_prog = rcu_replace_pointer(eth->prog, prog, lockdep_rtnl_is_held()); + + if (netif_running(dev) && need_update) { + int err; + + err = mtk_open(dev); + if (err) { + rcu_assign_pointer(eth->prog, old_prog); + + return err; + } + } + if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev) && need_update) - return mtk_open(dev); - return 0; } From f26b098d937488e8f5c617d465760a10bfcc7f13 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Thu, 5 Mar 2026 10:31:17 +0100 Subject: [PATCH 770/828] ftrace: Add MAINTAINERS entries for all ftrace headers There is currently no entry for ftrace_irq.h and ftrace_regs.h. Add a generic entry for all *ftrace* headers to include them and prevent overlooking future ftrace headers. Cc: Masami Hiramatsu Cc: Mark Rutland Link: https://patch.msgid.link/20260305093117.853700-1-jmarchan@redhat.com Signed-off-by: Jerome Marchand Signed-off-by: Steven Rostedt (Google) --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 61bf550fd37c..b8d1ad952827 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10484,7 +10484,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git F: Documentation/trace/ftrace* F: arch/*/*/*/*ftrace* F: arch/*/*/*ftrace* -F: include/*/ftrace.h +F: include/*/*ftrace* F: kernel/trace/fgraph.c F: kernel/trace/ftrace* F: samples/ftrace From 6be2681514261324c8ee8a1c6f76cefdf700220f Mon Sep 17 00:00:00 2001 From: Sun Jian Date: Wed, 25 Feb 2026 19:14:50 +0800 Subject: [PATCH 771/828] selftests/harness: order TEST_F and XFAIL_ADD constructors TEST_F() allocates and registers its struct __test_metadata via mmap() inside its constructor, and only then assigns the _##fixture_##test##_object pointer. XFAIL_ADD() runs in a constructor too and reads _##fixture_##test##_object to initialize xfail->test. If XFAIL_ADD runs first, xfail->test can be NULL and the expected failure will be reported as FAIL. Use constructor priorities to ensure TEST_F registration runs before XFAIL_ADD, without adding extra state or runtime lookups. Fixes: 2709473c9386 ("selftests: kselftest_harness: support using xfail") Signed-off-by: Sun Jian Link: https://patch.msgid.link/20260225111451.347923-1-sun.jian.kdev@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/kselftest_harness.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 16a119a4656c..4afaef01c22e 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -76,6 +76,9 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) memset(s, c, n); } +#define KSELFTEST_PRIO_TEST_F 20000 +#define KSELFTEST_PRIO_XFAIL 20001 + #define TEST_TIMEOUT_DEFAULT 30 /* Utilities exposed to the test definitions */ @@ -465,7 +468,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) fixture_name##_teardown(_metadata, self, variant); \ } \ static struct __test_metadata *_##fixture_name##_##test_name##_object; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST_F))) \ _register_##fixture_name##_##test_name(void) \ { \ struct __test_metadata *object = mmap(NULL, sizeof(*object), \ @@ -880,7 +883,7 @@ struct __test_xfail { .fixture = &_##fixture_name##_fixture_object, \ .variant = &_##fixture_name##_##variant_name##_object, \ }; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_XFAIL))) \ _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \ { \ _##fixture_name##_##variant_name##_##test_name##_xfail.test = \ From c952291593e54415a7bb74c4a7187a7c7c7e8651 Mon Sep 17 00:00:00 2001 From: Sun Jian Date: Wed, 25 Feb 2026 19:14:51 +0800 Subject: [PATCH 772/828] selftests: net: tun: don't abort XFAIL cases The tun UDP tunnel GSO fixture contains XFAIL-marked variants intended to exercise failure paths (e.g. EMSGSIZE / "Message too long"). Using ASSERT_EQ() in these tests aborts the subtest, which prevents the harness from classifying them as XFAIL and can make the overall net: tun test fail. Switch the relevant ASSERT_EQ() checks to EXPECT_EQ() so the subtests continue running and the failures are correctly reported and accounted as XFAIL where applicable. Signed-off-by: Sun Jian Link: https://patch.msgid.link/20260225111451.347923-2-sun.jian.kdev@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tun.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c index 8a5cd5cb5472..cf106a49b55e 100644 --- a/tools/testing/selftests/net/tun.c +++ b/tools/testing/selftests/net/tun.c @@ -944,8 +944,8 @@ TEST_F(tun_vnet_udptnl, send_gso_packet) ASSERT_EQ(ret, off); ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss); - ASSERT_EQ(ret, variant->data_size); - ASSERT_EQ(r_num_mss, variant->r_num_mss); + EXPECT_EQ(ret, variant->data_size); + EXPECT_EQ(r_num_mss, variant->r_num_mss); } TEST_F(tun_vnet_udptnl, recv_gso_packet) @@ -955,18 +955,18 @@ TEST_F(tun_vnet_udptnl, recv_gso_packet) int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; ret = send_gso_packet_into_tunnel(self, variant); - ASSERT_EQ(ret, variant->data_size); + EXPECT_EQ(ret, variant->data_size); memset(&vnet_hdr, 0, sizeof(vnet_hdr)); ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr); - ASSERT_EQ(ret, variant->data_size); + EXPECT_EQ(ret, variant->data_size); if (!variant->no_gso) { - ASSERT_EQ(vh->gso_size, variant->gso_size); + EXPECT_EQ(vh->gso_size, variant->gso_size); gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) : (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6); - ASSERT_EQ(vh->gso_type, gso_type); + EXPECT_EQ(vh->gso_type, gso_type); } } From f9b5bf12eb65e1e708a883a1e24712e81ed2f340 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:01 -0800 Subject: [PATCH 773/828] MAINTAINERS: remove Johan Hedberg from Bluetooth subsystem We have not seen emails or tags from Johan in over 5 years, and there is no recent mailing list activity. Marcel Holtmann hasn't provided any tags in the Bluetooth subsystem in over 5 years, but he is active on the Bluetooth mailing list, providing informal review. Luiz Augusto von Dentz is very active, handling essentially all commits and reviews (12% coverage, but Luiz is the sole active committer). Subsystem BLUETOOTH SUBSYSTEM Changes 50 / 411 (12%) Last activity: 2026-02-23 Marcel Holtmann : Johan Hedberg : Luiz Augusto von Dentz : Author 138d7eca445e 2026-02-23 00:00:00 164 Committer 138d7eca445e 2026-02-23 00:00:00 361 Tags 138d7eca445e 2026-02-23 00:00:00 362 Top reviewers: [15]: pmenzel@molgen.mpg.de [8]: keescook@chromium.org [5]: willemb@google.com [4]: horms@kernel.org [3]: kuniyu@amazon.com [3]: luiz.von.dentz@intel.com INACTIVE MAINTAINER Johan Hedberg Acked-by: Marcel Holtmann Link: https://patch.msgid.link/20260303215339.2333548-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index bee85351120e..4eb755b0a0d1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4615,7 +4615,6 @@ F: drivers/bluetooth/ BLUETOOTH SUBSYSTEM M: Marcel Holtmann -M: Johan Hedberg M: Luiz Augusto von Dentz L: linux-bluetooth@vger.kernel.org S: Supported From 80f8a19fe7cf5b1b5d541c7aaa37aa9d7653bda5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:02 -0800 Subject: [PATCH 774/828] MAINTAINERS: remove Manish Chopra from QLogic QL4xxx (now orphan) We have not seen tags from Manish for the QL4xxx driver in over 5 years, and there is no mailing list activity since Oct 2023. There has been no maintainer activity in this subsystem at all. Since there is no other maintainer for this driver it becomes an Orphan. Subsystem QLOGIC QL4xxx ETHERNET DRIVER Changes 40 / 74 (54%) (No activity) Top reviewers: [30]: horms@kernel.org [2]: jiri@nvidia.com [2]: shannon.nelson@amd.com [1]: saeedm@nvidia.com [1]: aleksandr.loktionov@intel.com [1]: kory.maincent@bootlin.com INACTIVE MAINTAINER Manish Chopra Link: https://patch.msgid.link/20260303215339.2333548-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4eb755b0a0d1..a83af9a89141 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21451,9 +21451,8 @@ S: Supported F: drivers/scsi/qedi/ QLOGIC QL4xxx ETHERNET DRIVER -M: Manish Chopra L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/ethernet/qlogic/qed/ F: drivers/net/ethernet/qlogic/qede/ F: include/linux/qed/ From e07f796a86b2f0fda976268a08c321579022fcbc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:03 -0800 Subject: [PATCH 775/828] MAINTAINERS: remove Jerin Jacob from Marvell OcteonTX2 We have not seen tags from Jerin for OcteonTX2 in over 5 years. Recent lore activity is in DPDK (non-kernel), not Linux. Sunil, Linu, Geetha, hariprasad, and Subbaraya are active, though the review coverage isn't great (38%). Subsystem MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER Changes 53 / 138 (38%) Last activity: 2026-02-18 Sunil Goutham : Author fc1b2901e0fe 2024-03-08 00:00:00 1 Tags 70f8986ecef1 2025-06-16 00:00:00 9 Linu Cherian : Author a861e5809f3e 2025-10-30 00:00:00 7 Tags a861e5809f3e 2025-10-30 00:00:00 7 Geetha sowjanya : Author 70e9a5760abf 2026-01-29 00:00:00 16 Tags 70e9a5760abf 2026-01-29 00:00:00 20 Jerin Jacob : hariprasad : Author 45be47bf5d7d 2026-02-18 00:00:00 22 Tags 45be47bf5d7d 2026-02-18 00:00:00 25 Subbaraya Sundeep : Author 47a1208776d7 2025-10-30 00:00:00 20 Tags 47a1208776d7 2025-10-30 00:00:00 30 Top reviewers: [36]: horms@kernel.org [4]: jacob.e.keller@intel.com [4]: kalesh-anakkur.purayil@broadcom.com [3]: vadim.fedorenko@linux.dev [2]: shaojijie@huawei.com [2]: jiri@nvidia.com INACTIVE MAINTAINER Jerin Jacob Link: https://patch.msgid.link/20260303215339.2333548-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a83af9a89141..de0f5a38b525 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15469,7 +15469,6 @@ MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER M: Sunil Goutham M: Linu Cherian M: Geetha sowjanya -M: Jerin Jacob M: hariprasad M: Subbaraya Sundeep L: netdev@vger.kernel.org From 77f72ef58fb849cbe39fd69a20f9ae5076cd442b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:05 -0800 Subject: [PATCH 776/828] MAINTAINERS: remove Sean Wang from MediaTek Ethernet and switch We have not seen tags from Sean in over 5 years, with only one mailing list post since 2024. Felix and Lorenzo are active for the Ethernet driver, and Chester, Daniel and DENG Qingfang are active for the switch driver. Subsystem MEDIATEK ETHERNET DRIVER Changes 55 / 113 (48%) Last activity: 2025-10-12 Felix Fietkau : Author d4736737110f 2025-09-02 00:00:00 3 Tags d4736737110f 2025-09-02 00:00:00 4 Sean Wang : Lorenzo Bianconi : Author 96326447d466 2025-08-13 00:00:00 35 Tags 3abc0e55ea1f 2025-10-12 00:00:00 40 Top reviewers: [26]: horms@kernel.org [5]: andrew@lunn.ch [4]: jacob.e.keller@intel.com [3]: shannon.nelson@amd.com [3]: michal.swiatkowski@linux.intel.com INACTIVE MAINTAINER Sean Wang Subsystem MEDIATEK SWITCH DRIVER Changes 26 / 70 (37%) Last activity: 2025-12-01 Chester A. Unal : Tags 585943b7ad30 2025-12-01 00:00:00 7 Daniel Golle : Author 497041d76301 2025-04-23 00:00:00 2 Tags 3b87e60d2131 2025-12-01 00:00:00 14 DENG Qingfang : Sean Wang : Top reviewers: [4]: andrew@lunn.ch [4]: florian.fainelli@broadcom.com [4]: arinc.unal@arinc9.com [2]: olteanv@gmail.com INACTIVE MAINTAINER Sean Wang Acked-by: Chester A. Unal Link: https://patch.msgid.link/20260303215339.2333548-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index de0f5a38b525..706fb325bc67 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16157,7 +16157,6 @@ F: drivers/dma/mediatek/ MEDIATEK ETHERNET DRIVER M: Felix Fietkau -M: Sean Wang M: Lorenzo Bianconi L: netdev@vger.kernel.org S: Maintained @@ -16351,7 +16350,6 @@ MEDIATEK SWITCH DRIVER M: Chester A. Unal M: Daniel Golle M: DENG Qingfang -M: Sean Wang L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530-mdio.c From 593cdf14523bfb837b947012cdc6545a53ea67b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:06 -0800 Subject: [PATCH 777/828] MAINTAINERS: remove DENG Qingfang from MediaTek switch We have not seen tags from DENG Qingfang for the MediaTek switch driver in over 5 years. He is active upstream with PPP/PPPoE patches in net-next. Chester and Daniel are active. Subsystem MEDIATEK SWITCH DRIVER Changes 26 / 70 (37%) Last activity: 2025-12-01 Chester A. Unal : Tags 585943b7ad30 2025-12-01 00:00:00 7 Daniel Golle : Author 497041d76301 2025-04-23 00:00:00 2 Tags 3b87e60d2131 2025-12-01 00:00:00 14 DENG Qingfang : Sean Wang : Top reviewers: [4]: andrew@lunn.ch [4]: florian.fainelli@broadcom.com [4]: arinc.unal@arinc9.com [2]: olteanv@gmail.com INACTIVE MAINTAINER DENG Qingfang Acked-by: Chester A. Unal Link: https://patch.msgid.link/20260303215339.2333548-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 706fb325bc67..49bf6c8da74c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16349,7 +16349,6 @@ F: include/soc/mediatek/smi.h MEDIATEK SWITCH DRIVER M: Chester A. Unal M: Daniel Golle -M: DENG Qingfang L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530-mdio.c From 34f49454b25cd2e532f137174ef59b778f1a09f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:07 -0800 Subject: [PATCH 778/828] MAINTAINERS: replace Clark Wang with Frank Li for Freescale FEC We have not seen tags from Clark for FEC in over 5 years. He has some limited recent activity on the mailing list in other NXP subsystems (stmmac, phy). Wei Fang and Shenwei Wang are active, with decent review coverage (61%). Frank Li has been reviewing code actively more recenty, let's make it official. Subsystem FREESCALE IMX / MXC FEC DRIVER Changes 57 / 92 (61%) Last activity: 2026-02-10 Wei Fang : Author 25eb3058eb70 2026-02-10 00:00:00 33 Tags 25eb3058eb70 2026-02-10 00:00:00 61 Shenwei Wang : Author d466c16026e9 2025-09-14 00:00:00 6 Tags d466c16026e9 2025-09-14 00:00:00 6 Clark Wang : Top reviewers: [23]: Frank.Li@nxp.com [17]: andrew@lunn.ch [4]: csokas.bence@prolan.hu [3]: horms@kernel.org [2]: maxime.chevallier@bootlin.com INACTIVE MAINTAINER Clark Wang Reviewed-by: Wei Fang Link: https://patch.msgid.link/20260303215339.2333548-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 49bf6c8da74c..b21c87332d14 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10167,8 +10167,8 @@ F: drivers/i2c/busses/i2c-cpm.c FREESCALE IMX / MXC FEC DRIVER M: Wei Fang +R: Frank Li R: Shenwei Wang -R: Clark Wang L: imx@lists.linux.dev L: netdev@vger.kernel.org S: Maintained From 60da2d2752ac4513fc3a91a0558dfd8176d2313f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:08 -0800 Subject: [PATCH 779/828] MAINTAINERS: remove Jonathan Lemon from OpenCompute PTP We have not seen emails or tags from Jonathan in over 5 years, and there is no recent mailing list activity. Vadim Fedorenko is active. Subsystem OPENCOMPUTE PTP CLOCK DRIVER Changes 49 / 130 (37%) Last activity: 2025-11-25 Jonathan Lemon : Vadim Fedorenko : Author d3ca2ef0c915 2025-09-19 00:00:00 5 Tags 648282e2d1e5 2025-11-25 00:00:00 20 Top reviewers: [7]: horms@kernel.org [4]: jiri@nvidia.com [3]: richardcochran@gmail.com [2]: aleksandr.loktionov@intel.com INACTIVE MAINTAINER Jonathan Lemon Add Jonathan to CREDITS as the initial author of ptp_ocp. Acked-by: Vadim Fedorenko Link: https://patch.msgid.link/20260303215339.2333548-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index d74c8b2b7ed3..fb9b4dc193a0 100644 --- a/CREDITS +++ b/CREDITS @@ -2415,6 +2415,10 @@ S: Am Muehlenweg 38 S: D53424 Remagen S: Germany +N: Jonathan Lemon +E: jonathan.lemon@gmail.com +D: OpenCompute PTP clock driver (ptp_ocp) + N: Colin Leroy E: colin@colino.net W: http://www.geekounet.org/ diff --git a/MAINTAINERS b/MAINTAINERS index b21c87332d14..d63fe814b750 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19803,7 +19803,6 @@ F: arch/*/boot/dts/ F: include/dt-bindings/ OPENCOMPUTE PTP CLOCK DRIVER -M: Jonathan Lemon M: Vadim Fedorenko L: netdev@vger.kernel.org S: Maintained From 9ede3e910f088fb3ffec0ed821f0b03951000d23 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:09 -0800 Subject: [PATCH 780/828] MAINTAINERS: replace Taras Chornyi with Elad Nachman for Marvell Prestera We have not seen emails or tags from Taras in over 5 years, and there is no recent mailing list activity. Elad Nachman has been providing reviews in the last couple of years and is the top reviewer for this subsystem. Subsystem MARVELL PRESTERA ETHERNET SWITCH DRIVER Changes 39 / 157 (24%) (No activity) Top reviewers: [8]: enachman@marvell.com [6]: horms@kernel.org [4]: idosch@nvidia.com [3]: andrew@lunn.ch [3]: jacob.e.keller@intel.com [3]: jiri@nvidia.com INACTIVE MAINTAINER Taras Chornyi Link: https://patch.msgid.link/20260303215339.2333548-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d63fe814b750..497822aac6aa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15483,7 +15483,7 @@ S: Supported F: drivers/perf/marvell_pem_pmu.c MARVELL PRESTERA ETHERNET SWITCH DRIVER -M: Taras Chornyi +M: Elad Nachman S: Supported W: https://github.com/Marvell-switching/switchdev-prestera F: drivers/net/ethernet/marvell/prestera/ From 4d37e68c46d15beb4af782b19d7f1b382316776c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:10 -0800 Subject: [PATCH 781/828] MAINTAINERS: remove Claudiu Manoil and Alexandre Belloni from Ocelot switch We have not seen tags from Claudiu for the Ocelot switch driver in over 5 years. He is active upstream in other NXP subsystems (ENETC, gianfar), with 46 emails on lore since 2024. We have not seen tags from Alexandre for the Ocelot switch driver in over 5 years. He is very active upstream in other subsystems (RTC, I3C, Atmel/Microchip SoC), with over 1,200 emails on lore since 2024. Vladimir Oltean is active. Subsystem OCELOT ETHERNET SWITCH DRIVER Changes 180 / 494 (36%) Last activity: 2026-02-12 Vladimir Oltean : Author c22ba07c827f 2026-02-10 00:00:00 33 Tags 026f6513c588 2026-02-12 00:00:00 39 Claudiu Manoil : Alexandre Belloni : Top reviewers: [49]: f.fainelli@gmail.com [19]: horms@kernel.org [10]: richardcochran@gmail.com [9]: jacob.e.keller@intel.com [8]: colin.foster@in-advantage.com INACTIVE MAINTAINER Claudiu Manoil Acked-by: Claudiu Manoil Acked-by: Alexandre Belloni Link: https://patch.msgid.link/20260303215339.2333548-11-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 497822aac6aa..d050d756beec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19216,8 +19216,6 @@ F: tools/objtool/ OCELOT ETHERNET SWITCH DRIVER M: Vladimir Oltean -M: Claudiu Manoil -M: Alexandre Belloni M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Supported From ed6579002548f3d9c00380e2aeaa8305d3ed8fc5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:53:11 -0800 Subject: [PATCH 782/828] MAINTAINERS: remove Thomas Falcon from IBM ibmvnic We have not seen emails or tags from Thomas's IBM address (tlfalcon@linux.ibm.com) in over 5 years. Looks like Thomas is active in perf tooling at Intel (thomas.falcon@intel.com). Subsystem IBM Power SRIOV Virtual NIC Device Driver Changes 49 / 134 (36%) Last activity: 2025-08-26 Haren Myneni : Tags 3c14917953a5 2025-08-26 00:00:00 2 Rick Lindsley : Nick Child : Author d93a6caab5d7 2025-03-25 00:00:00 14 Tags d93a6caab5d7 2025-03-25 00:00:00 16 Thomas Falcon : Top reviewers: [22]: drt@linux.ibm.com [13]: horms@kernel.org [9]: ricklind@linux.vnet.ibm.com [3]: davemarq@linux.ibm.com INACTIVE MAINTAINER Thomas Falcon Move Thomas to CREDITS as the initial author of ibmvnic. Acked-by: Thomas Falcon Link: https://patch.msgid.link/20260303215339.2333548-12-kuba@kernel.org Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index fb9b4dc193a0..9091bac3d2da 100644 --- a/CREDITS +++ b/CREDITS @@ -1242,6 +1242,10 @@ N: Veaceslav Falico E: vfalico@gmail.com D: Co-maintainer and co-author of the network bonding driver. +N: Thomas Falcon +E: tlfalcon@linux.ibm.com +D: Initial author of the IBM ibmvnic network driver + N: János Farkas E: chexum@shadow.banki.hu D: romfs, various (mostly networking) fixes diff --git a/MAINTAINERS b/MAINTAINERS index d050d756beec..3fcb17815e00 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12212,7 +12212,6 @@ IBM Power SRIOV Virtual NIC Device Driver M: Haren Myneni M: Rick Lindsley R: Nick Child -R: Thomas Falcon L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ibm/ibmvnic.* From b92b0075ee1870f78f59ab1f7da7dbfdd718ad7a Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 5 Mar 2026 14:53:12 +0000 Subject: [PATCH 783/828] ata: libata-core: Add BRIDGE_OK quirk for QEMU drives Currently, whenever you boot with a QEMU drive over an AHCI interface, you get: [ 1.632121] ata1.00: applying bridge limits This happens due to the kernel not believing the given drive is SATA, since word 93 of IDENTIFY (ATA_ID_HW_CONFIG) is non-zero. The result is a pretty severe limit in max_hw_sectors_kb, which limits our IO sizes. QEMU has set word 93 erroneously for SATA drives but does not, in any way, emulate any of these real hardware details. There is no PATA drive and no SATA cable. As such, add a BRIDGE_OK quirk for QEMU HARDDISK. Special care is taken to limit this quirk to "2.5+", to allow for fixed future versions. This results in the max_hw_sectors being limited solely by the controller interface's limits. Which, for AHCI controllers, takes it from 128KB to 32767KB. Cc: stable@vger.kernel.org Signed-off-by: Pedro Falcato Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 76b012f544ea..6c4e567b6582 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4232,6 +4232,7 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { /* Devices that do not need bridging limits applied */ { "MTRON MSP-SATA*", NULL, ATA_QUIRK_BRIDGE_OK }, { "BUFFALO HD-QSU2/R5", NULL, ATA_QUIRK_BRIDGE_OK }, + { "QEMU HARDDISK", "2.5+", ATA_QUIRK_BRIDGE_OK }, /* Devices which aren't very happy with higher link speeds */ { "WD My Book", NULL, ATA_QUIRK_1_5_GBPS }, From e5e890630533bdc15b26a34bb8e7ef539bdf1322 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 4 Mar 2026 13:03:56 +0100 Subject: [PATCH 784/828] net: bridge: fix nd_tbl NULL dereference when IPv6 is disabled When booting with the 'ipv6.disable=1' parameter, the nd_tbl is never initialized because inet6_init() exits before ndisc_init() is called which initializes it. Then, if neigh_suppress is enabled and an ICMPv6 Neighbor Discovery packet reaches the bridge, br_do_suppress_nd() will dereference ipv6_stub->nd_tbl which is NULL, passing it to neigh_lookup(). This causes a kernel NULL pointer dereference. BUG: kernel NULL pointer dereference, address: 0000000000000268 Oops: 0000 [#1] PREEMPT SMP NOPTI [...] RIP: 0010:neigh_lookup+0x16/0xe0 [...] Call Trace: ? neigh_lookup+0x16/0xe0 br_do_suppress_nd+0x160/0x290 [bridge] br_handle_frame_finish+0x500/0x620 [bridge] br_handle_frame+0x353/0x440 [bridge] __netif_receive_skb_core.constprop.0+0x298/0x1110 __netif_receive_skb_one_core+0x3d/0xa0 process_backlog+0xa0/0x140 __napi_poll+0x2c/0x170 net_rx_action+0x2c4/0x3a0 handle_softirqs+0xd0/0x270 do_softirq+0x3f/0x60 Fix this by replacing IS_ENABLED(IPV6) call with ipv6_mod_enabled() in the callers. This is in essence disabling NS/NA suppression when IPv6 is disabled. Fixes: ed842faeb2bd ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports") Reported-by: Guruprasad C P Closes: https://lore.kernel.org/netdev/CAHXs0ORzd62QOG-Fttqa2Cx_A_VFp=utE2H2VTX5nqfgs7LDxQ@mail.gmail.com/ Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260304120357.9778-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/bridge/br_device.c | 2 +- net/bridge/br_input.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ee01122f466f..f7502e62dd35 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -74,7 +74,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { br_do_proxy_suppress_arp(skb, br, vid, NULL); - } else if (IS_ENABLED(CONFIG_IPV6) && + } else if (ipv6_mod_enabled() && skb->protocol == htons(ETH_P_IPV6) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && pskb_may_pull(skb, sizeof(struct ipv6hdr) + diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1405f1061a54..2cbae0f9ae1f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -170,7 +170,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb (skb->protocol == htons(ETH_P_ARP) || skb->protocol == htons(ETH_P_RARP))) { br_do_proxy_suppress_arp(skb, br, vid, p); - } else if (IS_ENABLED(CONFIG_IPV6) && + } else if (ipv6_mod_enabled() && skb->protocol == htons(ETH_P_IPV6) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && pskb_may_pull(skb, sizeof(struct ipv6hdr) + From 168ff39e4758897d2eee4756977d036d52884c7e Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 4 Mar 2026 13:03:57 +0100 Subject: [PATCH 785/828] net: vxlan: fix nd_tbl NULL dereference when IPv6 is disabled When booting with the 'ipv6.disable=1' parameter, the nd_tbl is never initialized because inet6_init() exits before ndisc_init() is called which initializes it. If an IPv6 packet is injected into the interface, route_shortcircuit() is called and a NULL pointer dereference happens on neigh_lookup(). BUG: kernel NULL pointer dereference, address: 0000000000000380 Oops: Oops: 0000 [#1] SMP NOPTI [...] RIP: 0010:neigh_lookup+0x20/0x270 [...] Call Trace: vxlan_xmit+0x638/0x1ef0 [vxlan] dev_hard_start_xmit+0x9e/0x2e0 __dev_queue_xmit+0xbee/0x14e0 packet_sendmsg+0x116f/0x1930 __sys_sendto+0x1f5/0x200 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x12f/0x1590 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix this by adding an early check on route_shortcircuit() when protocol is ETH_P_IPV6. Note that ipv6_mod_enabled() cannot be used here because VXLAN can be built-in even when IPv6 is built as a module. Fixes: e15a00aafa4b ("vxlan: add ipv6 route short circuit support") Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260304120357.9778-2-fmancera@suse.de Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 05558b6afecd..17c941aac32d 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2130,6 +2130,11 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) { struct ipv6hdr *pip6; + /* check if nd_tbl is not initiliazed due to + * ipv6.disable=1 set during boot + */ + if (!ipv6_stub->nd_tbl) + return false; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return false; pip6 = ipv6_hdr(skb); From 21ec92774d1536f71bdc90b0e3d052eff99cf093 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 4 Mar 2026 19:38:13 +0800 Subject: [PATCH 786/828] net: ipv6: fix panic when IPv4 route references loopback IPv6 nexthop When a standalone IPv6 nexthop object is created with a loopback device (e.g., "ip -6 nexthop add id 100 dev lo"), fib6_nh_init() misclassifies it as a reject route. This is because nexthop objects have no destination prefix (fc_dst=::), causing fib6_is_reject() to match any loopback nexthop. The reject path skips fib_nh_common_init(), leaving nhc_pcpu_rth_output unallocated. If an IPv4 route later references this nexthop, __mkroute_output() dereferences NULL nhc_pcpu_rth_output and panics. Simplify the check in fib6_nh_init() to only match explicit reject routes (RTF_REJECT) instead of using fib6_is_reject(). The loopback promotion heuristic in fib6_is_reject() is handled separately by ip6_route_info_create_nh(). After this change, the three cases behave as follows: 1. Explicit reject route ("ip -6 route add unreachable 2001:db8::/64"): RTF_REJECT is set, enters reject path, skips fib_nh_common_init(). No behavior change. 2. Implicit loopback reject route ("ip -6 route add 2001:db8::/32 dev lo"): RTF_REJECT is not set, takes normal path, fib_nh_common_init() is called. ip6_route_info_create_nh() still promotes it to reject afterward. nhc_pcpu_rth_output is allocated but unused, which is harmless. 3. Standalone nexthop object ("ip -6 nexthop add id 100 dev lo"): RTF_REJECT is not set, takes normal path, fib_nh_common_init() is called. nhc_pcpu_rth_output is properly allocated, fixing the crash when IPv4 routes reference this nexthop. Suggested-by: Ido Schimmel Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") Reported-by: syzbot+334190e097a98a1b81bb@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/698f8482.a70a0220.2c38d7.00ca.GAE@google.com/T/ Signed-off-by: Jiayuan Chen Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/20260304113817.294966-2-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7db0c837196c..08cd86f49bf9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3583,7 +3583,6 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; - int addr_type; int err; fib6_nh->fib_nh_family = AF_INET6; @@ -3625,11 +3624,10 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, fib6_nh->fib_nh_weight = 1; - /* We cannot add true routes via loopback here, - * they would result in kernel looping; promote them to reject routes + /* Reset the nexthop device to the loopback device in case of reject + * routes. */ - addr_type = ipv6_addr_type(&cfg->fc_dst); - if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { + if (cfg->fc_flags & RTF_REJECT) { /* hold loopback dev/idev if we haven't done so. */ if (dev != net->loopback_dev) { if (dev) { From 46c1ef0cfcea50aaf0b52316fdab94bf4b45795b Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 4 Mar 2026 19:38:14 +0800 Subject: [PATCH 787/828] selftests: net: add test for IPv4 route with loopback IPv6 nexthop Add a regression test for a kernel panic that occurs when an IPv4 route references an IPv6 nexthop object created on the loopback device. The test creates an IPv6 nexthop on lo, binds an IPv4 route to it, then triggers a route lookup via ping to verify the kernel does not crash. ./fib_nexthops.sh Tests passed: 249 Tests failed: 0 Reviewed-by: Ido Schimmel Signed-off-by: Jiayuan Chen Reviewed-by: David Ahern Link: https://patch.msgid.link/20260304113817.294966-3-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthops.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 21026b667667..6eb7f95e70e1 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1672,6 +1672,17 @@ ipv4_withv6_fcnal() run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1" log_test $? 2 "IPv4 route with invalid IPv6 gateway" + + # Test IPv4 route with loopback IPv6 nexthop + # Regression test: loopback IPv6 nexthop was misclassified as reject + # route, skipping nhc_pcpu_rth_output allocation, causing panic when + # an IPv4 route references it and triggers __mkroute_output(). + run_cmd "$IP -6 nexthop add id 20 dev lo" + run_cmd "$IP ro add 172.20.20.0/24 nhid 20" + run_cmd "ip netns exec $me ping -c1 -W1 172.20.20.1" + log_test $? 1 "IPv4 route with loopback IPv6 nexthop (no crash)" + run_cmd "$IP ro del 172.20.20.0/24" + run_cmd "$IP nexthop del id 20" } ipv4_fcnal_runtime() From e2cedd400c3ec0302ffca2490e8751772906ac23 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Mar 2026 09:06:02 -0500 Subject: [PATCH 788/828] net/sched: act_ife: Fix metalist update behavior Whenever an ife action replace changes the metalist, instead of replacing the old data on the metalist, the current ife code is appending the new metadata. Aside from being innapropriate behavior, this may lead to an unbounded addition of metadata to the metalist which might cause an out of bounds error when running the encode op: [ 138.423369][ C1] ================================================================== [ 138.424317][ C1] BUG: KASAN: slab-out-of-bounds in ife_tlv_meta_encode (net/ife/ife.c:168) [ 138.424906][ C1] Write of size 4 at addr ffff8880077f4ffe by task ife_out_out_bou/255 [ 138.425778][ C1] CPU: 1 UID: 0 PID: 255 Comm: ife_out_out_bou Not tainted 7.0.0-rc1-00169-gfbdfa8da05b6 #624 PREEMPT(full) [ 138.425795][ C1] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 138.425800][ C1] Call Trace: [ 138.425804][ C1] [ 138.425808][ C1] dump_stack_lvl (lib/dump_stack.c:122) [ 138.425828][ C1] print_report (mm/kasan/report.c:379 mm/kasan/report.c:482) [ 138.425839][ C1] ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221) [ 138.425844][ C1] ? __virt_addr_valid (./arch/x86/include/asm/preempt.h:95 (discriminator 1) ./include/linux/rcupdate.h:975 (discriminator 1) ./include/linux/mmzone.h:2207 (discriminator 1) arch/x86/mm/physaddr.c:54 (discriminator 1)) [ 138.425853][ C1] ? ife_tlv_meta_encode (net/ife/ife.c:168) [ 138.425859][ C1] kasan_report (mm/kasan/report.c:221 mm/kasan/report.c:597) [ 138.425868][ C1] ? ife_tlv_meta_encode (net/ife/ife.c:168) [ 138.425878][ C1] kasan_check_range (mm/kasan/generic.c:186 (discriminator 1) mm/kasan/generic.c:200 (discriminator 1)) [ 138.425884][ C1] __asan_memset (mm/kasan/shadow.c:84 (discriminator 2)) [ 138.425889][ C1] ife_tlv_meta_encode (net/ife/ife.c:168) [ 138.425893][ C1] ? ife_tlv_meta_encode (net/ife/ife.c:171) [ 138.425898][ C1] ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221) [ 138.425903][ C1] ife_encode_meta_u16 (net/sched/act_ife.c:57) [ 138.425910][ C1] ? __pfx_do_raw_spin_lock (kernel/locking/spinlock_debug.c:114) [ 138.425916][ C1] ? __asan_memcpy (mm/kasan/shadow.c:105 (discriminator 3)) [ 138.425921][ C1] ? __pfx_ife_encode_meta_u16 (net/sched/act_ife.c:45) [ 138.425927][ C1] ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221) [ 138.425931][ C1] tcf_ife_act (net/sched/act_ife.c:847 net/sched/act_ife.c:879) To solve this issue, fix the replace behavior by adding the metalist to the ife rcu data structure. Fixes: aa9fd9a325d51 ("sched: act: ife: update parameters via rcu handling") Reported-by: Ruitong Liu Tested-by: Ruitong Liu Co-developed-by: Victor Nogueira Signed-off-by: Victor Nogueira Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260304140603.76500-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_ife.h | 4 +- net/sched/act_ife.c | 93 ++++++++++++++++++------------------- 2 files changed, 45 insertions(+), 52 deletions(-) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index c7f24a2da1ca..24d4d5a62b3c 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -13,15 +13,13 @@ struct tcf_ife_params { u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; - + struct list_head metalist; struct rcu_head rcu; }; struct tcf_ife_info { struct tc_action common; struct tcf_ife_params __rcu *params; - /* list of metaids allowed */ - struct list_head metalist; }; #define to_ife(a) ((struct tcf_ife_info *)a) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 79df81d12894..d5e8a91bb4eb 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -293,8 +293,8 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) /* called when adding new meta information */ static int __add_metainfo(const struct tcf_meta_ops *ops, - struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic, bool exists) + struct tcf_ife_params *p, u32 metaid, void *metaval, + int len, bool atomic) { struct tcf_meta_info *mi = NULL; int ret = 0; @@ -313,45 +313,40 @@ static int __add_metainfo(const struct tcf_meta_ops *ops, } } - if (exists) - spin_lock_bh(&ife->tcf_lock); - list_add_tail(&mi->metalist, &ife->metalist); - if (exists) - spin_unlock_bh(&ife->tcf_lock); + list_add_tail(&mi->metalist, &p->metalist); return ret; } static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops, - struct tcf_ife_info *ife, u32 metaid, - bool exists) + struct tcf_ife_params *p, u32 metaid) { int ret; if (!try_module_get(ops->owner)) return -ENOENT; - ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists); + ret = __add_metainfo(ops, p, metaid, NULL, 0, true); if (ret) module_put(ops->owner); return ret; } -static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool exists) +static int add_metainfo(struct tcf_ife_params *p, u32 metaid, void *metaval, + int len) { const struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret; if (!ops) return -ENOENT; - ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); + ret = __add_metainfo(ops, p, metaid, metaval, len, false); if (ret) /*put back what find_ife_oplist took */ module_put(ops->owner); return ret; } -static int use_all_metadata(struct tcf_ife_info *ife, bool exists) +static int use_all_metadata(struct tcf_ife_params *p) { struct tcf_meta_ops *o; int rc = 0; @@ -359,7 +354,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists); + rc = add_metainfo_and_get_ops(o, p, o->metaid); if (rc == 0) installed += 1; } @@ -371,7 +366,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) return -EINVAL; } -static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) +static int dump_metalist(struct sk_buff *skb, struct tcf_ife_params *p) { struct tcf_meta_info *e; struct nlattr *nest; @@ -379,14 +374,14 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) int total_encoded = 0; /*can only happen on decode */ - if (list_empty(&ife->metalist)) + if (list_empty(&p->metalist)) return 0; nest = nla_nest_start_noflag(skb, TCA_IFE_METALST); if (!nest) goto out_nlmsg_trim; - list_for_each_entry(e, &ife->metalist, metalist) { + list_for_each_entry(e, &p->metalist, metalist) { if (!e->ops->get(skb, e)) total_encoded += 1; } @@ -403,13 +398,11 @@ out_nlmsg_trim: return -1; } -/* under ife->tcf_lock */ -static void _tcf_ife_cleanup(struct tc_action *a) +static void __tcf_ife_cleanup(struct tcf_ife_params *p) { - struct tcf_ife_info *ife = to_ife(a); struct tcf_meta_info *e, *n; - list_for_each_entry_safe(e, n, &ife->metalist, metalist) { + list_for_each_entry_safe(e, n, &p->metalist, metalist) { list_del(&e->metalist); if (e->metaval) { if (e->ops->release) @@ -422,18 +415,23 @@ static void _tcf_ife_cleanup(struct tc_action *a) } } +static void tcf_ife_cleanup_params(struct rcu_head *head) +{ + struct tcf_ife_params *p = container_of(head, struct tcf_ife_params, + rcu); + + __tcf_ife_cleanup(p); + kfree(p); +} + static void tcf_ife_cleanup(struct tc_action *a) { struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; - spin_lock_bh(&ife->tcf_lock); - _tcf_ife_cleanup(a); - spin_unlock_bh(&ife->tcf_lock); - p = rcu_dereference_protected(ife->params, 1); if (p) - kfree_rcu(p, rcu); + call_rcu(&p->rcu, tcf_ife_cleanup_params); } static int load_metalist(struct nlattr **tb, bool rtnl_held) @@ -455,8 +453,7 @@ static int load_metalist(struct nlattr **tb, bool rtnl_held) return 0; } -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, - bool exists, bool rtnl_held) +static int populate_metalist(struct tcf_ife_params *p, struct nlattr **tb) { int len = 0; int rc = 0; @@ -468,7 +465,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = add_metainfo(ife, i, val, len, exists); + rc = add_metainfo(p, i, val, len); if (rc) return rc; } @@ -523,6 +520,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, p = kzalloc_obj(*p); if (!p) return -ENOMEM; + INIT_LIST_HEAD(&p->metalist); if (tb[TCA_IFE_METALST]) { err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, @@ -567,8 +565,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&ife->metalist); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) @@ -600,8 +596,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = populate_metalist(ife, tb2, exists, - !(flags & TCA_ACT_FLAGS_NO_RTNL)); + err = populate_metalist(p, tb2); if (err) goto metadata_parse_err; } else { @@ -610,7 +605,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife, exists); + err = use_all_metadata(p); if (err) goto metadata_parse_err; } @@ -626,13 +621,14 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); if (p) - kfree_rcu(p, rcu); + call_rcu(&p->rcu, tcf_ife_cleanup_params); return ret; metadata_parse_err: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: + __tcf_ife_cleanup(p); kfree(p); tcf_idr_release(*a, bind); return err; @@ -679,7 +675,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type)) goto nla_put_failure; - if (dump_metalist(skb, ife)) { + if (dump_metalist(skb, p)) { /*ignore failure to dump metalist */ pr_info("Failed to dump metalist\n"); } @@ -693,13 +689,13 @@ nla_put_failure: return -1; } -static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, +static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_params *p, u16 metaid, u16 mlen, void *mdata) { struct tcf_meta_info *e; /* XXX: use hash to speed up */ - list_for_each_entry(e, &ife->metalist, metalist) { + list_for_each_entry_rcu(e, &p->metalist, metalist) { if (metaid == e->metaid) { if (e->ops) { /* We check for decode presence already */ @@ -716,10 +712,13 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; + struct tcf_ife_params *p; u8 *ifehdr_end; u8 *tlv_data; u16 metalen; + p = rcu_dereference_bh(ife->params); + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); @@ -745,7 +744,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { + if (find_decode_metaid(skb, p, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata * but dont have an ops for it */ @@ -769,12 +768,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, /*XXX: check if we can do this at install time instead of current * send data path **/ -static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife) +static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_params *p) { - struct tcf_meta_info *e, *n; + struct tcf_meta_info *e; int tot_run_sz = 0, run_sz = 0; - list_for_each_entry_safe(e, n, &ife->metalist, metalist) { + list_for_each_entry_rcu(e, &p->metalist, metalist) { if (e->ops->check_presence) { run_sz = e->ops->check_presence(skb, e); tot_run_sz += run_sz; @@ -795,7 +794,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA where ORIGDATA = original ethernet header ... */ - u16 metalen = ife_get_sz(skb, ife); + u16 metalen = ife_get_sz(skb, p); int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; unsigned int skboff = 0; int new_len = skb->len + hdrm; @@ -833,25 +832,21 @@ drop: if (!ife_meta) goto drop; - spin_lock(&ife->tcf_lock); - /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by * ops->presence_check... */ - list_for_each_entry(e, &ife->metalist, metalist) { + list_for_each_entry_rcu(e, &p->metalist, metalist) { if (e->ops->encode) { err = e->ops->encode(skb, (void *)(ife_meta + skboff), e); } if (err < 0) { /* too corrupt to keep around if overwritten */ - spin_unlock(&ife->tcf_lock); goto drop; } skboff += err; } - spin_unlock(&ife->tcf_lock); oethh = (struct ethhdr *)skb->data; if (!is_zero_ether_addr(p->eth_src)) From 5d1271ff4c131cd4a601dabdfdcdc1fe1252493f Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 4 Mar 2026 09:06:03 -0500 Subject: [PATCH 789/828] selftests/tc-testing: Add tests exercising act_ife metalist replace behaviour Add 2 test cases to exercise fix in act_ife's internal metalist behaviour. - Update decode ife action into encode with tcindex metadata - Update decode ife action into encode with multiple metadata Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260304140603.76500-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/actions/ife.json | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index f587a32e44c4..808aef4afe22 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -1279,5 +1279,104 @@ "teardown": [ "$TC actions flush action ife" ] + }, + { + "id": "f2a0", + "name": "Update decode ife action with encode metadata", + "category": [ + "actions", + "ife" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife decode index 10" + ], + "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 index 10", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action ife index 10", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "ife", + "mode": "encode", + "control_action": { + "type": "pipe" + }, + "type": "0xed3e", + "tcindex": 1, + "index": 10, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d352", + "name": "Update decode ife action into encode with multiple metadata", + "category": [ + "actions", + "ife" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife decode index 10" + ], + "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 use mark 22 index 10", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action ife index 10", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "ife", + "mode": "encode", + "control_action": { + "type": "pipe" + }, + "type": "0xed3e", + "tcindex": 1, + "mark": 22, + "index": 10, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC actions flush action ife" + ] } ] From 88b6b7f7b216108a09887b074395fa7b751880b1 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:42 +0100 Subject: [PATCH 790/828] xdp: use modulo operation to calculate XDP frag tailroom The current formula for calculating XDP tailroom in mbuf packets works only if each frag has its own page (if rxq->frag_size is PAGE_SIZE), this defeats the purpose of the parameter overall and without any indication leads to negative calculated tailroom on at least half of frags, if shared pages are used. There are not many drivers that set rxq->frag_size. Among them: * i40e and enetc always split page uniformly between frags, use shared pages * ice uses page_pool frags via libeth, those are power-of-2 and uniformly distributed across page * idpf has variable frag_size with XDP on, so current API is not applicable * mlx5, mtk and mvneta use PAGE_SIZE or 0 as frag_size for page_pool As for AF_XDP ZC, only ice, i40e and idpf declare frag_size for it. Modulo operation yields good results for aligned chunks, they are all power-of-2, between 2K and PAGE_SIZE. Formula without modulo fails when chunk_size is 2K. Buffers in unaligned mode are not distributed uniformly, so modulo operation would not work. To accommodate unaligned buffers, we could define frag_size as data + tailroom, and hence do not subtract offset when calculating tailroom, but this would necessitate more changes in the drivers. Define rxq->frag_size as an even portion of a page that fully belongs to a single frag. When calculating tailroom, locate the data start within such portion by performing a modulo operation on page offset. Fixes: bf25146a5595 ("bpf: add frags support to the bpf_xdp_adjust_tail() API") Acked-by: Jakub Kicinski Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-2-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 0d5d5a17acb2..d6fafb3633b0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4155,7 +4155,8 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) return -EOPNOTSUPP; - tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); + tailroom = rxq->frag_size - skb_frag_size(frag) - + skb_frag_off(frag) % rxq->frag_size; if (unlikely(offset > tailroom)) return -EINVAL; From 16394d80539937d348dd3b9ea32415c54e67a81b Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:43 +0100 Subject: [PATCH 791/828] xsk: introduce helper to determine rxq->frag_size rxq->frag_size is basically a step between consecutive strictly aligned frames. In ZC mode, chunk size fits exactly, but if chunks are unaligned, there is no safe way to determine accessible space to grow tailroom. Report frag_size to be zero, if chunks are unaligned, chunk_size otherwise. Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-3-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock_drv.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index aefc368449d5..6b9ebae2dc95 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -51,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { @@ -337,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return 0; } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { From 02852b47c706772af795d3e28fca99fc9b923b2c Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:44 +0100 Subject: [PATCH 792/828] ice: fix rxq info registering in mbuf packets XDP RxQ info contains frag_size, which depends on the MTU. This makes the old way of registering RxQ info before calculating new buffer sizes invalid. Currently, it leads to frag_size being outdated, making it sometimes impossible to grow tailroom in a mbuf packet. E.g. fragments are actually 3K+, but frag size is still as if MTU was 1500. Always register new XDP RxQ info after reconfiguring memory pools. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-4-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_base.c | 26 ++++++-------------- drivers/net/ethernet/intel/ice/ice_ethtool.c | 1 + drivers/net/ethernet/intel/ice/ice_txrx.c | 4 ++- drivers/net/ethernet/intel/ice/ice_xsk.c | 3 +++ 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index d85b0a4baa37..165ef8afa8b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -666,23 +666,12 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF || ring->vsi->type == ICE_VSI_LB) { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - return err; - } - ice_rx_xsk_pool(ring); err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool); if (err) return err; if (ring->xsk_pool) { - xdp_rxq_info_unreg(&ring->xdp_rxq); - rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, @@ -705,14 +694,13 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (err) return err; - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - goto err_destroy_fq; - } + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + goto err_destroy_fq; + xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, ring->pp); } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index b9be10b58856..301947d53ede 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3342,6 +3342,7 @@ process_rx: rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; rx_rings[i].xdp_buf = NULL; + rx_rings[i].xdp_rxq = (struct xdp_rxq_info){ }; /* this is to allow wr32 to have something to write to * during early allocation of Rx buffers diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index a5bbce68f76c..a2cd4cf37734 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -560,7 +560,9 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) i = 0; } - if (rx_ring->vsi->type == ICE_VSI_PF && + if ((rx_ring->vsi->type == ICE_VSI_PF || + rx_ring->vsi->type == ICE_VSI_SF || + rx_ring->vsi->type == ICE_VSI_LB) && xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); xdp_rxq_info_unreg(&rx_ring->xdp_rxq); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index c673094663a3..0643017541c3 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -899,6 +899,9 @@ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) u16 ntc = rx_ring->next_to_clean; u16 ntu = rx_ring->next_to_use; + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + while (ntc != ntu) { struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc); From e142dc4ef0f451b7ef99d09aaa84e9389af629d7 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:45 +0100 Subject: [PATCH 793/828] ice: change XDP RxQ frag_size from DMA write length to xdp.frame_sz The only user of frag_size field in XDP RxQ info is bpf_xdp_frags_increase_tail(). It clearly expects whole buff size instead of DMA write size. Different assumptions in ice driver configuration lead to negative tailroom. This allows to trigger kernel panic, when using XDP_ADJUST_TAIL_GROW_MULTI_BUFF xskxceiver test and changing packet size to 6912 and the requested offset to a huge value, e.g. XSK_UMEM__MAX_FRAME_SIZE * 100. Due to other quirks of the ZC configuration in ice, panic is not observed in ZC mode, but tailroom growing still fails when it should not. Use fill queue buffer truesize instead of DMA write size in XDP RxQ info. Fix ZC mode too by using the new helper. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-5-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_base.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 165ef8afa8b0..1667f686ff75 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -661,7 +661,6 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { struct device *dev = ice_pf_to_dev(ring->vsi->back); u32 num_bufs = ICE_DESC_UNUSED(ring); - u32 rx_buf_len; int err; if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF || @@ -672,12 +671,12 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return err; if (ring->xsk_pool) { - rx_buf_len = - xsk_pool_get_rx_frame_size(ring->xsk_pool); + u32 frag_size = + xsk_pool_get_rx_frag_step(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - rx_buf_len); + frag_size); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -697,7 +696,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + ring->truesize); if (err) goto err_destroy_fq; From 8f497dc8a61429cc004720aa8e713743355d80cf Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:46 +0100 Subject: [PATCH 794/828] i40e: fix registering XDP RxQ info Current way of handling XDP RxQ info in i40e has a problem, where frag_size is not updated when xsk_buff_pool is detached or when MTU is changed, this leads to growing tail always failing for multi-buffer packets. Couple XDP RxQ info registering with buffer allocations and unregistering with cleaning the ring. Fixes: a045d2f2d03d ("i40e: set xdp_rxq_info::frag_size") Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-6-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 34 ++++++++++++--------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 5 +-- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 7b9e147d7365..781ec5aa814b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3583,18 +3583,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (ring->vsi->type != I40E_VSI_MAIN) goto skip; - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->queue_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - return err; - } - ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { - xdp_rxq_info_unreg(&ring->xdp_rxq); ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->queue_index, @@ -3606,17 +3596,23 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) MEM_TYPE_XSK_BUFF_POOL, NULL); if (err) - return err; + goto unreg_xdp; dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->queue_index); } else { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); if (err) - return err; + goto unreg_xdp; } skip: @@ -3654,7 +3650,8 @@ skip: dev_info(&vsi->back->pdev->dev, "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", ring->queue_index, pf_q, err); - return -ENOMEM; + err = -ENOMEM; + goto unreg_xdp; } /* set the context in the HMC */ @@ -3663,7 +3660,8 @@ skip: dev_info(&vsi->back->pdev->dev, "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", ring->queue_index, pf_q, err); - return -ENOMEM; + err = -ENOMEM; + goto unreg_xdp; } /* configure Rx buffer alignment */ @@ -3671,7 +3669,8 @@ skip: if (I40E_2K_TOO_SMALL_WITH_PADDING) { dev_info(&vsi->back->pdev->dev, "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto unreg_xdp; } clear_ring_build_skb_enabled(ring); } else { @@ -3701,6 +3700,11 @@ skip: } return 0; +unreg_xdp: + if (ring->vsi->type == I40E_VSI_MAIN) + xdp_rxq_info_unreg(&ring->xdp_rxq); + + return err; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 34db7d8866b0..894f2d06d39d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1470,6 +1470,9 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + if (rx_ring->xsk_pool) { i40e_xsk_clean_rx_ring(rx_ring); goto skip_free; @@ -1527,8 +1530,6 @@ skip_free: void i40e_free_rx_resources(struct i40e_ring *rx_ring) { i40e_clean_rx_ring(rx_ring); - if (rx_ring->vsi->type == I40E_VSI_MAIN) - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); rx_ring->xdp_prog = NULL; kfree(rx_ring->rx_bi); rx_ring->rx_bi = NULL; From c69d22c6c46a1d792ba8af3d8d6356fdc0e6f538 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:47 +0100 Subject: [PATCH 795/828] i40e: use xdp.frame_sz as XDP RxQ info frag_size The only user of frag_size field in XDP RxQ info is bpf_xdp_frags_increase_tail(). It clearly expects whole buffer size instead of DMA write size. Different assumptions in i40e driver configuration lead to negative tailroom. Set frag_size to the same value as frame_sz in shared pages mode, use new helper to set frag_size when AF_XDP ZC is active. Fixes: a045d2f2d03d ("i40e: set xdp_rxq_info::frag_size") Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-7-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 781ec5aa814b..926d001b2150 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3569,6 +3569,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) u16 pf_q = vsi->base_queue + ring->queue_index; struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_rxq rx_ctx; + u32 xdp_frame_sz; int err = 0; bool ok; @@ -3578,6 +3579,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) memset(&rx_ctx, 0, sizeof(rx_ctx)); ring->rx_buf_len = vsi->rx_buf_len; + xdp_frame_sz = i40e_rx_pg_size(ring) / 2; /* XDP RX-queue info only needed for RX rings exposed to XDP */ if (ring->vsi->type != I40E_VSI_MAIN) @@ -3585,11 +3587,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { + xdp_frame_sz = xsk_pool_get_rx_frag_step(ring->xsk_pool); ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->queue_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + xdp_frame_sz); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -3605,7 +3608,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->queue_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + xdp_frame_sz); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -3616,7 +3619,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) } skip: - xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, xdp_frame_sz, &ring->xdp_rxq); rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); From 75d9228982f23d68066ca0b7d87014c3eb8ddc85 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:48 +0100 Subject: [PATCH 796/828] libeth, idpf: use truesize as XDP RxQ info frag_size The only user of frag_size field in XDP RxQ info is bpf_xdp_frags_increase_tail(). It clearly expects whole buffer size instead of DMA write size. Different assumptions in idpf driver configuration lead to negative tailroom. To make it worse, buffer sizes are not actually uniform in idpf when splitq is enabled, as there are several buffer queues, so rxq->rx_buf_size is meaningless in this case. Use truesize of the first bufq in AF_XDP ZC, as there is only one. Disable growing tail for regular splitq. Fixes: ac8a861f632e ("idpf: prepare structures to support XDP") Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-8-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/xdp.c | 6 +++++- drivers/net/ethernet/intel/idpf/xsk.c | 1 + drivers/net/ethernet/intel/libeth/xsk.c | 1 + include/net/libeth/xsk.h | 3 +++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/xdp.c b/drivers/net/ethernet/intel/idpf/xdp.c index 6ac9c6624c2a..cbccd4546768 100644 --- a/drivers/net/ethernet/intel/idpf/xdp.c +++ b/drivers/net/ethernet/intel/idpf/xdp.c @@ -47,12 +47,16 @@ static int __idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq, void *arg) { const struct idpf_vport *vport = rxq->q_vector->vport; const struct idpf_q_vec_rsrc *rsrc; + u32 frag_size = 0; bool split; int err; + if (idpf_queue_has(XSK, rxq)) + frag_size = rxq->bufq_sets[0].bufq.truesize; + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx, rxq->q_vector->napi.napi_id, - rxq->rx_buf_size); + frag_size); if (err) return err; diff --git a/drivers/net/ethernet/intel/idpf/xsk.c b/drivers/net/ethernet/intel/idpf/xsk.c index 676cbd80774d..d95d3efdfd36 100644 --- a/drivers/net/ethernet/intel/idpf/xsk.c +++ b/drivers/net/ethernet/intel/idpf/xsk.c @@ -403,6 +403,7 @@ int idpf_xskfq_init(struct idpf_buf_queue *bufq) bufq->pending = fq.pending; bufq->thresh = fq.thresh; bufq->rx_buf_size = fq.buf_len; + bufq->truesize = fq.truesize; if (!idpf_xskfq_refill(bufq)) netdev_err(bufq->pool->netdev, diff --git a/drivers/net/ethernet/intel/libeth/xsk.c b/drivers/net/ethernet/intel/libeth/xsk.c index 846e902e31b6..4882951d5c9c 100644 --- a/drivers/net/ethernet/intel/libeth/xsk.c +++ b/drivers/net/ethernet/intel/libeth/xsk.c @@ -167,6 +167,7 @@ int libeth_xskfq_create(struct libeth_xskfq *fq) fq->pending = fq->count; fq->thresh = libeth_xdp_queue_threshold(fq->count); fq->buf_len = xsk_pool_get_rx_frame_size(fq->pool); + fq->truesize = xsk_pool_get_rx_frag_step(fq->pool); return 0; } diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 481a7b28e6f2..82b5d21aae87 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, * @pending: current number of XSkFQEs to refill * @thresh: threshold below which the queue is refilled * @buf_len: HW-writeable length per each buffer + * @truesize: step between consecutive buffers, 0 if none exists * @nid: ID of the closest NUMA node with memory */ struct libeth_xskfq { @@ -614,6 +615,8 @@ struct libeth_xskfq { u32 thresh; u32 buf_len; + u32 truesize; + int nid; }; From f8e18abf183dbd636a8725532c7f5aa58957de84 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:49 +0100 Subject: [PATCH 797/828] net: enetc: use truesize as XDP RxQ info frag_size The only user of frag_size field in XDP RxQ info is bpf_xdp_frags_increase_tail(). It clearly expects truesize instead of DMA write size. Different assumptions in enetc driver configuration lead to negative tailroom. Set frag_size to the same value as frame_sz. Fixes: 2768b2e2f7d2 ("net: enetc: register XDP RX queues with frag_size") Reviewed-by: Aleksandr Loktionov Reviewed-by: Vladimir Oltean Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-9-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 70768392912c..a146ceaf2ed6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -3467,7 +3467,7 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i, priv->rx_ring[i] = bdr; err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0, - ENETC_RXB_DMA_SIZE_XDP); + ENETC_RXB_TRUESIZE); if (err) goto free_vector; From 8821e857759be9db3cde337ad328b71fe5c8a55f Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 5 Mar 2026 12:12:50 +0100 Subject: [PATCH 798/828] xdp: produce a warning when calculated tailroom is negative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many ethernet drivers report xdp Rx queue frag size as being the same as DMA write size. However, the only user of this field, namely bpf_xdp_frags_increase_tail(), clearly expects a truesize. Such difference leads to unspecific memory corruption issues under certain circumstances, e.g. in ixgbevf maximum DMA write size is 3 KB, so when running xskxceiver's XDP_ADJUST_TAIL_GROW_MULTI_BUFF, 6K packet fully uses all DMA-writable space in 2 buffers. This would be fine, if only rxq->frag_size was properly set to 4K, but value of 3K results in a negative tailroom, because there is a non-zero page offset. We are supposed to return -EINVAL and be done with it in such case, but due to tailroom being stored as an unsigned int, it is reported to be somewhere near UINT_MAX, resulting in a tail being grown, even if the requested offset is too much (it is around 2K in the abovementioned test). This later leads to all kinds of unspecific calltraces. [ 7340.337579] xskxceiver[1440]: segfault at 1da718 ip 00007f4161aeac9d sp 00007f41615a6a00 error 6 [ 7340.338040] xskxceiver[1441]: segfault at 7f410000000b ip 00000000004042b5 sp 00007f415bffecf0 error 4 [ 7340.338179] in libc.so.6[61c9d,7f4161aaf000+160000] [ 7340.339230] in xskxceiver[42b5,400000+69000] [ 7340.340300] likely on CPU 6 (core 0, socket 6) [ 7340.340302] Code: ff ff 01 e9 f4 fe ff ff 0f 1f 44 00 00 4c 39 f0 74 73 31 c0 ba 01 00 00 00 f0 0f b1 17 0f 85 ba 00 00 00 49 8b 87 88 00 00 00 <4c> 89 70 08 eb cc 0f 1f 44 00 00 48 8d bd f0 fe ff ff 89 85 ec fe [ 7340.340888] likely on CPU 3 (core 0, socket 3) [ 7340.345088] Code: 00 00 00 ba 00 00 00 00 be 00 00 00 00 89 c7 e8 31 ca ff ff 89 45 ec 8b 45 ec 85 c0 78 07 b8 00 00 00 00 eb 46 e8 0b c8 ff ff <8b> 00 83 f8 69 74 24 e8 ff c7 ff ff 8b 00 83 f8 0b 74 18 e8 f3 c7 [ 7340.404334] Oops: general protection fault, probably for non-canonical address 0x6d255010bdffc: 0000 [#1] SMP NOPTI [ 7340.405972] CPU: 7 UID: 0 PID: 1439 Comm: xskxceiver Not tainted 6.19.0-rc1+ #21 PREEMPT(lazy) [ 7340.408006] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-5.fc42 04/01/2014 [ 7340.409716] RIP: 0010:lookup_swap_cgroup_id+0x44/0x80 [ 7340.410455] Code: 83 f8 1c 73 39 48 ba ff ff ff ff ff ff ff 03 48 8b 04 c5 20 55 fa bd 48 21 d1 48 89 ca 83 e1 01 48 d1 ea c1 e1 04 48 8d 04 90 <8b> 00 48 83 c4 10 d3 e8 c3 cc cc cc cc 31 c0 e9 98 b7 dd 00 48 89 [ 7340.412787] RSP: 0018:ffffcc5c04f7f6d0 EFLAGS: 00010202 [ 7340.413494] RAX: 0006d255010bdffc RBX: ffff891f477895a8 RCX: 0000000000000010 [ 7340.414431] RDX: 0001c17e3fffffff RSI: 00fa070000000000 RDI: 000382fc7fffffff [ 7340.415354] RBP: 00fa070000000000 R08: ffffcc5c04f7f8f8 R09: ffffcc5c04f7f7d0 [ 7340.416283] R10: ffff891f4c1a7000 R11: ffffcc5c04f7f9c8 R12: ffffcc5c04f7f7d0 [ 7340.417218] R13: 03ffffffffffffff R14: 00fa06fffffffe00 R15: ffff891f47789500 [ 7340.418229] FS: 0000000000000000(0000) GS:ffff891ffdfaa000(0000) knlGS:0000000000000000 [ 7340.419489] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7340.420286] CR2: 00007f415bfffd58 CR3: 0000000103f03002 CR4: 0000000000772ef0 [ 7340.421237] PKRU: 55555554 [ 7340.421623] Call Trace: [ 7340.421987] [ 7340.422309] ? softleaf_from_pte+0x77/0xa0 [ 7340.422855] swap_pte_batch+0xa7/0x290 [ 7340.423363] zap_nonpresent_ptes.constprop.0.isra.0+0xd1/0x270 [ 7340.424102] zap_pte_range+0x281/0x580 [ 7340.424607] zap_pmd_range.isra.0+0xc9/0x240 [ 7340.425177] unmap_page_range+0x24d/0x420 [ 7340.425714] unmap_vmas+0xa1/0x180 [ 7340.426185] exit_mmap+0xe1/0x3b0 [ 7340.426644] __mmput+0x41/0x150 [ 7340.427098] exit_mm+0xb1/0x110 [ 7340.427539] do_exit+0x1b2/0x460 [ 7340.427992] do_group_exit+0x2d/0xc0 [ 7340.428477] get_signal+0x79d/0x7e0 [ 7340.428957] arch_do_signal_or_restart+0x34/0x100 [ 7340.429571] exit_to_user_mode_loop+0x8e/0x4c0 [ 7340.430159] do_syscall_64+0x188/0x6b0 [ 7340.430672] ? __do_sys_clone3+0xd9/0x120 [ 7340.431212] ? switch_fpu_return+0x4e/0xd0 [ 7340.431761] ? arch_exit_to_user_mode_prepare.isra.0+0xa1/0xc0 [ 7340.432498] ? do_syscall_64+0xbb/0x6b0 [ 7340.433015] ? __handle_mm_fault+0x445/0x690 [ 7340.433582] ? count_memcg_events+0xd6/0x210 [ 7340.434151] ? handle_mm_fault+0x212/0x340 [ 7340.434697] ? do_user_addr_fault+0x2b4/0x7b0 [ 7340.435271] ? clear_bhb_loop+0x30/0x80 [ 7340.435788] ? clear_bhb_loop+0x30/0x80 [ 7340.436299] ? clear_bhb_loop+0x30/0x80 [ 7340.436812] ? clear_bhb_loop+0x30/0x80 [ 7340.437323] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 7340.437973] RIP: 0033:0x7f4161b14169 [ 7340.438468] Code: Unable to access opcode bytes at 0x7f4161b1413f. [ 7340.439242] RSP: 002b:00007ffc6ebfa770 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca [ 7340.440173] RAX: fffffffffffffe00 RBX: 00000000000005a1 RCX: 00007f4161b14169 [ 7340.441061] RDX: 00000000000005a1 RSI: 0000000000000109 RDI: 00007f415bfff990 [ 7340.441943] RBP: 00007ffc6ebfa7a0 R08: 0000000000000000 R09: 00000000ffffffff [ 7340.442824] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 7340.443707] R13: 0000000000000000 R14: 00007f415bfff990 R15: 00007f415bfff6c0 [ 7340.444586] [ 7340.444922] Modules linked in: rfkill intel_rapl_msr intel_rapl_common intel_uncore_frequency_common skx_edac_common nfit libnvdimm kvm_intel vfat fat kvm snd_pcm irqbypass rapl iTCO_wdt snd_timer intel_pmc_bxt iTCO_vendor_support snd ixgbevf virtio_net soundcore i2c_i801 pcspkr libeth_xdp net_failover i2c_smbus lpc_ich failover libeth virtio_balloon joydev 9p fuse loop zram lz4hc_compress lz4_compress 9pnet_virtio 9pnet netfs ghash_clmulni_intel serio_raw qemu_fw_cfg [ 7340.449650] ---[ end trace 0000000000000000 ]--- The issue can be fixed in all in-tree drivers, but we cannot just trust OOT drivers to not do this. Therefore, make tailroom a signed int and produce a warning when it is negative to prevent such mistakes in the future. Fixes: bf25146a5595 ("bpf: add frags support to the bpf_xdp_adjust_tail() API") Reviewed-by: Aleksandr Loktionov Reviewed-by: Toke Høiland-Jørgensen Acked-by: Martin KaFai Lau Signed-off-by: Larysa Zaremba Link: https://patch.msgid.link/20260305111253.2317394-10-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index d6fafb3633b0..a77d23fe2359 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4150,13 +4150,14 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; struct xdp_rxq_info *rxq = xdp->rxq; - unsigned int tailroom; + int tailroom; if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) return -EOPNOTSUPP; tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag) % rxq->frag_size; + WARN_ON_ONCE(tailroom < 0); if (unlikely(offset > tailroom)) return -EINVAL; From e31a374a99f5026df6ebff2a1c49492276e776fd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 4 Mar 2026 19:15:48 +0100 Subject: [PATCH 799/828] fbdev: au1100fb: Fix build on MIPS64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an error reported by the kernel test robot: au1100fb.c: error: implicit declaration of function 'KSEG1ADDR'; did you mean 'CKSEG1ADDR'? arch/mips/include/asm/addrspace.h defines KSEG1ADDR only for 32 bit configurations. So provide its compile-test stub also for 64bit mips builds. Fixes: 6f366e86481a ("fbdev: au1100fb: Make driver compilable on non-mips platforms") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603042127.PT6LuKqi-lkp@intel.com/ Signed-off-by: Helge Deller Acked-by: Uwe Kleine-König --- drivers/video/fbdev/au1100fb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c index 1a04154bc535..c54cfcd832bb 100644 --- a/drivers/video/fbdev/au1100fb.c +++ b/drivers/video/fbdev/au1100fb.c @@ -380,8 +380,12 @@ static struct au1100fb_panel known_lcd_panels[] = #define panel_is_color(panel) (panel->control_base & LCD_CONTROL_PC) #define panel_swap_rgb(panel) (panel->control_base & LCD_CONTROL_CCO) -#if defined(CONFIG_COMPILE_TEST) && !defined(CONFIG_MIPS) -/* This is only defined to be able to compile this driver on non-mips platforms */ +#if defined(CONFIG_COMPILE_TEST) && (!defined(CONFIG_MIPS) || defined(CONFIG_64BIT)) +/* + * KSEG1ADDR() is defined in arch/mips/include/asm/addrspace.h + * for 32 bit configurations. Provide a stub for compile testing + * on other platforms. + */ #define KSEG1ADDR(x) (x) #endif From 76e8173ba92e15eeb0421b7cdbaef20513193b51 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Thu, 5 Mar 2026 11:07:23 +0000 Subject: [PATCH 800/828] drm/panthor: Correct the order of arguments passed to gem_sync This commit corrects the order of arguments passed to panthor_gem_sync() function, called when the SYNC_WAIT condition has to be evaluated for a blocked GPU queue. Fixes: cd2c9c3015e6 ("drm/panthor: Add flag to map GEM object Write-Back Cacheable") Signed-off-by: Akash Goel Reviewed-by: Steven Price Reviewed-by: Boris Brezillon Reviewed-by: Liviu Dudau Link: https://patch.msgid.link/20260305110723.2871733-1-akash.goel@arm.com Signed-off-by: Liviu Dudau --- drivers/gpu/drm/panthor/panthor_sched.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index bd703a2904a1..a70f1db0764e 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -893,14 +893,15 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue out_sync: /* Make sure the CPU caches are invalidated before the seqno is read. - * drm_gem_shmem_sync() is a NOP if map_wc=true, so no need to check + * panthor_gem_sync() is a NOP if map_wc=true, so no need to check * it here. */ - panthor_gem_sync(&bo->base.base, queue->syncwait.offset, + panthor_gem_sync(&bo->base.base, + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE, + queue->syncwait.offset, queue->syncwait.sync64 ? sizeof(struct panthor_syncobj_64b) : - sizeof(struct panthor_syncobj_32b), - DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE); + sizeof(struct panthor_syncobj_32b)); return queue->syncwait.kmap + queue->syncwait.offset; From d5b8b0347fa8470b751a506fb801797e271d7548 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 4 Mar 2026 22:20:41 -0800 Subject: [PATCH 801/828] accel/amdxdna: Split mailbox channel create function The management channel used for firmware control command submission is currently created after the firmware is started. If channel creation fails (for example, due to memory allocation failure or workqueue creation interruption), the firmware remains in a pending state and is unable to receive any control commands. To avoid leaving the firmware in this inconsistent state, split xdna_mailbox_create_channel() into two separate functions so that resource allocation can be completed before interacting with the hardware. xdna_mailbox_alloc_channel() Allocates memory and initializes the workqueue. This can be called earlier, before interacting with the hardware. xdna_mailbox_start_channel() Performs the hardware interaction required to start the channel. Rename xdna_mailbox_destroy_channel() to xdna_mailbox_free_channel(). Ensure that xdna_mailbox_stop_channel() and xdna_mailbox_free_channel() properly unwind the corresponding start and allocation steps, respectively. Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260305062041.3954024-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 17 ++++- drivers/accel/amdxdna/aie2_pci.c | 63 +++++++++------- drivers/accel/amdxdna/amdxdna_mailbox.c | 99 ++++++++++++------------- drivers/accel/amdxdna/amdxdna_mailbox.h | 31 ++++---- 4 files changed, 116 insertions(+), 94 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 22e1a85a7ae0..ffcf3be79e23 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -293,13 +293,20 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct } intr_reg = i2x.mb_head_ptr_reg + 4; - hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, - intr_reg, ret); + hwctx->priv->mbox_chann = xdna_mailbox_alloc_channel(ndev->mbox); if (!hwctx->priv->mbox_chann) { XDNA_ERR(xdna, "Not able to create channel"); ret = -EINVAL; goto del_ctx_req; } + + ret = xdna_mailbox_start_channel(hwctx->priv->mbox_chann, &x2i, &i2x, + intr_reg, ret); + if (ret) { + XDNA_ERR(xdna, "Not able to create channel"); + ret = -EINVAL; + goto free_channel; + } ndev->hwctx_num++; XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id); @@ -307,6 +314,8 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct return 0; +free_channel: + xdna_mailbox_free_channel(hwctx->priv->mbox_chann); del_ctx_req: aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); return ret; @@ -322,7 +331,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); - xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); + xdna_mailbox_free_channel(hwctx->priv->mbox_chann); XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id); hwctx->priv->mbox_chann = NULL; hwctx->fw_ctx_id = -1; @@ -921,7 +930,7 @@ void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev) return; xdna_mailbox_stop_channel(ndev->mgmt_chann); - xdna_mailbox_destroy_channel(ndev->mgmt_chann); + xdna_mailbox_free_channel(ndev->mgmt_chann); ndev->mgmt_chann = NULL; } diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 977ce21eaf9f..4924a9da55b6 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -361,10 +361,29 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) } pci_set_master(pdev); + mbox_res.ringbuf_base = ndev->sram_base; + mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar); + mbox_res.mbox_base = ndev->mbox_base; + mbox_res.mbox_size = MBOX_SIZE(ndev); + mbox_res.name = "xdna_mailbox"; + ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res); + if (!ndev->mbox) { + XDNA_ERR(xdna, "failed to create mailbox device"); + ret = -ENODEV; + goto disable_dev; + } + + ndev->mgmt_chann = xdna_mailbox_alloc_channel(ndev->mbox); + if (!ndev->mgmt_chann) { + XDNA_ERR(xdna, "failed to alloc channel"); + ret = -ENODEV; + goto disable_dev; + } + ret = aie2_smu_init(ndev); if (ret) { XDNA_ERR(xdna, "failed to init smu, ret %d", ret); - goto disable_dev; + goto free_channel; } ret = aie2_psp_start(ndev->psp_hdl); @@ -379,18 +398,6 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto stop_psp; } - mbox_res.ringbuf_base = ndev->sram_base; - mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar); - mbox_res.mbox_base = ndev->mbox_base; - mbox_res.mbox_size = MBOX_SIZE(ndev); - mbox_res.name = "xdna_mailbox"; - ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res); - if (!ndev->mbox) { - XDNA_ERR(xdna, "failed to create mailbox device"); - ret = -ENODEV; - goto stop_psp; - } - mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx); if (mgmt_mb_irq < 0) { ret = mgmt_mb_irq; @@ -399,13 +406,13 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) } xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4; - ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox, - &ndev->mgmt_x2i, - &ndev->mgmt_i2x, - xdna_mailbox_intr_reg, - mgmt_mb_irq); - if (!ndev->mgmt_chann) { - XDNA_ERR(xdna, "failed to create management mailbox channel"); + ret = xdna_mailbox_start_channel(ndev->mgmt_chann, + &ndev->mgmt_x2i, + &ndev->mgmt_i2x, + xdna_mailbox_intr_reg, + mgmt_mb_irq); + if (ret) { + XDNA_ERR(xdna, "failed to start management mailbox channel"); ret = -EINVAL; goto stop_psp; } @@ -413,37 +420,41 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) ret = aie2_mgmt_fw_init(ndev); if (ret) { XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); - goto destroy_mgmt_chann; + goto stop_fw; } ret = aie2_pm_init(ndev); if (ret) { XDNA_ERR(xdna, "failed to init pm, ret %d", ret); - goto destroy_mgmt_chann; + goto stop_fw; } ret = aie2_mgmt_fw_query(ndev); if (ret) { XDNA_ERR(xdna, "failed to query fw, ret %d", ret); - goto destroy_mgmt_chann; + goto stop_fw; } ret = aie2_error_async_events_alloc(ndev); if (ret) { XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret); - goto destroy_mgmt_chann; + goto stop_fw; } ndev->dev_status = AIE2_DEV_START; return 0; -destroy_mgmt_chann: - aie2_destroy_mgmt_chann(ndev); +stop_fw: + aie2_suspend_fw(ndev); + xdna_mailbox_stop_channel(ndev->mgmt_chann); stop_psp: aie2_psp_stop(ndev->psp_hdl); fini_smu: aie2_smu_fini(ndev); +free_channel: + xdna_mailbox_free_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; disable_dev: pci_disable_device(pdev); diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 235a94047530..46d844a73a94 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -460,26 +460,49 @@ msg_id_failed: return ret; } -struct mailbox_channel * -xdna_mailbox_create_channel(struct mailbox *mb, - const struct xdna_mailbox_chann_res *x2i, - const struct xdna_mailbox_chann_res *i2x, - u32 iohub_int_addr, - int mb_irq) +struct mailbox_channel *xdna_mailbox_alloc_channel(struct mailbox *mb) { struct mailbox_channel *mb_chann; - int ret; - - if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) { - pr_err("Ring buf size must be power of 2"); - return NULL; - } mb_chann = kzalloc_obj(*mb_chann); if (!mb_chann) return NULL; + INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker); + mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME); + if (!mb_chann->work_q) { + MB_ERR(mb_chann, "Create workqueue failed"); + goto free_chann; + } mb_chann->mb = mb; + + return mb_chann; + +free_chann: + kfree(mb_chann); + return NULL; +} + +void xdna_mailbox_free_channel(struct mailbox_channel *mb_chann) +{ + destroy_workqueue(mb_chann->work_q); + kfree(mb_chann); +} + +int +xdna_mailbox_start_channel(struct mailbox_channel *mb_chann, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + u32 iohub_int_addr, + int mb_irq) +{ + int ret; + + if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) { + pr_err("Ring buf size must be power of 2"); + return -EINVAL; + } + mb_chann->msix_irq = mb_irq; mb_chann->iohub_int_addr = iohub_int_addr; memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i)); @@ -489,61 +512,37 @@ xdna_mailbox_create_channel(struct mailbox *mb, mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I); mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X); - INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker); - mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME); - if (!mb_chann->work_q) { - MB_ERR(mb_chann, "Create workqueue failed"); - goto free_and_out; - } - /* Everything look good. Time to enable irq handler */ ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann); if (ret) { MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret); - goto destroy_wq; + return ret; } mb_chann->bad_state = false; mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); - MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq); - return mb_chann; - -destroy_wq: - destroy_workqueue(mb_chann->work_q); -free_and_out: - kfree(mb_chann); - return NULL; -} - -int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) -{ - struct mailbox_msg *mb_msg; - unsigned long msg_id; - - MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); - free_irq(mb_chann->msix_irq, mb_chann); - destroy_workqueue(mb_chann->work_q); - /* We can clean up and release resources */ - - xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg) - mailbox_release_msg(mb_chann, mb_msg); - - xa_destroy(&mb_chann->chan_xa); - - MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq); - kfree(mb_chann); + MB_DBG(mb_chann, "Mailbox channel started (irq: %d)", mb_chann->msix_irq); return 0; } void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann) { + struct mailbox_msg *mb_msg; + unsigned long msg_id; + /* Disable an irq and wait. This might sleep. */ - disable_irq(mb_chann->msix_irq); + free_irq(mb_chann->msix_irq, mb_chann); /* Cancel RX work and wait for it to finish */ - cancel_work_sync(&mb_chann->rx_work); - MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); + drain_workqueue(mb_chann->work_q); + + /* We can clean up and release resources */ + xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg) + mailbox_release_msg(mb_chann, mb_msg); + xa_destroy(&mb_chann->chan_xa); + + MB_DBG(mb_chann, "Mailbox channel stopped, irq: %d", mb_chann->msix_irq); } struct mailbox *xdnam_mailbox_create(struct drm_device *ddev, diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h index ea367f2fb738..8b1e00945da4 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.h +++ b/drivers/accel/amdxdna/amdxdna_mailbox.h @@ -74,9 +74,16 @@ struct mailbox *xdnam_mailbox_create(struct drm_device *ddev, const struct xdna_mailbox_res *res); /* - * xdna_mailbox_create_channel() -- Create a mailbox channel instance + * xdna_mailbox_alloc_channel() -- alloc a mailbox channel * - * @mailbox: the handle return from xdna_mailbox_create() + * @mb: mailbox handle + */ +struct mailbox_channel *xdna_mailbox_alloc_channel(struct mailbox *mb); + +/* + * xdna_mailbox_start_channel() -- start a mailbox channel instance + * + * @mb_chann: the handle return from xdna_mailbox_alloc_channel() * @x2i: host to firmware mailbox resources * @i2x: firmware to host mailbox resources * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt @@ -84,28 +91,24 @@ struct mailbox *xdnam_mailbox_create(struct drm_device *ddev, * * Return: If success, return a handle of mailbox channel. Otherwise, return NULL. */ -struct mailbox_channel * -xdna_mailbox_create_channel(struct mailbox *mailbox, - const struct xdna_mailbox_chann_res *x2i, - const struct xdna_mailbox_chann_res *i2x, - u32 xdna_mailbox_intr_reg, - int mb_irq); +int +xdna_mailbox_start_channel(struct mailbox_channel *mb_chann, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + u32 xdna_mailbox_intr_reg, + int mb_irq); /* - * xdna_mailbox_destroy_channel() -- destroy mailbox channel + * xdna_mailbox_free_channel() -- free mailbox channel * * @mailbox_chann: the handle return from xdna_mailbox_create_channel() - * - * Return: if success, return 0. otherwise return error code */ -int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann); +void xdna_mailbox_free_channel(struct mailbox_channel *mailbox_chann); /* * xdna_mailbox_stop_channel() -- stop mailbox channel * * @mailbox_chann: the handle return from xdna_mailbox_create_channel() - * - * Return: if success, return 0. otherwise return error code */ void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann); From 9f2c7349b2810c671badfc1adc804f711a83e420 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 5 Mar 2026 09:18:52 -0800 Subject: [PATCH 802/828] MAINTAINERS: Orphan Altera PCIe controller driver Email to Joyce Ooi now bounces. Remove the address and mark the Altera PCIe controller driver as an orphan for now. Signed-off-by: Dave Hansen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260305171852.3114177-1-dave.hansen@linux.intel.com --- MAINTAINERS | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 55af015174a5..02768d27802c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20122,9 +20122,8 @@ F: Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml F: drivers/pci/controller/pci-aardvark.c PCI DRIVER FOR ALTERA PCIE IP -M: Joyce Ooi L: linux-pci@vger.kernel.org -S: Supported +S: Orphan F: Documentation/devicetree/bindings/pci/altr,pcie-root-port.yaml F: drivers/pci/controller/pcie-altera.c @@ -20369,9 +20368,8 @@ S: Supported F: Documentation/PCI/pci-error-recovery.rst PCI MSI DRIVER FOR ALTERA MSI IP -M: Joyce Ooi L: linux-pci@vger.kernel.org -S: Supported +S: Orphan F: Documentation/devicetree/bindings/interrupt-controller/altr,msi-controller.yaml F: drivers/pci/controller/pcie-altera-msi.c From f2fa6cc736ef518628f6a9d1155f9ec24e0af4e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Mar 2026 16:01:19 -0800 Subject: [PATCH 803/828] rcutorture: Update due to x86 not supporting none/voluntary preemption As of v7.0-rc1, architectures that support preemption, including x86 and arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY. Attempting to build kernels with these two Kconfig options results in .config errors. This commit therefore switches such rcutorture scenarios to CONFIG_PREEMPT_LAZY. Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Signed-off-by: Boqun Feng Link: https://patch.msgid.link/bfe89f6c-3b63-40c6-aa6d-5f523e3e9a31@paulmck-laptop --- tools/testing/selftests/rcutorture/configs/rcu/SRCU-N | 4 +++- tools/testing/selftests/rcutorture/configs/rcu/SRCU-T | 3 ++- tools/testing/selftests/rcutorture/configs/rcu/SRCU-U | 4 ++-- tools/testing/selftests/rcutorture/configs/rcu/TASKS02 | 3 ++- tools/testing/selftests/rcutorture/configs/rcu/TINY01 | 4 ++-- tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 3 ++- tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 3 ++- tools/testing/selftests/rcutorture/configs/rcu/TREE04 | 4 +++- tools/testing/selftests/rcutorture/configs/rcu/TREE05 | 4 +++- tools/testing/selftests/rcutorture/configs/rcu/TREE06 | 5 ++++- tools/testing/selftests/rcutorture/configs/rcu/TREE10 | 1 + tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL | 4 +++- 12 files changed, 29 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N index 07f5e0a70ae7..f943cdfb0a74 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N @@ -2,7 +2,9 @@ CONFIG_RCU_TRACE=n CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T index c70cf0405f24..06e4d1030279 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T @@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U index bc9eeabaa1b1..71da6e3e9488 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U @@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n @@ -7,4 +8,3 @@ CONFIG_PREEMPT_DYNAMIC=n CONFIG_RCU_TRACE=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PREEMPT_COUNT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 index 2f9fcffff5ae..dd2bd4e08da4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 @@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 index 0953c52fcfd7..2be53bf60d65 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 @@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n @@ -11,4 +12,3 @@ CONFIG_RCU_TRACE=n #CHECK#CONFIG_RCU_STALL_COMMON=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PREEMPT_COUNT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index 30439f6fc20e..be8860342ef7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index 18efab346381..8fb124c28f28 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -1,7 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=5 CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 67caf4276bb0..ac857d5bcb22 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -1,10 +1,12 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_LAZY=y CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y +#CHECK#CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 9f48c73709ec..61d15b1b54d1 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -1,6 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_TREE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index db27651de04b..0e090bb68a0f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -1,9 +1,12 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_TREE_RCU=y +#CHECK#CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 420632b030dc..b2ce37861e71 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -6,6 +6,7 @@ CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y +CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL index 5d546efa68e8..696fba9968c6 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL @@ -1,6 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n From 59af2d5652e998fea64335de6145afb3c1e0c958 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Mar 2026 15:59:01 -0800 Subject: [PATCH 804/828] rcuscale: Update due to x86 not supporting none/voluntary preemption As of v7.0-rc1, architectures that support preemption, including x86 and arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY. Attempting to build kernels with these two Kconfig options results in .config errors. This commit therefore switches such rcuscale scenarios to CONFIG_PREEMPT_LAZY. Signed-off-by: Paul E. McKenney Signed-off-by: Boqun Feng Link: https://patch.msgid.link/20260303235903.1967409-2-paulmck@kernel.org --- tools/testing/selftests/rcutorture/configs/rcuscale/TINY | 3 ++- tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY index 0fa2dc086e10..c1a7d38823fb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY @@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 index 0059592c7408..f3f1368b8386 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 @@ -1,5 +1,6 @@ CONFIG_SMP=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n From 3c6ddb58f670baa584b22eef761aade77bd81b16 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Mar 2026 15:59:02 -0800 Subject: [PATCH 805/828] refscale: Update due to x86 not supporting none/voluntary preemption As of v7.0-rc1, architectures that support preemption, including x86 and arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY. Attempting to build kernels with these two Kconfig options results in .config errors. This commit therefore switches such refscale scenarios to CONFIG_PREEMPT_LAZY. Signed-off-by: Paul E. McKenney Signed-off-by: Boqun Feng Link: https://patch.msgid.link/20260303235903.1967409-3-paulmck@kernel.org --- tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT | 3 ++- tools/testing/selftests/rcutorture/configs/refscale/TINY | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT index 67f9d2998afd..1f215e17cbae 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT @@ -1,5 +1,6 @@ CONFIG_SMP=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/TINY b/tools/testing/selftests/rcutorture/configs/refscale/TINY index 759343980b80..9a11b578ff34 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/TINY +++ b/tools/testing/selftests/rcutorture/configs/refscale/TINY @@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n From 78c2ce0fd6ddb6c87aaa56b0d49c70e17df21e6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Mar 2026 15:59:03 -0800 Subject: [PATCH 806/828] scftorture: Update due to x86 not supporting none/voluntary preemption As of v7.0-rc1, architectures that support preemption, including x86 and arm64, no longer support CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY. Attempting to build kernels with these two Kconfig options results in .config errors. This commit therefore switches such scftorture scenarios to CONFIG_PREEMPT_LAZY. Signed-off-by: Paul E. McKenney Signed-off-by: Boqun Feng Link: https://patch.msgid.link/20260303235903.1967409-4-paulmck@kernel.org --- tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT index 6133f54ce2a7..f9621e2770a4 100644 --- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT @@ -1,5 +1,6 @@ CONFIG_SMP=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n From 150bceb3e0a4a30950279d91ea0e8cc69a736742 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 18 Feb 2026 16:21:55 -0600 Subject: [PATCH 807/828] accel: ethosu: Fix job submit error clean-up refcount underflows If the job submit fails before adding the job to the scheduler queue such as when the GEM buffer bounds checks fail, then doing a ethosu_job_put() results in a pm_runtime_put_autosuspend() without the corresponding pm_runtime_resume_and_get(). The dma_fence_put()'s are also unnecessary, but seem to be harmless. Split the ethosu_job_cleanup() function into 2 parts for the before and after the job is queued. Fixes: 5a5e9c0228e6 ("accel: Add Arm Ethos-U NPU driver") Reviewed-and-Tested-by: Anders Roxell Link: https://patch.msgid.link/20260218-ethos-fixes-v1-1-be3fa3ea9a30@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/accel/ethosu/ethosu_job.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c index 8598a3634340..ec85f4156744 100644 --- a/drivers/accel/ethosu/ethosu_job.c +++ b/drivers/accel/ethosu/ethosu_job.c @@ -143,17 +143,10 @@ out: return ret; } -static void ethosu_job_cleanup(struct kref *ref) +static void ethosu_job_err_cleanup(struct ethosu_job *job) { - struct ethosu_job *job = container_of(ref, struct ethosu_job, - refcount); unsigned int i; - pm_runtime_put_autosuspend(job->dev->base.dev); - - dma_fence_put(job->done_fence); - dma_fence_put(job->inference_done_fence); - for (i = 0; i < job->region_cnt; i++) drm_gem_object_put(job->region_bo[i]); @@ -162,6 +155,19 @@ static void ethosu_job_cleanup(struct kref *ref) kfree(job); } +static void ethosu_job_cleanup(struct kref *ref) +{ + struct ethosu_job *job = container_of(ref, struct ethosu_job, + refcount); + + pm_runtime_put_autosuspend(job->dev->base.dev); + + dma_fence_put(job->done_fence); + dma_fence_put(job->inference_done_fence); + + ethosu_job_err_cleanup(job); +} + static void ethosu_job_put(struct ethosu_job *job) { kref_put(&job->refcount, ethosu_job_cleanup); @@ -454,12 +460,16 @@ static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file } } ret = ethosu_job_push(ejob); + if (!ret) { + ethosu_job_put(ejob); + return 0; + } out_cleanup_job: if (ret) drm_sched_job_cleanup(&ejob->base); out_put_job: - ethosu_job_put(ejob); + ethosu_job_err_cleanup(ejob); return ret; } From 838ae99f9a77a5724ee6d4e7b7b1eb079147f888 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 18 Feb 2026 16:21:56 -0600 Subject: [PATCH 808/828] accel: ethosu: Fix NPU_OP_ELEMENTWISE validation with scalar The NPU_OP_ELEMENTWISE instruction uses a scalar value for IFM2 if the IFM2_BROADCAST "scalar" mode is set. It is a bit (7) on the u65 and part of a field (bits 3:0) on the u85. The driver was hardcoded to the u85. Fixes: 5a5e9c0228e6 ("accel: Add Arm Ethos-U NPU driver") Reviewed-and-Tested-by: Anders Roxell Link: https://patch.msgid.link/20260218-ethos-fixes-v1-2-be3fa3ea9a30@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/accel/ethosu/ethosu_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c index 668c71d5ff45..ff5e58944138 100644 --- a/drivers/accel/ethosu/ethosu_gem.c +++ b/drivers/accel/ethosu/ethosu_gem.c @@ -417,7 +417,10 @@ static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev, return ret; break; case NPU_OP_ELEMENTWISE: - use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) || + use_scale = ethosu_is_u65(edev) ? + (st.ifm2.broadcast & 0x80) : + (st.ifm2.broadcast == 8); + use_ifm2 = !(use_scale || (param == 5) || (param == 6) || (param == 7) || (param == 0x24)); use_ifm = st.ifm.broadcast != 8; ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2); From 021f1b77f70d62351bba67b050b8d784641d817f Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 18 Feb 2026 16:21:57 -0600 Subject: [PATCH 809/828] accel: ethosu: Handle possible underflow in IFM size calculations If the command stream has larger padding sizes than the IFM and OFM diminsions, then the calculations will underflow to a negative value. The result is a very large region bounds which is caught on submit, but it's better to catch it earlier. Current mesa ethosu driver has a signedness bug which resulted in padding of 127 (the max) and triggers this issue. Reviewed-and-Tested-by: Anders Roxell Link: https://patch.msgid.link/20260218-ethos-fixes-v1-3-be3fa3ea9a30@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/accel/ethosu/ethosu_gem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c index ff5e58944138..7994e7073903 100644 --- a/drivers/accel/ethosu/ethosu_gem.c +++ b/drivers/accel/ethosu/ethosu_gem.c @@ -245,11 +245,14 @@ static int calc_sizes(struct drm_device *ddev, ((st->ifm.stride_kernel >> 1) & 0x1) + 1; u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) + (st->ifm.stride_kernel & 0x1) + 1; - u32 ifm_height = st->ofm.height[2] * stride_y + + s32 ifm_height = st->ofm.height[2] * stride_y + st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom); - u32 ifm_width = st->ofm.width * stride_x + + s32 ifm_width = st->ofm.width * stride_x + st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right); + if (ifm_height < 0 || ifm_width < 0) + return -EINVAL; + len = feat_matrix_length(info, &st->ifm, ifm_width, ifm_height, st->ifm.depth); dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n", From 048efe129a297256d3c2088cf8d79515ff5ec864 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 5 Mar 2026 21:57:06 -0300 Subject: [PATCH 810/828] smb: client: fix oops due to uninitialised var in smb2_unlink() If SMB2_open_init() or SMB2_close_init() fails (e.g. reconnect), the iovs set @rqst will be left uninitialised, hence calling SMB2_open_free(), SMB2_close_free() or smb2_set_related() on them will oops. Fix this by initialising @close_iov and @open_iov before setting them in @rqst. Reported-by: Thiago Becker Fixes: 1cf9f2a6a544 ("smb: client: handle unlink(2) of files open by different clients") Signed-off-by: Paulo Alcantara (Red Hat) Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 1c4663ed7e69..5280c5c869ad 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1216,6 +1216,7 @@ again: memset(resp_buftype, 0, sizeof(resp_buftype)); memset(rsp_iov, 0, sizeof(rsp_iov)); + memset(open_iov, 0, sizeof(open_iov)); rqst[0].rq_iov = open_iov; rqst[0].rq_nvec = ARRAY_SIZE(open_iov); @@ -1240,14 +1241,15 @@ again: creq = rqst[0].rq_iov[0].iov_base; creq->ShareAccess = FILE_SHARE_DELETE_LE; + memset(&close_iov, 0, sizeof(close_iov)); rqst[1].rq_iov = &close_iov; rqst[1].rq_nvec = 1; rc = SMB2_close_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, false); - smb2_set_related(&rqst[1]); if (rc) goto err_free; + smb2_set_related(&rqst[1]); if (retries) { /* Back-off before retry */ From ee0e6e69a772d601e152e5368a1da25d656122a8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Mar 2026 18:48:05 -0800 Subject: [PATCH 811/828] ata: libata-eh: Fix detection of deferred qc timeouts If the ata_qc_for_each_raw() loop finishes without finding a matching SCSI command for any QC, the variable qc will hold a pointer to the last element examined, which has the tag i == ATA_MAX_QUEUE - 1. This qc can match the port deferred QC (ap->deferred_qc). If that happens, the condition qc == ap->deferred_qc evaluates to true despite the loop not breaking with a match on the SCSI command for this QC. In that case, the error handler mistakenly intercepts a command that has not been issued yet and that has not timed out, and thus erroneously returning a timeout error. Fix the problem by checking for i < ATA_MAX_QUEUE in addition to qc == ap->deferred_qc. The problem was found by an experimental code review agent based on gemini-3.1-pro while reviewing backports into v6.18.y. Assisted-by: Gemini:gemini-3.1-pro Fixes: eddb98ad9364 ("ata: libata-eh: correctly handle deferred qc timeouts") Signed-off-by: Guenter Roeck [cassel: modified commit log as suggested by Damien] Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-eh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 563432400f72..23be85418b3b 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -647,7 +647,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, break; } - if (qc == ap->deferred_qc) { + if (i < ATA_MAX_QUEUE && qc == ap->deferred_qc) { /* * This is a deferred command that timed out while * waiting for the command queue to drain. Since the qc From 8e732934fb81282be41602550e7e07baf265e972 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 3 Mar 2026 23:36:10 +0100 Subject: [PATCH 812/828] parisc: Increase initial mapping to 64 MB with KALLSYMS The 32MB initial kernel mapping can become too small when CONFIG_KALLSYMS is used. Increase the mapping to 64 MB in this case. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 2c139a4dbf4b..17afe7a59edf 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -85,7 +85,7 @@ extern void __update_cache(pte_t pte); printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e)) /* This is the size of the initially mapped kernel memory */ -#if defined(CONFIG_64BIT) +#if defined(CONFIG_64BIT) || defined(CONFIG_KALLSYMS) #define KERNEL_INITIAL_ORDER 26 /* 1<<26 = 64MB */ #else #define KERNEL_INITIAL_ORDER 25 /* 1<<25 = 32MB */ From 17c144f1104bfc29a3ce3f7d0931a1bfb7a3558c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 3 Mar 2026 23:36:11 +0100 Subject: [PATCH 813/828] parisc: Check kernel mapping earlier at bootup The check if the initial mapping is sufficient needs to happen much earlier during bootup. Move this test directly to the start_parisc() function and use native PDC iodc functions to print the warning, because panic() and printk() are not functional yet. This fixes boot when enabling various KALLSYSMS options which need much more space. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/kernel/setup.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index ace483b6f19a..d3e17a7a8901 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -120,14 +120,6 @@ void __init setup_arch(char **cmdline_p) #endif printk(KERN_CONT ".\n"); - /* - * Check if initial kernel page mappings are sufficient. - * panic early if not, else we may access kernel functions - * and variables which can't be reached. - */ - if (__pa((unsigned long) &_end) >= KERNEL_INITIAL_SIZE) - panic("KERNEL_INITIAL_ORDER too small!"); - #ifdef CONFIG_64BIT if(parisc_narrow_firmware) { printk(KERN_INFO "Kernel is using PDC in 32-bit mode.\n"); @@ -279,6 +271,18 @@ void __init start_parisc(void) int ret, cpunum; struct pdc_coproc_cfg coproc_cfg; + /* + * Check if initial kernel page mapping is sufficient. + * Print warning if not, because we may access kernel functions and + * variables which can't be reached yet through the initial mappings. + * Note that the panic() and printk() functions are not functional + * yet, so we need to use direct iodc() firmware calls instead. + */ + const char warn1[] = "CRITICAL: Kernel may crash because " + "KERNEL_INITIAL_ORDER is too small.\n"; + if (__pa((unsigned long) &_end) >= KERNEL_INITIAL_SIZE) + pdc_iodc_print(warn1, sizeof(warn1) - 1); + /* check QEMU/SeaBIOS marker in PAGE0 */ running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0); From 8475d8fe21ec9c7eb2faca555fbc5b68cf0d2597 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 4 Mar 2026 22:24:18 +0100 Subject: [PATCH 814/828] parisc: Fix initial page table creation for boot The KERNEL_INITIAL_ORDER value defines the initial size (usually 32 or 64 MB) of the page table during bootup. Up until now the whole area was initialized with PTE entries, but there was no check if we filled too many entries. Change the code to fill up with so many entries that the "_end" symbol can be reached by the kernel, but not more entries than actually fit into the initial PTE tables. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/kernel/head.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 96e0264ac961..9188c8d87437 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -56,6 +56,7 @@ ENTRY(parisc_kernel_start) .import __bss_start,data .import __bss_stop,data + .import __end,data load32 PA(__bss_start),%r3 load32 PA(__bss_stop),%r4 @@ -149,7 +150,11 @@ $cpu_ok: * everything ... it will get remapped correctly later */ ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */ load32 (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */ - load32 PA(pg0),%r1 + load32 PA(_end),%r1 + SHRREG %r1,PAGE_SHIFT,%r1 /* %r1 is PFN count for _end symbol */ + cmpb,<<,n %r11,%r1,1f + copy %r1,%r11 /* %r1 PFN count smaller than %r11 */ +1: load32 PA(pg0),%r1 $pgt_fill_loop: STREGM %r3,ASM_PTE_ENTRY_SIZE(%r1) From 97c5550b763171dbef61e6239cab372b9f9cd4a2 Mon Sep 17 00:00:00 2001 From: Piotr Jaroszynski Date: Thu, 5 Mar 2026 15:26:29 -0800 Subject: [PATCH 815/828] arm64: contpte: fix set_access_flags() no-op check for SMMU/ATS faults contpte_ptep_set_access_flags() compared the gathered ptep_get() value against the requested entry to detect no-ops. ptep_get() ORs AF/dirty from all sub-PTEs in the CONT block, so a dirty sibling can make the target appear already-dirty. When the gathered value matches entry, the function returns 0 even though the target sub-PTE still has PTE_RDONLY set in hardware. For a CPU with FEAT_HAFDBS this gathered view is fine, since hardware may set AF/dirty on any sub-PTE and CPU TLB behavior is effectively gathered across the CONT range. But page-table walkers that evaluate each descriptor individually (e.g. a CPU without DBM support, or an SMMU without HTTU, or with HA/HD disabled in CD.TCR) can keep faulting on the unchanged target sub-PTE, causing an infinite fault loop. Gathering can therefore cause false no-ops when only a sibling has been updated: - write faults: target still has PTE_RDONLY (needs PTE_RDONLY cleared) - read faults: target still lacks PTE_AF Fix by checking each sub-PTE against the requested AF/dirty/write state (the same bits consumed by __ptep_set_access_flags()), using raw per-PTE values rather than the gathered ptep_get() view, before returning no-op. Keep using the raw target PTE for the write-bit unfold decision. Per Arm ARM (DDI 0487) D8.7.1 ("The Contiguous bit"), any sub-PTE in a CONT range may become the effective cached translation and software must maintain consistent attributes across the range. Fixes: 4602e5757bcc ("arm64/mm: wire up PTE_CONT for user mappings") Cc: Ryan Roberts Cc: Catalin Marinas Cc: Will Deacon Cc: Jason Gunthorpe Cc: John Hubbard Cc: Zi Yan Cc: Breno Leitao Cc: stable@vger.kernel.org Reviewed-by: Alistair Popple Reviewed-by: James Houghton Reviewed-by: Ryan Roberts Reviewed-by: Catalin Marinas Tested-by: Breno Leitao Signed-off-by: Piotr Jaroszynski Acked-by: Balbir Singh Signed-off-by: Will Deacon --- arch/arm64/mm/contpte.c | 53 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index b929a455103f..1519d090d5ea 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -599,6 +599,27 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes); +static bool contpte_all_subptes_match_access_flags(pte_t *ptep, pte_t entry) +{ + pte_t *cont_ptep = contpte_align_down(ptep); + /* + * PFNs differ per sub-PTE. Match only bits consumed by + * __ptep_set_access_flags(): AF, DIRTY and write permission. + */ + const pteval_t cmp_mask = PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY; + pteval_t entry_cmp = pte_val(entry) & cmp_mask; + int i; + + for (i = 0; i < CONT_PTES; i++) { + pteval_t pte_cmp = pte_val(__ptep_get(cont_ptep + i)) & cmp_mask; + + if (pte_cmp != entry_cmp) + return false; + } + + return true; +} + int contpte_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t entry, int dirty) @@ -608,13 +629,37 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma, int i; /* - * Gather the access/dirty bits for the contiguous range. If nothing has - * changed, its a noop. + * Check whether all sub-PTEs in the CONT block already match the + * requested access flags/write permission, using raw per-PTE values + * rather than the gathered ptep_get() view. + * + * __ptep_set_access_flags() can update AF, dirty and write + * permission, but only to make the mapping more permissive. + * + * ptep_get() gathers AF/dirty state across the whole CONT block, + * which is correct for a CPU with FEAT_HAFDBS. But page-table + * walkers that evaluate each descriptor individually (e.g. a CPU + * without DBM support, or an SMMU without HTTU, or with HA/HD + * disabled in CD.TCR) can keep faulting on the target sub-PTE if + * only a sibling has been updated. Gathering can therefore cause + * false no-ops when only a sibling has been updated: + * - write faults: target still has PTE_RDONLY (needs PTE_RDONLY cleared) + * - read faults: target still lacks PTE_AF + * + * Per Arm ARM (DDI 0487) D8.7.1, any sub-PTE in a CONT range may + * become the effective cached translation, so all entries must have + * consistent attributes. Check the full CONT block before returning + * no-op, and when any sub-PTE mismatches, proceed to update the whole + * range. */ - orig_pte = pte_mknoncont(ptep_get(ptep)); - if (pte_val(orig_pte) == pte_val(entry)) + if (contpte_all_subptes_match_access_flags(ptep, entry)) return 0; + /* + * Use raw target pte (not gathered) for write-bit unfold decision. + */ + orig_pte = pte_mknoncont(__ptep_get(ptep)); + /* * We can fix up access/dirty bits without having to unfold the contig * range. But if the write bit is changing, we must unfold. From d87c828daa7ead9763416f75cc416496969cf1dc Mon Sep 17 00:00:00 2001 From: Yifan Wu Date: Thu, 5 Mar 2026 09:36:37 +0800 Subject: [PATCH 816/828] selftest/arm64: Fix sve2p1_sigill() to hwcap test The FEAT_SVE2p1 is indicated by ID_AA64ZFR0_EL1.SVEver. However, the BFADD requires the FEAT_SVE_B16B16, which is indicated by ID_AA64ZFR0_EL1.B16B16. This could cause the test to incorrectly fail on a CPU that supports FEAT_SVE2.1 but not FEAT_SVE_B16B16. LD1Q Gather load quadwords which is decoded from SVE encodings and implied by FEAT_SVE2p1. Fixes: c5195b027d29 ("kselftest/arm64: Add SVE 2.1 to hwcap test") Signed-off-by: Yifan Wu Reviewed-by: Mark Brown Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 9d2df1f3e6bb..c2661a312fc9 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -475,8 +475,8 @@ static void sve2_sigill(void) static void sve2p1_sigill(void) { - /* BFADD Z0.H, Z0.H, Z0.H */ - asm volatile(".inst 0x65000000" : : : "z0"); + /* LD1Q {Z0.Q}, P0/Z, [Z0.D, X0] */ + asm volatile(".inst 0xC400A000" : : : "z0"); } static void sve2p2_sigill(void) From 7fe44c4388146bdbb3c5932d81a26d9fa0fd3ec9 Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Fri, 6 Mar 2026 10:49:18 +0000 Subject: [PATCH 817/828] bpf: drop kthread_exit from noreturn_deny kthread_exit became a macro to do_exit in commit 28aaa9c39945 ("kthread: consolidate kthread exit paths to prevent use-after-free"), so there is no kthread_exit function BTF ID to resolve. Remove it from noreturn_deny to avoid resolve_btfids unresolved symbol warnings. Signed-off-by: Christian Loehle Signed-off-by: Linus Torvalds --- kernel/bpf/verifier.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 401d6c4960ec..8db79e593156 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -25261,7 +25261,6 @@ BTF_ID(func, __x64_sys_exit_group) BTF_ID(func, do_exit) BTF_ID(func, do_group_exit) BTF_ID(func, kthread_complete_and_exit) -BTF_ID(func, kthread_exit) BTF_ID(func, make_task_dead) BTF_SET_END(noreturn_deny) From 0b2758f48f22b173963f39e553d0ecd05f3b4433 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 6 Mar 2026 09:10:36 -0800 Subject: [PATCH 818/828] Require (reasonably) normal mappings for MADV_DOFORK This came up as a result of the tracing fix pull request, and commit e39bb9e02b68 ("tracing: Fix WARN_ON in tracing_buffers_mmap_close") in particular. The use of MADV_DOFORK confused the ring buffer mapping reference counting just because it was unexpected, since the mapping was originally done with VM_DONTCOPY. The tracing code may well be the only case of this (and fixed it all by just using the mmap open callback to unconfuse itself), but it's just strange that we allow MADV_DOFORK on special mappings where the kernel has set the "don't copy this" bit. The code already disallowed it for VM_IO mappings (going back to the original commit f822566165dd: "madvise MADV_DONTFORK/MADV_DOFORK"), so just extend it to any of the VM_SPECIAL cases (which includes VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP in addition to VM_IO). We could also allow MADV_DOFORK only on mappings that had been marked DONTFORK by the user. But that would require us to track that (presumably with another VM_xyz bit), so let's just do this trivial and straightforward modifications. If anybody notices, Lorenzo will be boarding Flying Pig Airlines. Suggested-by: David Hildenbrand (Arm) Reviewed-by: Lorenzo Stoakes (Oracle) Link: https://lore.kernel.org/all/a8907468-d7e9-4727-af28-66d905093230@kernel.org/ Cc: Steven Rostedt Cc: Jason Gunthorpe Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index c0370d9b4e23..dbb69400786d 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1389,7 +1389,7 @@ static int madvise_vma_behavior(struct madvise_behavior *madv_behavior) new_flags |= VM_DONTCOPY; break; case MADV_DOFORK: - if (new_flags & VM_IO) + if (new_flags & VM_SPECIAL) return -EINVAL; new_flags &= ~VM_DONTCOPY; break; From 457965c13f0837a289c9164b842d0860133f6274 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Mar 2026 11:33:39 -0800 Subject: [PATCH 819/828] tracing: Add NULL pointer check to trigger_data_free() If trigger_data_alloc() fails and returns NULL, event_hist_trigger_parse() jumps to the out_free error path. While kfree() safely handles a NULL pointer, trigger_data_free() does not. This causes a NULL pointer dereference in trigger_data_free() when evaluating data->cmd_ops->set_filter. Fix the problem by adding a NULL pointer check to trigger_data_free(). The problem was found by an experimental code review agent based on gemini-3.1-pro while reviewing backports into v6.18.y. Cc: Miaoqian Lin Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Steven Rostedt (Google) Link: https://patch.msgid.link/20260305193339.2810953-1-linux@roeck-us.net Fixes: 0550069cc25f ("tracing: Properly process error handling in event_hist_trigger_parse()") Assisted-by: Gemini:gemini-3.1-pro Signed-off-by: Guenter Roeck Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index fecbd679d432..d5230b759a2d 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -50,6 +50,9 @@ static int trigger_kthread_fn(void *ignore) void trigger_data_free(struct event_trigger_data *data) { + if (!data) + return; + if (data->cmd_ops->set_filter) data->cmd_ops->set_filter(NULL, data, NULL); From 3b1679e086bb869ca02722f6bd29b3573a6a0e7e Mon Sep 17 00:00:00 2001 From: Andrei-Alexandru Tachici Date: Mon, 2 Mar 2026 11:27:34 +0100 Subject: [PATCH 820/828] tracing: Fix enabling multiple events on the kernel command line and bootconfig Multiple events can be enabled on the kernel command line via a comma separator. But if the are specified one at a time, then only the last event is enabled. This is because the event names are saved in a temporary buffer, and each call by the init cmdline code will reset that buffer. This also affects names in the boot config file, as it may call the callback multiple times with an example of: kernel.trace_event = ":mod:rproc_qcom_common", ":mod:qrtr", ":mod:qcom_aoss" Change the cmdline callback function to append a comma and the next value if the temporary buffer already has content. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260302-trace-events-allow-multiple-modules-v1-1-ce4436e37fb8@oss.qualcomm.com Signed-off-by: Andrei-Alexandru Tachici Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b7343fdfd7b0..249d1cba72c0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -4493,7 +4493,11 @@ static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; static __init int setup_trace_event(char *str) { - strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE); + if (bootup_event_buf[0] != '\0') + strlcat(bootup_event_buf, ",", COMMAND_LINE_SIZE); + + strlcat(bootup_event_buf, str, COMMAND_LINE_SIZE); + trace_set_ring_buffer_expanded(NULL); disable_tracing_selftest("running event tracing"); From fbc7aef517d8765e4c425d2792409bb9bf2e1f13 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Mar 2026 16:54:24 -0800 Subject: [PATCH 821/828] bpf: Fix u32/s32 bounds when ranges cross min/max boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same as in __reg64_deduce_bounds(), refine s32/u32 ranges in __reg32_deduce_bounds() in the following situations: - s32 range crosses U32_MAX/0 boundary, positive part of the s32 range overlaps with u32 range: 0 U32_MAX | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | |----------------------------|----------------------------| |xxxxx s32 range xxxxxxxxx] [xxxxxxx| 0 S32_MAX S32_MIN -1 - s32 range crosses U32_MAX/0 boundary, negative part of the s32 range overlaps with u32 range: 0 U32_MAX | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | |----------------------------|----------------------------| |xxxxxxxxx] [xxxxxxxxxxxx s32 range | 0 S32_MAX S32_MIN -1 - No refinement if ranges overlap in two intervals. This helps for e.g. consider the following program: call %[bpf_get_prandom_u32]; w0 &= 0xffffffff; if w0 < 0x3 goto 1f; // on fall-through u32 range [3..U32_MAX] if w0 s> 0x1 goto 1f; // on fall-through s32 range [S32_MIN..1] if w0 s< 0x0 goto 1f; // range can be narrowed to [S32_MIN..-1] r10 = 0; 1: ...; The reg_bounds.c selftest is updated to incorporate identical logic, refinement based on non-overflowing range halves: ((x ∩ [0, smax]) ∩ (y ∩ [0, smax])) ∪ ((x ∩ [smin,-1]) ∩ (y ∩ [smin,-1])) Reported-by: Andrea Righi Reported-by: Emil Tsalapatis Closes: https://lore.kernel.org/bpf/aakqucg4vcujVwif@gpd4/T/ Reviewed-by: Emil Tsalapatis Acked-by: Shung-Hsi Yu Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260306-bpf-32-bit-range-overflow-v3-1-f7f67e060a6b@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 24 +++++++ .../selftests/bpf/prog_tests/reg_bounds.c | 62 +++++++++++++++++-- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 401d6c4960ec..f960b382fdb3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2511,6 +2511,30 @@ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value); reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value); + } else { + if (reg->u32_max_value < (u32)reg->s32_min_value) { + /* See __reg64_deduce_bounds() for detailed explanation. + * Refine ranges in the following situation: + * + * 0 U32_MAX + * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | + * |----------------------------|----------------------------| + * |xxxxx s32 range xxxxxxxxx] [xxxxxxx| + * 0 S32_MAX S32_MIN -1 + */ + reg->s32_min_value = (s32)reg->u32_min_value; + reg->u32_max_value = min_t(u32, reg->u32_max_value, reg->s32_max_value); + } else if ((u32)reg->s32_max_value < reg->u32_min_value) { + /* + * 0 U32_MAX + * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | + * |----------------------------|----------------------------| + * |xxxxxxxxx] [xxxxxxxxxxxx s32 range | + * 0 S32_MAX S32_MIN -1 + */ + reg->s32_max_value = (s32)reg->u32_max_value; + reg->u32_min_value = max_t(u32, reg->u32_min_value, reg->s32_min_value); + } } } diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 0322f817d07b..04938d0d431b 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -422,15 +422,69 @@ static bool is_valid_range(enum num_t t, struct range x) } } -static struct range range_improve(enum num_t t, struct range old, struct range new) +static struct range range_intersection(enum num_t t, struct range old, struct range new) { return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); } +/* + * Result is precise when 'x' and 'y' overlap or form a continuous range, + * result is an over-approximation if 'x' and 'y' do not overlap. + */ +static struct range range_union(enum num_t t, struct range x, struct range y) +{ + if (!is_valid_range(t, x)) + return y; + if (!is_valid_range(t, y)) + return x; + return range(t, min_t(t, x.a, y.a), max_t(t, x.b, y.b)); +} + +/* + * This function attempts to improve x range intersecting it with y. + * range_cast(... to_t ...) looses precision for ranges that pass to_t + * min/max boundaries. To avoid such precision loses this function + * splits both x and y into halves corresponding to non-overflowing + * sub-ranges: [0, smin] and [smax, -1]. + * Final result is computed as follows: + * + * ((x ∩ [0, smax]) ∩ (y ∩ [0, smax])) ∪ + * ((x ∩ [smin,-1]) ∩ (y ∩ [smin,-1])) + * + * Precision might still be lost if final union is not a continuous range. + */ +static struct range range_refine_in_halves(enum num_t x_t, struct range x, + enum num_t y_t, struct range y) +{ + struct range x_pos, x_neg, y_pos, y_neg, r_pos, r_neg; + u64 smax, smin, neg_one; + + if (t_is_32(x_t)) { + smax = (u64)(u32)S32_MAX; + smin = (u64)(u32)S32_MIN; + neg_one = (u64)(u32)(s32)(-1); + } else { + smax = (u64)S64_MAX; + smin = (u64)S64_MIN; + neg_one = U64_MAX; + } + x_pos = range_intersection(x_t, x, range(x_t, 0, smax)); + x_neg = range_intersection(x_t, x, range(x_t, smin, neg_one)); + y_pos = range_intersection(y_t, y, range(x_t, 0, smax)); + y_neg = range_intersection(y_t, y, range(y_t, smin, neg_one)); + r_pos = range_intersection(x_t, x_pos, range_cast(y_t, x_t, y_pos)); + r_neg = range_intersection(x_t, x_neg, range_cast(y_t, x_t, y_neg)); + return range_union(x_t, r_pos, r_neg); + +} + static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) { struct range y_cast; + if (t_is_32(x_t) == t_is_32(y_t)) + x = range_refine_in_halves(x_t, x, y_t, y); + y_cast = range_cast(y_t, x_t, y); /* If we know that @@ -444,7 +498,7 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, */ if (x_t == S64 && y_t == S32 && y_cast.a <= S32_MAX && y_cast.b <= S32_MAX && (s64)x.a >= S32_MIN && (s64)x.b <= S32_MAX) - return range_improve(x_t, x, y_cast); + return range_intersection(x_t, x, y_cast); /* the case when new range knowledge, *y*, is a 32-bit subregister * range, while previous range knowledge, *x*, is a full register @@ -462,7 +516,7 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); if (!is_valid_range(x_t, x_swap)) return x; - return range_improve(x_t, x, x_swap); + return range_intersection(x_t, x, x_swap); } if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) { @@ -480,7 +534,7 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, } /* otherwise, plain range cast and intersection works */ - return range_improve(x_t, x, y_cast); + return range_intersection(x_t, x, y_cast); } /* ======================= From f81fdfd16771e266753146bd83f6dd23515ebee9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Mar 2026 16:54:25 -0800 Subject: [PATCH 822/828] selftests/bpf: test refining u32/s32 bounds when ranges cross min/max boundary Two test cases for signed/unsigned 32-bit bounds refinement when s32 range crosses the sign boundary: - s32 range [S32_MIN..1] overlapping with u32 range [3..U32_MAX], s32 range tail before sign boundary overlaps with u32 range. - s32 range [-3..5] overlapping with u32 range [0..S32_MIN+3], s32 range head after the sign boundary overlaps with u32 range. This covers both branches added in the __reg32_deduce_bounds(). Also, crossing_32_bit_signed_boundary_2() no longer triggers invariant violations. Reviewed-by: Emil Tsalapatis Reviewed-by: Paul Chaignon Acked-by: Shung-Hsi Yu Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260306-bpf-32-bit-range-overflow-v3-2-f7f67e060a6b@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_bounds.c | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 97065a26cf70..e526315c718a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1148,7 +1148,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ +__flag(BPF_F_TEST_REG_INVARIANTS) __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ @@ -2000,4 +2000,41 @@ __naked void bounds_refinement_multiple_overlaps(void *ctx) : __clobber_all); } +SEC("socket") +__success +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void signed_unsigned_intersection32_case1(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0xffffffff; \ + if w0 < 0x3 goto 1f; /* on fall-through u32 range [3..U32_MAX] */ \ + if w0 s> 0x1 goto 1f; /* on fall-through s32 range [S32_MIN..1] */ \ + if w0 s< 0x0 goto 1f; /* range can be narrowed to [S32_MIN..-1] */ \ + r10 = 0; /* thus predicting the jump. */ \ +1: exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__success +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void signed_unsigned_intersection32_case2(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0xffffffff; \ + if w0 > 0x80000003 goto 1f; /* on fall-through u32 range [0..S32_MIN+3] */ \ + if w0 s< -3 goto 1f; /* on fall-through s32 range [-3..S32_MAX] */ \ + if w0 s> 5 goto 1f; /* on fall-through s32 range [-3..5] */ \ + if w0 <= 5 goto 1f; /* range can be narrowed to [0..5] */ \ + r10 = 0; /* thus predicting the jump */ \ +1: exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; From d87c9305a8841b312b14ca0c360a563ef60b2a5b Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Mar 2026 16:54:26 -0800 Subject: [PATCH 823/828] Revert "selftests/bpf: Update reg_bound range refinement logic" This reverts commit da653de268d32a80e135c9eb960a8147c186f1bc. Removed logic is now covered by range_refine_in_halves() which handles both 32-bit and 64-bit refinements. Suggested-by: Paul Chaignon Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260306-bpf-32-bit-range-overflow-v3-3-f7f67e060a6b@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 04938d0d431b..cb8dd2f63296 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -519,20 +519,6 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, return range_intersection(x_t, x, x_swap); } - if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) { - if (x_t == S64 && x.a > x.b) { - if (x.b < y.a && x.a <= y.b) - return range(x_t, x.a, y.b); - if (x.a > y.b && x.b >= y.a) - return range(x_t, y.a, x.b); - } else if (x_t == U64 && y.a > y.b) { - if (y.b < x.a && y.a <= x.b) - return range(x_t, y.a, x.b); - if (y.a > x.b && y.b >= x.a) - return range(x_t, x.a, y.b); - } - } - /* otherwise, plain range cast and intersection works */ return range_intersection(x_t, x, y_cast); } From d008ba8be8984760e36d7dcd4adbd5a41a645708 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 6 Mar 2026 19:19:25 -0800 Subject: [PATCH 824/828] tracing: Fix trace_buf_size= cmdline parameter with sizes >= 2G Some of the sizing logic through tracer_alloc_buffers() uses int internally, causing unexpected behavior if the user passes a value that does not fit in an int (on my x86 machine, the result is uselessly tiny buffers). Fix by plumbing the parameter's real type (unsigned long) through to the ring buffer allocation functions, which already use unsigned long. It has always been possible to create larger ring buffers via the sysfs interface: this only affects the cmdline parameter. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/bff42a4288aada08bdf74da3f5b67a2c28b761f8.1772852067.git.calvin@wbinvd.org Fixes: 73c5162aa362 ("tracing: keep ring buffer to minimum size till used") Signed-off-by: Calvin Owens Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1e7c032a72d2..ebd996f8710e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9350,7 +9350,7 @@ static void setup_trace_scratch(struct trace_array *tr, } static int -allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) +allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size) { enum ring_buffer_flags rb_flags; struct trace_scratch *tscratch; @@ -9405,7 +9405,7 @@ static void free_trace_buffer(struct array_buffer *buf) } } -static int allocate_trace_buffers(struct trace_array *tr, int size) +static int allocate_trace_buffers(struct trace_array *tr, unsigned long size) { int ret; @@ -10769,7 +10769,7 @@ __init static void enable_instances(void) __init static int tracer_alloc_buffers(void) { - int ring_buf_size; + unsigned long ring_buf_size; int ret = -ENOMEM; From 2658a1720a1944fbaeda937000ad2b3c3dfaf1bb Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Mar 2026 16:02:47 -0800 Subject: [PATCH 825/828] bpf: collect only live registers in linked regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an inconsistency between func_states_equal() and collect_linked_regs(): - regsafe() uses check_ids() to verify that cached and current states have identical register id mapping. - func_states_equal() calls regsafe() only for registers computed as live by compute_live_registers(). - clean_live_states() is supposed to remove dead registers from cached states, but it can skip states belonging to an iterator-based loop. - collect_linked_regs() collects all registers sharing the same id, ignoring the marks computed by compute_live_registers(). Linked registers are stored in the state's jump history. - backtrack_insn() marks all linked registers for an instruction as precise whenever one of the linked registers is precise. The above might lead to a scenario: - There is an instruction I with register rY known to be dead at I. - Instruction I is reached via two paths: first A, then B. - On path A: - There is an id link between registers rX and rY. - Checkpoint C is created at I. - Linked register set {rX, rY} is saved to the jump history. - rX is marked as precise at I, causing both rX and rY to be marked precise at C. - On path B: - There is no id link between registers rX and rY, otherwise register states are sub-states of those in C. - Because rY is dead at I, check_ids() returns true. - Current state is considered equal to checkpoint C, propagate_precision() propagates spurious precision mark for register rY along the path B. - Depending on a program, this might hit verifier_bug() in the backtrack_insn(), e.g. if rY ∈ [r1..r5] and backtrack_insn() spots a function call. The reproducer program is in the next patch. This was hit by sched_ext scx_lavd scheduler code. Changes in tests: - verifier_scalar_ids.c selftests need modification to preserve some registers as live for __msg() checks. - exceptions_assert.c adjusted to match changes in the verifier log, R0 is dead after conditional instruction and thus does not get range. - precise.c adjusted to match changes in the verifier log, register r9 is dead after comparison and it's range is not important for test. Reported-by: Emil Tsalapatis Fixes: 0fb3cf6110a5 ("bpf: use register liveness information for func_states_equal") Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260306-linked-regs-and-propagate-precision-v1-1-18e859be570d@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 13 ++++- .../selftests/bpf/progs/exceptions_assert.c | 34 +++++------ .../selftests/bpf/progs/verifier_scalar_ids.c | 56 ++++++++++++++----- .../testing/selftests/bpf/verifier/precise.c | 8 +-- 4 files changed, 73 insertions(+), 38 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f960b382fdb3..836ceb128d19 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17359,17 +17359,24 @@ static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_st * in verifier state, save R in linked_regs if R->id == id. * If there are too many Rs sharing same id, reset id for leftover Rs. */ -static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id, +static void collect_linked_regs(struct bpf_verifier_env *env, + struct bpf_verifier_state *vstate, + u32 id, struct linked_regs *linked_regs) { + struct bpf_insn_aux_data *aux = env->insn_aux_data; struct bpf_func_state *func; struct bpf_reg_state *reg; + u16 live_regs; int i, j; id = id & ~BPF_ADD_CONST; for (i = vstate->curframe; i >= 0; i--) { + live_regs = aux[frame_insn_idx(vstate, i)].live_regs_before; func = vstate->frame[i]; for (j = 0; j < BPF_REG_FP; j++) { + if (!(live_regs & BIT(j))) + continue; reg = &func->regs[j]; __collect_linked_regs(linked_regs, reg, id, i, j, true); } @@ -17584,9 +17591,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * if parent state is created. */ if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id) - collect_linked_regs(this_branch, src_reg->id, &linked_regs); + collect_linked_regs(env, this_branch, src_reg->id, &linked_regs); if (dst_reg->type == SCALAR_VALUE && dst_reg->id) - collect_linked_regs(this_branch, dst_reg->id, &linked_regs); + collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs); if (linked_regs.cnt > 1) { err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs)); if (err) diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index a01c2736890f..858af5988a38 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,43 +18,43 @@ return *(u64 *)num; \ } -__msg(": R0=0xffffffff80000000") +__msg("R{{.}}=0xffffffff80000000") check_assert(s64, ==, eq_int_min, INT_MIN); -__msg(": R0=0x7fffffff") +__msg("R{{.}}=0x7fffffff") check_assert(s64, ==, eq_int_max, INT_MAX); -__msg(": R0=0") +__msg("R{{.}}=0") check_assert(s64, ==, eq_zero, 0); -__msg(": R0=0x8000000000000000 R1=0x8000000000000000") +__msg("R{{.}}=0x8000000000000000") check_assert(s64, ==, eq_llong_min, LLONG_MIN); -__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff") +__msg("R{{.}}=0x7fffffffffffffff") check_assert(s64, ==, eq_llong_max, LLONG_MAX); -__msg(": R0=scalar(id=1,smax=0x7ffffffe)") +__msg("R{{.}}=scalar(id=1,smax=0x7ffffffe)") check_assert(s64, <, lt_pos, INT_MAX); -__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, <, lt_zero, 0); -__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff") +__msg("R{{.}}=scalar(id=1,smax=0xffffffff7fffffff") check_assert(s64, <, lt_neg, INT_MIN); -__msg(": R0=scalar(id=1,smax=0x7fffffff)") +__msg("R{{.}}=scalar(id=1,smax=0x7fffffff)") check_assert(s64, <=, le_pos, INT_MAX); -__msg(": R0=scalar(id=1,smax=0)") +__msg("R{{.}}=scalar(id=1,smax=0)") check_assert(s64, <=, le_zero, 0); -__msg(": R0=scalar(id=1,smax=0xffffffff80000000") +__msg("R{{.}}=scalar(id=1,smax=0xffffffff80000000") check_assert(s64, <=, le_neg, INT_MIN); -__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_pos, INT_MAX); -__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_zero, 0); -__msg(": R0=scalar(id=1,smin=0xffffffff80000001") +__msg("R{{.}}=scalar(id=1,smin=0xffffffff80000001") check_assert(s64, >, gt_neg, INT_MIN); -__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_pos, INT_MAX); -__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_zero, 0); -__msg(": R0=scalar(id=1,smin=0xffffffff80000000") +__msg("R{{.}}=scalar(id=1,smin=0xffffffff80000000") check_assert(s64, >=, ge_neg, INT_MIN); SEC("?tc") diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 3072fee9a448..58c7704d61cd 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -40,6 +40,9 @@ __naked void linked_regs_bpf_k(void) */ "r3 = r10;" "r3 += r0;" + /* Mark r1 and r2 as alive. */ + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -73,6 +76,9 @@ __naked void linked_regs_bpf_x_src(void) */ "r4 = r10;" "r4 += r0;" + /* Mark r1 and r2 as alive. */ + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -106,6 +112,10 @@ __naked void linked_regs_bpf_x_dst(void) */ "r4 = r10;" "r4 += r3;" + /* Mark r1 and r2 as alive. */ + "r0 = r0;" + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -143,6 +153,9 @@ __naked void linked_regs_broken_link(void) */ "r3 = r10;" "r3 += r0;" + /* Mark r1 and r2 as alive. */ + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -156,16 +169,16 @@ __naked void linked_regs_broken_link(void) */ SEC("socket") __success __log_level(2) -__msg("12: (0f) r2 += r1") +__msg("17: (0f) r2 += r1") /* Current state */ -__msg("frame2: last_idx 12 first_idx 11 subseq_idx -1 ") -__msg("frame2: regs=r1 stack= before 11: (bf) r2 = r10") +__msg("frame2: last_idx 17 first_idx 14 subseq_idx -1 ") +__msg("frame2: regs=r1 stack= before 16: (bf) r2 = r10") __msg("frame2: parent state regs=r1 stack=") __msg("frame1: parent state regs= stack=") __msg("frame0: parent state regs= stack=") /* Parent state */ -__msg("frame2: last_idx 10 first_idx 10 subseq_idx 11 ") -__msg("frame2: regs=r1 stack= before 10: (25) if r1 > 0x7 goto pc+0") +__msg("frame2: last_idx 13 first_idx 13 subseq_idx 14 ") +__msg("frame2: regs=r1 stack= before 13: (25) if r1 > 0x7 goto pc+0") __msg("frame2: parent state regs=r1 stack=") /* frame1.r{6,7} are marked because mark_precise_scalar_ids() * looks for all registers with frame2.r1.id in the current state @@ -173,20 +186,20 @@ __msg("frame2: parent state regs=r1 stack=") __msg("frame1: parent state regs=r6,r7 stack=") __msg("frame0: parent state regs=r6 stack=") /* Parent state */ -__msg("frame2: last_idx 8 first_idx 8 subseq_idx 10") -__msg("frame2: regs=r1 stack= before 8: (85) call pc+1") +__msg("frame2: last_idx 9 first_idx 9 subseq_idx 13") +__msg("frame2: regs=r1 stack= before 9: (85) call pc+3") /* frame1.r1 is marked because of backtracking of call instruction */ __msg("frame1: parent state regs=r1,r6,r7 stack=") __msg("frame0: parent state regs=r6 stack=") /* Parent state */ -__msg("frame1: last_idx 7 first_idx 6 subseq_idx 8") -__msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1") -__msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1") +__msg("frame1: last_idx 8 first_idx 7 subseq_idx 9") +__msg("frame1: regs=r1,r6,r7 stack= before 8: (bf) r7 = r1") +__msg("frame1: regs=r1,r6 stack= before 7: (bf) r6 = r1") __msg("frame1: parent state regs=r1 stack=") __msg("frame0: parent state regs=r6 stack=") /* Parent state */ -__msg("frame1: last_idx 4 first_idx 4 subseq_idx 6") -__msg("frame1: regs=r1 stack= before 4: (85) call pc+1") +__msg("frame1: last_idx 4 first_idx 4 subseq_idx 7") +__msg("frame1: regs=r1 stack= before 4: (85) call pc+2") __msg("frame0: parent state regs=r1,r6 stack=") /* Parent state */ __msg("frame0: last_idx 3 first_idx 1 subseq_idx 4") @@ -204,6 +217,7 @@ __naked void precision_many_frames(void) "r1 = r0;" "r6 = r0;" "call precision_many_frames__foo;" + "r6 = r6;" /* mark r6 as live */ "exit;" : : __imm(bpf_ktime_get_ns) @@ -220,6 +234,8 @@ void precision_many_frames__foo(void) "r6 = r1;" "r7 = r1;" "call precision_many_frames__bar;" + "r6 = r6;" /* mark r6 as live */ + "r7 = r7;" /* mark r7 as live */ "exit" ::: __clobber_all); } @@ -229,6 +245,8 @@ void precision_many_frames__bar(void) { asm volatile ( "if r1 > 7 goto +0;" + "r6 = 0;" /* mark r6 as live */ + "r7 = 0;" /* mark r7 as live */ /* force r1 to be precise, this eventually marks: * - bar frame r1 * - foo frame r{1,6,7} @@ -340,6 +358,8 @@ __naked void precision_two_ids(void) "r3 += r7;" /* force r9 to be precise, this also marks r8 */ "r3 += r9;" + "r6 = r6;" /* mark r6 as live */ + "r8 = r8;" /* mark r8 as live */ "exit;" : : __imm(bpf_ktime_get_ns) @@ -353,7 +373,7 @@ __flag(BPF_F_TEST_STATE_FREQ) * collect_linked_regs() can't tie more than 6 registers for a single insn. */ __msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1") -__msg("9: (bf) r6 = r6 ; R6=scalar(id=2") +__msg("14: (bf) r6 = r6 ; R6=scalar(id=2") /* check that r{0-5} are marked precise after 'if' */ __msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0") __msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:") @@ -372,6 +392,12 @@ __naked void linked_regs_too_many_regs(void) "r6 = r0;" /* propagate range for r{0-6} */ "if r0 > 7 goto +0;" + /* keep r{1-5} live */ + "r1 = r1;" + "r2 = r2;" + "r3 = r3;" + "r4 = r4;" + "r5 = r5;" /* make r6 appear in the log */ "r6 = r6;" /* force r0 to be precise, @@ -517,7 +543,7 @@ __naked void check_ids_in_regsafe_2(void) "*(u64*)(r10 - 8) = r1;" /* r9 = pointer to stack */ "r9 = r10;" - "r9 += -8;" + "r9 += -16;" /* r8 = ktime_get_ns() */ "call %[bpf_ktime_get_ns];" "r8 = r0;" @@ -538,6 +564,8 @@ __naked void check_ids_in_regsafe_2(void) "if r7 > 4 goto l2_%=;" /* Access memory at r9[r6] */ "r9 += r6;" + "r9 += r7;" + "r9 += r8;" "r0 = *(u8*)(r9 + 0);" "l2_%=:" "r0 = 0;" diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 061d98f6e9bb..a9242103dc47 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -44,9 +44,9 @@ mark_precise: frame0: regs=r2 stack= before 23\ mark_precise: frame0: regs=r2 stack= before 22\ mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2,r9 stack=:\ + mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 19 first_idx 10\ - mark_precise: frame0: regs=r2,r9 stack= before 19\ + mark_precise: frame0: regs=r2 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ mark_precise: frame0: regs=r8,r9 stack= before 17\ mark_precise: frame0: regs=r0,r9 stack= before 15\ @@ -107,9 +107,9 @@ mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 20 first_idx 20\ mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2,r9 stack=:\ + mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 19 first_idx 17\ - mark_precise: frame0: regs=r2,r9 stack= before 19\ + mark_precise: frame0: regs=r2 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ mark_precise: frame0: regs=r8,r9 stack= before 17\ mark_precise: frame0: parent state regs= stack=:", From 223ffb6a3d0582522455e83ccf7ad2d3a753e039 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Mar 2026 16:02:48 -0800 Subject: [PATCH 826/828] selftests/bpf: add reproducer for spurious precision propagation through calls Add a test for the scenario described in the previous commit: an iterator loop with two paths where one ties r2/r7 via shared scalar id and skips a call, while the other goes through the call. Precision marks from the linked registers get spuriously propagated to the call path via propagate_precision(), hitting "backtracking call unexpected regs" in backtrack_insn(). Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260306-linked-regs-and-propagate-precision-v1-2-18e859be570d@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_linked_scalars.c | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c index 2ef346c827c2..7bf7dbfd237d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c +++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c @@ -363,4 +363,68 @@ void alu32_negative_offset(void) __sink(path[0]); } +void dummy_calls(void) +{ + bpf_iter_num_new(0, 0, 0); + bpf_iter_num_next(0); + bpf_iter_num_destroy(0); +} + +SEC("socket") +__success +__flag(BPF_F_TEST_STATE_FREQ) +int spurious_precision_marks(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile( + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 4f;" + "r7 = *(u32 *)(r0 + 0);" + "r8 = *(u32 *)(r0 + 0);" + /* This jump can't be predicted and does not change r7 or r8 state. */ + "if r7 > r8 goto 2f;" + /* Branch explored first ties r2 and r7 as having the same id. */ + "r2 = r7;" + "goto 3f;" + "2:" + /* Branch explored second does not tie r2 and r7 but has a function call. */ + "call %[bpf_get_prandom_u32];" + "3:" + /* + * A checkpoint. + * When first branch is explored, this would inject linked registers + * r2 and r7 into the jump history. + * When second branch is explored, this would be a cache hit point, + * triggering propagate_precision(). + */ + "if r7 <= 42 goto +0;" + /* + * Mark r7 as precise using an if condition that is always true. + * When reached via the second branch, this triggered a bug in the backtrack_insn() + * because r2 (tied to r7) was propagated as precise to a call. + */ + "if r7 <= 0xffffFFFF goto +0;" + "goto 1b;" + "4:" + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_common, "r7", "r8" + ); + + return 0; +} + char _license[] SEC("license") = "GPL"; From b0dcdcb9ae757c8a8ba2fb24d34f8d147bae707b Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Wed, 4 Mar 2026 17:47:30 -0800 Subject: [PATCH 827/828] resolve_btfids: Fix linker flags detection The "|| echo -lzstd" default makes zstd an unconditional link dependency of resolve_btfids. On systems where libzstd-dev is not installed and pkg-config fails, the linker fails: ld: cannot find -lzstd: No such file or directory libzstd is a transitive dependency of libelf, so the -lzstd flag is strictly necessary only for static builds [1]. Remove ZSTD_LIBS variable, and instead set LIBELF_LIBS depending on whether the build is static or not. Use $(HOSTPKG_CONFIG) as primary source of the flags list. Also add a default value for HOSTPKG_CONFIG in case it's not built via the toplevel Makefile. Pass it from selftests/bpf too. [1] https://lore.kernel.org/bpf/4ff82800-2daa-4b9f-95a9-6f512859ee70@linux.dev/ Reported-by: BPF CI Bot (Claude Opus 4.6) Reported-by: Vitaly Chikunov Closes: https://lore.kernel.org/bpf/aaWqMcK-2AQw5dx8@altlinux.org/ Fixes: 4021848a903e ("selftests/bpf: Pass through build flags to bpftool and resolve_btfids") Signed-off-by: Ihor Solodrai Reviewed-by: Paul Chaignon Link: https://lore.kernel.org/r/20260305014730.3123382-1-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/bpf/resolve_btfids/Makefile | 9 +++++++-- tools/testing/selftests/bpf/Makefile | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index ef083602b73a..7672208f65e4 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -23,6 +23,7 @@ RM ?= rm HOSTCC ?= gcc HOSTLD ?= ld HOSTAR ?= ar +HOSTPKG_CONFIG ?= pkg-config CROSS_COMPILE = OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ @@ -63,10 +64,14 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU $(abspath $@) install_headers LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) + +ifneq ($(filter -static,$(EXTRA_LDFLAGS)),) +LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs --static 2>/dev/null || echo -lelf -lzstd) +else LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) +endif ZLIB_LIBS := $(shell $(HOSTPKG_CONFIG) zlib --libs 2>/dev/null || echo -lz) -ZSTD_LIBS := $(shell $(HOSTPKG_CONFIG) libzstd --libs 2>/dev/null || echo -lzstd) HOSTCFLAGS_resolve_btfids += -g \ -I$(srctree)/tools/include \ @@ -76,7 +81,7 @@ HOSTCFLAGS_resolve_btfids += -g \ $(LIBELF_FLAGS) \ -Wall -Werror -LIBS = $(LIBELF_LIBS) $(ZLIB_LIBS) $(ZSTD_LIBS) +LIBS = $(LIBELF_LIBS) $(ZLIB_LIBS) export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR include $(srctree)/tools/build/Makefile.include diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 72a9ba41f95e..d5acbeba0383 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -409,6 +409,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ + HOSTPKG_CONFIG=$(PKG_CONFIG) \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by From 1954c4f012206147c34acda8da04f827aa7d3ee3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Mar 2026 20:07:15 +0000 Subject: [PATCH 828/828] eventpoll: Convert epoll_put_uevent() to scoped user access Saves two function calls, and one stac/clac pair. stac/clac is rather expensive on older cpus like Zen 2. A synthetic network stress test gives a ~1.5% increase of pps on AMD Zen 2. Signed-off-by: Eric Dumazet Cc: Christophe Leroy Cc: Dave Hansen Cc: Kuniyuki Iwashima Signed-off-by: Linus Torvalds --- include/linux/eventpoll.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index ccb478eb174b..ea9ca0e4172a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -82,11 +82,14 @@ static inline struct epoll_event __user * epoll_put_uevent(__poll_t revents, __u64 data, struct epoll_event __user *uevent) { - if (__put_user(revents, &uevent->events) || - __put_user(data, &uevent->data)) - return NULL; - + scoped_user_write_access_size(uevent, sizeof(*uevent), efault) { + unsafe_put_user(revents, &uevent->events, efault); + unsafe_put_user(data, &uevent->data, efault); + } return uevent+1; + +efault: + return NULL; } #endif