From 0319227099dfa06157b7cd669072b6e899d1bba8 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 25 Nov 2025 13:19:56 -0500 Subject: [PATCH 001/107] oid_registry: allow arbitrary size OIDs The current OID registry parser uses 64 bit arithmetic which limits us to supporting 64 bit or smaller OIDs. This isn't usually a problem except that it prevents us from representing the 2.25. prefix OIDs which are the OID representation of UUIDs and have a 128 bit number following the prefix. Rather than import not often used perl arithmetic modules, replace the current perl 64 bit arithmetic with a callout to bc, which is arbitrary precision, for decimal to base 2 conversion, then do pure string operations on the base 2 number. [James.Bottomley@HansenPartnership.com: tidy up perl with better my placement also set bc to arbitrary size] Link: https://lkml.kernel.org/r/dbc90c344c691ed988640a28367ff895b5ef2604.camel@HansenPartnership.com Link: https://lkml.kernel.org/r/833c858cd74533203b43180208734b84f1137af0.camel@HansenPartnership.com Signed-off-by: James Bottomley Cc: David Howells Cc: Blaise Boscaccy Signed-off-by: Andrew Morton --- lib/build_OID_registry | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/build_OID_registry b/lib/build_OID_registry index 8267e8d71338..30493ac190c0 100755 --- a/lib/build_OID_registry +++ b/lib/build_OID_registry @@ -60,10 +60,12 @@ for (my $i = 0; $i <= $#names; $i++) { # Determine the encoded length of this OID my $size = $#components; for (my $loop = 2; $loop <= $#components; $loop++) { - my $c = $components[$loop]; + $ENV{'BC_LINE_LENGTH'} = "0"; + my $c = `echo "ibase=10; obase=2; $components[$loop]" | bc`; + chomp($c); # We will base128 encode the number - my $tmp = ($c == 0) ? 
0 : int(log($c)/log(2)); + my $tmp = length($c) - 1; $tmp = int($tmp / 7); $size += $tmp; } @@ -100,16 +102,24 @@ for (my $i = 0; $i <= $#names; $i++) { push @octets, $components[0] * 40 + $components[1]; for (my $loop = 2; $loop <= $#components; $loop++) { - my $c = $components[$loop]; + # get the base 2 representation of the component + $ENV{'BC_LINE_LENGTH'} = "0"; + my $c = `echo "ibase=10; obase=2; $components[$loop]" | bc`; + chomp($c); - # Base128 encode the number - my $tmp = ($c == 0) ? 0 : int(log($c)/log(2)); + my $tmp = length($c) - 1; $tmp = int($tmp / 7); - for (; $tmp > 0; $tmp--) { - push @octets, (($c >> $tmp * 7) & 0x7f) | 0x80; + # zero pad upto length multiple of 7 + $c = substr("0000000", 0, ($tmp + 1) * 7 - length($c)).$c; + + # Base128 encode the number + for (my $j = 0; $j < $tmp; $j++) { + my $b = oct("0b".substr($c, $j * 7, 7)); + + push @octets, $b | 0x80; } - push @octets, $c & 0x7f; + push @octets, oct("0b".substr($c, $tmp * 7, 7)); } push @encoded_oids, \@octets; From b11052be3ea7c1dfc81804b203bc4369edafd040 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Dec 2025 18:57:30 +0100 Subject: [PATCH 002/107] crash_dump: constify struct configfs_item_operations and configfs_group_operations 'struct configfs_item_operations' and 'configfs_group_operations' are not modified in this driver. Constifying these structures moves some data to a read-only section, so increases overall security, especially when the structure holds some function pointers. 
On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 16339 11001 384 27724 6c4c kernel/crash_dump_dm_crypt.o After: ===== text data bss dec hex filename 16499 10841 384 27724 6c4c kernel/crash_dump_dm_crypt.o Link: https://lkml.kernel.org/r/d046ee5666d2f6b1a48ca1a222dfbd2f7c44462f.1765735035.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Reviewed-by: Coiby Xu Tested-by: Coiby Xu Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_dump_dm_crypt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 401423ba477d..0d23dc1de67c 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -223,7 +223,7 @@ static void config_key_release(struct config_item *item) key_count--; } -static struct configfs_item_operations config_key_item_ops = { +static const struct configfs_item_operations config_key_item_ops = { .release = config_key_release, }; @@ -298,7 +298,7 @@ static struct configfs_attribute *config_keys_attrs[] = { * Note that, since no extra work is required on ->drop_item(), * no ->drop_item() is provided. */ -static struct configfs_group_operations config_keys_group_ops = { +static const struct configfs_group_operations config_keys_group_ops = { .make_item = config_keys_make_item, }; From 4a54331616b309934d46e255a9f1fdd890e77ebe Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Fri, 12 Dec 2025 15:45:03 +0800 Subject: [PATCH 003/107] ocfs2: give ocfs2 the ability to reclaim suballocator free bg Patch series "ocfs2: give ocfs2 the ability to reclaim suballocator free bg", v6. This patch (of 2): The current ocfs2 code can't reclaim suballocator block group space. In some cases, this causes ocfs2 to hold onto a lot of space. For example, when creating lots of small files, the space is held/managed by the '//inode_alloc'. 
After the user deletes all the small files, the space never returns to the '//global_bitmap'. This issue prevents ocfs2 from providing the needed space even when there is enough free space in a small ocfs2 volume. This patch gives ocfs2 the ability to reclaim suballocator free space when the block group is freed. For performance reasons, this patch keeps the first suballocator block group active. Link: https://lkml.kernel.org/r/20251212074505.25962-2-heming.zhao@suse.com Signed-off-by: Heming Zhao Reviewed-by: Su Yue Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Signed-off-by: Andrew Morton --- fs/ocfs2/suballoc.c | 308 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 299 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8e6e5235b30c..aac2f96bee43 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -295,6 +295,74 @@ static int ocfs2_validate_group_descriptor(struct super_block *sb, return ocfs2_validate_gd_self(sb, bh, 0); } +/* + * The hint group descriptor (gd) may already have been released + * in _ocfs2_free_suballoc_bits(). We first check the gd signature, + * then perform the standard ocfs2_read_group_descriptor() jobs. + * + * If the gd signature is invalid, we return 'rc=0' and set + * '*released=1'. The caller is expected to handle this specific case. + * Otherwise, we return the actual error code. + * + * We treat gd signature corruption case as a release case. The + * caller ocfs2_claim_suballoc_bits() will use ocfs2_search_chain() + * to search each gd block. The code will eventually find this + * corrupted gd block - Late, but not missed. + * + * Note: + * The caller is responsible for initializing the '*released' status. 
+ */ +static int ocfs2_read_hint_group_descriptor(struct inode *inode, + struct ocfs2_dinode *di, u64 gd_blkno, + struct buffer_head **bh, int *released) +{ + int rc; + struct buffer_head *tmp = *bh; + struct ocfs2_group_desc *gd; + + rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp, NULL); + if (rc) + goto out; + + gd = (struct ocfs2_group_desc *) tmp->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + /* + * Invalid gd cache was set in ocfs2_read_block(), + * which will affect block_group allocation. + * Path: + * ocfs2_reserve_suballoc_bits + * ocfs2_block_group_alloc + * ocfs2_block_group_alloc_contig + * ocfs2_set_new_buffer_uptodate + */ + ocfs2_remove_from_cache(INODE_CACHE(inode), tmp); + *released = 1; /* we return 'rc=0' for this case */ + goto free_bh; + } + + /* below jobs same with ocfs2_read_group_descriptor() */ + if (!buffer_jbd(tmp)) { + rc = ocfs2_validate_group_descriptor(inode->i_sb, tmp); + if (rc) + goto free_bh; + } + + rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0); + if (rc) + goto free_bh; + + /* If ocfs2_read_block() got us a new bh, pass it up. 
*/ + if (!*bh) + *bh = tmp; + + return rc; + +free_bh: + brelse(tmp); +out: + return rc; +} + int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, u64 gd_blkno, struct buffer_head **bh) { @@ -1725,7 +1793,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, u32 bits_wanted, u32 min_bits, struct ocfs2_suballoc_result *res, - u16 *bits_left) + u16 *bits_left, int *released) { int ret; struct buffer_head *group_bh = NULL; @@ -1733,9 +1801,11 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_group_descriptor(alloc_inode, di, - res->sr_bg_blkno, &group_bh); - if (ret < 0) { + ret = ocfs2_read_hint_group_descriptor(alloc_inode, di, + res->sr_bg_blkno, &group_bh, released); + if (*released) { + return 0; + } else if (ret < 0) { mlog_errno(ret); return ret; } @@ -1950,6 +2020,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, struct ocfs2_suballoc_result *res) { int status; + int released = 0; u16 victim, i; u16 bits_left = 0; u64 hint = ac->ac_last_group; @@ -1976,6 +2047,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, goto bail; } + /* the hint bg may already be released, we quiet search this group. */ res->sr_bg_blkno = hint; if (res->sr_bg_blkno) { /* Attempt to short-circuit the usual search mechanism @@ -1983,7 +2055,12 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * allocation group. This helps us maintain some * contiguousness across allocations. 
*/ status = ocfs2_search_one_group(ac, handle, bits_wanted, - min_bits, res, &bits_left); + min_bits, res, &bits_left, + &released); + if (released) { + res->sr_bg_blkno = 0; + goto chain_search; + } if (!status) goto set_hint; if (status < 0 && status != -ENOSPC) { @@ -1991,7 +2068,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, goto bail; } } - +chain_search: cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; if (!le16_to_cpu(cl->cl_next_free_rec) || le16_to_cpu(cl->cl_next_free_rec) > le16_to_cpu(cl->cl_count)) { @@ -2113,6 +2190,12 @@ bail: return status; } +/* + * after ocfs2 has the ability to release block group unused space, + * the ->ip_last_used_group may be invalid. so this function returns + * ac->ac_last_group need to verify. + * refer the 'hint' in ocfs2_claim_suballoc_bits() for more details. + */ static void ocfs2_init_inode_ac_group(struct inode *dir, struct buffer_head *parent_di_bh, struct ocfs2_alloc_context *ac) @@ -2551,6 +2634,198 @@ bail: return status; } +/* + * Reclaim the suballocator managed space to main bitmap. + * This function first works on the suballocator to perform the + * cleanup rec/alloc_inode job, then switches to the main bitmap + * to reclaim released space. + * + * handle: The transaction handle + * alloc_inode: The suballoc inode + * alloc_bh: The buffer_head of suballoc inode + * group_bh: The group descriptor buffer_head of suballocator managed. + * Caller should release the input group_bh. 
+ */ +static int _ocfs2_reclaim_suballoc_to_main(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + struct buffer_head *group_bh) +{ + int idx, status = 0; + int i, next_free_rec, len = 0; + __le16 old_bg_contig_free_bits = 0; + u16 start_bit; + u32 tmp_used; + u64 bg_blkno, start_blk; + unsigned int count; + struct ocfs2_chain_rec *rec; + struct buffer_head *main_bm_bh = NULL; + struct inode *main_bm_inode = NULL; + struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); + struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; + struct ocfs2_chain_list *cl = &fe->id2.i_chain; + struct ocfs2_group_desc *group = (struct ocfs2_group_desc *) group_bh->b_data; + + idx = le16_to_cpu(group->bg_chain); + rec = &(cl->cl_recs[idx]); + + status = ocfs2_extend_trans(handle, + ocfs2_calc_group_alloc_credits(osb->sb, + le16_to_cpu(cl->cl_cpg))); + if (status) { + mlog_errno(status); + goto bail; + } + status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), + alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + /* + * Only clear the suballocator rec item in-place. + * + * If idx is not the last, we don't compress (remove the empty item) + * the cl_recs[]. If not, we need to do lots jobs. + * + * Compress cl_recs[] code example: + * if (idx != cl->cl_next_free_rec - 1) + * memmove(&cl->cl_recs[idx], &cl->cl_recs[idx + 1], + * sizeof(struct ocfs2_chain_rec) * + * (cl->cl_next_free_rec - idx - 1)); + * for(i = idx; i < cl->cl_next_free_rec-1; i++) { + * group->bg_chain = "later group->bg_chain"; + * group->bg_blkno = xxx; + * ... ... 
+ * } + */ + + tmp_used = le32_to_cpu(fe->id1.bitmap1.i_total); + fe->id1.bitmap1.i_total = cpu_to_le32(tmp_used - le32_to_cpu(rec->c_total)); + + /* Substraction 1 for the block group itself */ + tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); + fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - 1); + + tmp_used = le32_to_cpu(fe->i_clusters); + fe->i_clusters = cpu_to_le32(tmp_used - le16_to_cpu(cl->cl_cpg)); + + spin_lock(&OCFS2_I(alloc_inode)->ip_lock); + OCFS2_I(alloc_inode)->ip_clusters -= le32_to_cpu(fe->i_clusters); + fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, + le32_to_cpu(fe->i_clusters))); + spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); + i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); + alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); + + ocfs2_journal_dirty(handle, alloc_bh); + ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0); + + start_blk = le64_to_cpu(rec->c_blkno); + count = le32_to_cpu(rec->c_total) / le16_to_cpu(cl->cl_bpc); + + /* + * If the rec is the last one, let's compress the chain list by + * removing the empty cl_recs[] at the end. 
+ */ + next_free_rec = le16_to_cpu(cl->cl_next_free_rec); + if (idx == (next_free_rec - 1)) { + len++; /* the last item should be counted first */ + for (i = (next_free_rec - 2); i > 0; i--) { + if (cl->cl_recs[i].c_free == cl->cl_recs[i].c_total) + len++; + else + break; + } + } + le16_add_cpu(&cl->cl_next_free_rec, -len); + + rec->c_free = 0; + rec->c_total = 0; + rec->c_blkno = 0; + ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), group_bh); + memset(group, 0, sizeof(struct ocfs2_group_desc)); + + /* prepare job for reclaim clusters */ + main_bm_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (!main_bm_inode) + goto bail; /* ignore the error in reclaim path */ + + inode_lock(main_bm_inode); + + status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); + if (status < 0) + goto free_bm_inode; /* ignore the error in reclaim path */ + + ocfs2_block_to_cluster_group(main_bm_inode, start_blk, &bg_blkno, + &start_bit); + fe = (struct ocfs2_dinode *) main_bm_bh->b_data; + cl = &fe->id2.i_chain; + /* reuse group_bh, caller will release the input group_bh */ + group_bh = NULL; + + /* reclaim clusters to global_bitmap */ + status = ocfs2_read_group_descriptor(main_bm_inode, fe, bg_blkno, + &group_bh); + if (status < 0) { + mlog_errno(status); + goto free_bm_bh; + } + group = (struct ocfs2_group_desc *) group_bh->b_data; + + if ((count + start_bit) > le16_to_cpu(group->bg_bits)) { + ocfs2_error(alloc_inode->i_sb, + "reclaim length (%d) beyands block group length (%d)", + count + start_bit, le16_to_cpu(group->bg_bits)); + goto free_group_bh; + } + + old_bg_contig_free_bits = group->bg_contig_free_bits; + status = ocfs2_block_group_clear_bits(handle, main_bm_inode, + group, group_bh, + start_bit, count, 0, + _ocfs2_clear_bit); + if (status < 0) { + mlog_errno(status); + goto free_group_bh; + } + + status = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), + main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) 
{ + mlog_errno(status); + ocfs2_block_group_set_bits(handle, main_bm_inode, group, group_bh, + start_bit, count, + le16_to_cpu(old_bg_contig_free_bits), 1); + goto free_group_bh; + } + + idx = le16_to_cpu(group->bg_chain); + rec = &(cl->cl_recs[idx]); + + le32_add_cpu(&rec->c_free, count); + tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); + fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); + ocfs2_journal_dirty(handle, main_bm_bh); + +free_group_bh: + brelse(group_bh); + +free_bm_bh: + ocfs2_inode_unlock(main_bm_inode, 1); + brelse(main_bm_bh); + +free_bm_inode: + inode_unlock(main_bm_inode); + iput(main_bm_inode); + +bail: + return status; +} + /* * expects the suballoc inode to already be locked. */ @@ -2563,12 +2838,13 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, void (*undo_fn)(unsigned int bit, unsigned long *bitmap)) { - int status = 0; + int idx, status = 0; u32 tmp_used; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; struct ocfs2_chain_list *cl = &fe->id2.i_chain; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *group; + struct ocfs2_chain_rec *rec; __le16 old_bg_contig_free_bits = 0; /* The alloc_bh comes from ocfs2_free_dinode() or @@ -2614,12 +2890,26 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, goto bail; } - le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, - count); + idx = le16_to_cpu(group->bg_chain); + rec = &(cl->cl_recs[idx]); + + le32_add_cpu(&rec->c_free, count); tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); ocfs2_journal_dirty(handle, alloc_bh); + /* + * Reclaim suballocator free space. 
+ * Bypass: global_bitmap, non empty rec, first rec in cl_recs[] + */ + if (ocfs2_is_cluster_bitmap(alloc_inode) || + (le32_to_cpu(rec->c_free) != (le32_to_cpu(rec->c_total) - 1)) || + (le16_to_cpu(cl->cl_next_free_rec) == 1)) { + goto bail; + } + + _ocfs2_reclaim_suballoc_to_main(handle, alloc_inode, alloc_bh, group_bh); + bail: brelse(group_bh); return status; From fd4d53bde9128bc85d85cc3427bd592e4c3293e4 Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Fri, 12 Dec 2025 15:45:04 +0800 Subject: [PATCH 004/107] ocfs2: detect released suballocator BG for fh_to_[dentry|parent] After ocfs2 gained the ability to reclaim suballocator free block group (BGs), a suballocator block group may be released. This change causes the xfstest case generic/426 to fail. generic/426 expects return value -ENOENT or -ESTALE, but the current code triggers -EROFS. Call stack before ocfs2 gained the ability to reclaim bg: ocfs2_fh_to_dentry //or ocfs2_fh_to_parent ocfs2_get_dentry + ocfs2_test_inode_bit | ocfs2_test_suballoc_bit | + ocfs2_read_group_descriptor //Since ocfs2 never releases the bg, | | //the bg block was always found. | + *res = ocfs2_test_bit //unlink was called, and the bit is zero | + if (!set) //because the above *res is 0 status = -ESTALE //the generic/426 expected return value Current call stack that triggers -EROFS: ocfs2_get_dentry ocfs2_test_inode_bit ocfs2_test_suballoc_bit ocfs2_read_group_descriptor + if reading a released bg, validation fails and triggers -EROFS How to fix: Since the read BG is already released, we must avoid triggering -EROFS. With this commit, we use ocfs2_read_hint_group_descriptor() to detect the released BG block. This approach quietly handles this type of error and returns -EINVAL, which triggers the caller's existing conversion path to -ESTALE. 
[dan.carpenter@linaro.org: fix uninitialized variable] Link: https://lkml.kernel.org/r/dc37519fd2470909f8c65e26c5131b8b6dde2a5c.1766043917.git.dan.carpenter@linaro.org Link: https://lkml.kernel.org/r/20251212074505.25962-3-heming.zhao@suse.com Signed-off-by: Heming Zhao Signed-off-by: Dan Carpenter Reviewed-by: Su Yue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/export.c | 6 ++++-- fs/ocfs2/suballoc.c | 26 +++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index b95724b767e1..9c2665dd24e2 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -74,8 +74,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, * nice */ status = -ESTALE; - } else + } else if (status != -ESTALE) { mlog(ML_ERROR, "test inode bit failed %d\n", status); + } goto unlock_nfs_sync; } @@ -162,8 +163,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) if (status < 0) { if (status == -EINVAL) { status = -ESTALE; - } else + } else if (status != -ESTALE) { mlog(ML_ERROR, "test inode bit failed %d\n", status); + } parent = ERR_PTR(status); goto bail_unlock; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index aac2f96bee43..79d1325b2111 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -3163,7 +3163,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct ocfs2_group_desc *group; struct buffer_head *group_bh = NULL; u64 bg_blkno; - int status; + int status, quiet = 0, released = 0; trace_ocfs2_test_suballoc_bit((unsigned long long)blkno, (unsigned int)bit); @@ -3179,9 +3179,13 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, bg_blkno = group_blkno ? 
group_blkno : ocfs2_which_suballoc_group(blkno, bit); - status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, - &group_bh); - if (status < 0) { + status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno, + &group_bh, &released); + if (released) { + quiet = 1; + status = -ESTALE; + goto bail; + } else if (status < 0) { mlog(ML_ERROR, "read group %llu failed %d\n", (unsigned long long)bg_blkno, status); goto bail; @@ -3193,7 +3197,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, bail: brelse(group_bh); - if (status) + if (status && !quiet) mlog_errno(status); return status; } @@ -3213,7 +3217,7 @@ bail: */ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) { - int status; + int status, quiet = 0; u64 group_blkno = 0; u16 suballoc_bit = 0, suballoc_slot = 0; struct inode *inode_alloc_inode; @@ -3255,8 +3259,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, group_blkno, blkno, suballoc_bit, res); - if (status < 0) - mlog(ML_ERROR, "test suballoc bit failed %d\n", status); + if (status < 0) { + if (status == -ESTALE) + quiet = 1; + else + mlog(ML_ERROR, "test suballoc bit failed %d\n", status); + } ocfs2_inode_unlock(inode_alloc_inode, 0); inode_unlock(inode_alloc_inode); @@ -3264,7 +3272,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) iput(inode_alloc_inode); brelse(alloc_bh); bail: - if (status) + if (status && !quiet) mlog_errno(status); return status; } From 9677a51abd860aa75bd4fe28176672744bf5180e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Dec 2025 11:41:35 +0100 Subject: [PATCH 005/107] ocfs2: constify struct configfs_item_operations and configfs_group_operations 'struct configfs_item_operations' and 'configfs_group_operations' are not modified in this driver. 
Constifying these structures moves some data to a read-only section, so increases overall security, especially when the structure holds some function pointers. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 74011 19312 5280 98603 1812b fs/ocfs2/cluster/heartbeat.o After: ===== text data bss dec hex filename 74171 19152 5280 98603 1812b fs/ocfs2/cluster/heartbeat.o Link: https://lkml.kernel.org/r/7c7c00ba328e5e514d8debee698154039e9640dd.1765708880.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/cluster/heartbeat.c | 4 ++-- fs/ocfs2/cluster/nodemanager.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 724350925aff..8e9cbc334cf4 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1942,7 +1942,7 @@ static struct configfs_attribute *o2hb_region_attrs[] = { NULL, }; -static struct configfs_item_operations o2hb_region_item_ops = { +static const struct configfs_item_operations o2hb_region_item_ops = { .release = o2hb_region_release, }; @@ -2193,7 +2193,7 @@ static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { NULL, }; -static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { +static const struct configfs_group_operations o2hb_heartbeat_group_group_ops = { .make_item = o2hb_heartbeat_group_make_item, .drop_item = o2hb_heartbeat_group_drop_item, }; diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 6bc4e064ace4..c5e83c774d73 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -396,7 +396,7 @@ static struct configfs_attribute *o2nm_node_attrs[] = { NULL, }; -static struct configfs_item_operations o2nm_node_item_ops = { +static const struct 
configfs_item_operations o2nm_node_item_ops = { .release = o2nm_node_release, }; @@ -638,7 +638,7 @@ static void o2nm_node_group_drop_item(struct config_group *group, config_item_put(item); } -static struct configfs_group_operations o2nm_node_group_group_ops = { +static const struct configfs_group_operations o2nm_node_group_group_ops = { .make_item = o2nm_node_group_make_item, .drop_item = o2nm_node_group_drop_item, }; @@ -657,7 +657,7 @@ static void o2nm_cluster_release(struct config_item *item) kfree(cluster); } -static struct configfs_item_operations o2nm_cluster_item_ops = { +static const struct configfs_item_operations o2nm_cluster_item_ops = { .release = o2nm_cluster_release, }; @@ -741,7 +741,7 @@ static void o2nm_cluster_group_drop_item(struct config_group *group, struct conf config_item_put(item); } -static struct configfs_group_operations o2nm_cluster_group_group_ops = { +static const struct configfs_group_operations o2nm_cluster_group_group_ops = { .make_group = o2nm_cluster_group_make_group, .drop_item = o2nm_cluster_group_drop_item, }; From 688dab01c3bb14cb559878aaf7019bfba4a79275 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Fri, 12 Dec 2025 11:28:26 +0530 Subject: [PATCH 006/107] ocfs2: validate i_refcount_loc when refcount flag is set Add validation in ocfs2_validate_inode_block() to check that if an inode has OCFS2_HAS_REFCOUNT_FL set, it must also have a valid i_refcount_loc. A corrupted filesystem image can have this inconsistent state, which later triggers a BUG_ON in ocfs2_remove_refcount_tree() when the inode is being wiped during unlink. Catch this corruption early during inode validation to fail gracefully instead of crashing the kernel. 
Link: https://lkml.kernel.org/r/20251212055826.20929-1-kartikey406@gmail.com Signed-off-by: Deepanshu Kartikey Reported-by: syzbot+6d832e79d3efe1c46743@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6d832e79d3efe1c46743 Tested-by: syzbot+6d832e79d3efe1c46743@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/20251208084407.3021466-1-kartikey406@gmail.com/T/ [v1] Link: https://lore.kernel.org/all/20251212045646.9988-1-kartikey406@gmail.com/T/ [v2] Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index b5fcc2725a29..c95c998811ae 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1529,6 +1529,13 @@ int ocfs2_validate_inode_block(struct super_block *sb, } } + if ((le16_to_cpu(di->i_dyn_features) & OCFS2_HAS_REFCOUNT_FL) && + !di->i_refcount_loc) { + rc = ocfs2_error(sb, "Inode #%llu has refcount flag but no i_refcount_loc\n", + (unsigned long long)bh->b_blocknr); + goto bail; + } + rc = 0; bail: From 1524af3685b35feac76662cc551cbc37bd14775f Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Fri, 12 Dec 2025 10:51:32 +0530 Subject: [PATCH 007/107] ocfs2: validate inline data i_size during inode read When reading an inode from disk, ocfs2_validate_inode_block() performs various sanity checks but does not validate the size of inline data. If the filesystem is corrupted, an inode's i_size can exceed the actual inline data capacity (id_count). This causes ocfs2_dir_foreach_blk_id() to iterate beyond the inline data buffer, triggering a use-after-free when accessing directory entries from freed memory. 
In the syzbot report: - i_size was 1099511627576 bytes (~1TB) - Actual inline data capacity (id_count) is typically <256 bytes - A garbage rec_len (54648) caused ctx->pos to jump out of bounds - This triggered a UAF in ocfs2_check_dir_entry() Fix by adding a validation check in ocfs2_validate_inode_block() to ensure inodes with inline data have i_size <= id_count. This catches the corruption early during inode read and prevents all downstream code from operating on invalid data. Link: https://lkml.kernel.org/r/20251212052132.16750-1-kartikey406@gmail.com Signed-off-by: Deepanshu Kartikey Reported-by: syzbot+c897823f699449cc3eb4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c897823f699449cc3eb4 Tested-by: syzbot+c897823f699449cc3eb4@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/20251211115231.3560028-1-kartikey406@gmail.com/T/ [v1] Link: https://lore.kernel.org/all/20251212040400.6377-1-kartikey406@gmail.com/T/ [v2] Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/inode.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index c95c998811ae..03a51662ea8e 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1494,12 +1494,25 @@ int ocfs2_validate_inode_block(struct super_block *sb, goto bail; } - if ((le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) && - le32_to_cpu(di->i_clusters)) { - rc = ocfs2_error(sb, "Invalid dinode %llu: %u clusters\n", - (unsigned long long)bh->b_blocknr, - le32_to_cpu(di->i_clusters)); - goto bail; + if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) { + struct ocfs2_inline_data *data = &di->id2.i_data; + + if (le32_to_cpu(di->i_clusters)) { + rc = ocfs2_error(sb, + "Invalid dinode %llu: %u clusters\n", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(di->i_clusters)); + goto bail; + } + + if 
(le64_to_cpu(di->i_size) > le16_to_cpu(data->id_count)) { + rc = ocfs2_error(sb, + "Invalid dinode #%llu: inline data i_size %llu exceeds id_count %u\n", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(di->i_size), + le16_to_cpu(data->id_count)); + goto bail; + } } if (le32_to_cpu(di->i_flags) & OCFS2_CHAIN_FL) { From d3cd8de2e17e496e115f36faeccad7d219edd381 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 11 Dec 2025 18:59:49 +0300 Subject: [PATCH 008/107] ocfs2: adjust ocfs2_xa_remove_entry() to match UBSAN boundary checks After introducing 2f26f58df041 ("ocfs2: annotate flexible array members with __counted_by_le()"), syzbot has reported the following issue: UBSAN: array-index-out-of-bounds in fs/ocfs2/xattr.c:1955:3 index 2 is out of range for type 'struct ocfs2_xattr_entry[] __counted_by(xh_count)' (aka 'struct ocfs2_xattr_entry[]') ... Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 ubsan_epilogue+0xa/0x40 lib/ubsan.c:233 __ubsan_handle_out_of_bounds+0xe9/0xf0 lib/ubsan.c:455 ocfs2_xa_remove_entry+0x36d/0x3e0 fs/ocfs2/xattr.c:1955 ... To address this issue, 'xh_entries[]' member removal should be performed before actually changing 'xh_count', thus making sure that all array accesses matches the boundary checks performed by UBSAN. 
Link: https://lkml.kernel.org/r/20251211155949.774485-1-dmantipov@yandex.ru Signed-off-by: Dmitry Antipov Reported-by: syzbot+cf96bc82a588a27346a8@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=cf96bc82a588a27346a8 Reviewed-by: Heming Zhao Acked-by: Joseph Qi Cc: Deepanshu Kartikey Cc: Joel Becker Cc: Mark Fasheh Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/xattr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 1b21fbc16d73..5fd85f517868 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1971,8 +1971,7 @@ static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) ocfs2_xa_wipe_namevalue(loc); loc->xl_entry = NULL; - le16_add_cpu(&xh->xh_count, -1); - count = le16_to_cpu(xh->xh_count); + count = le16_to_cpu(xh->xh_count) - 1; /* * Only zero out the entry if there are more remaining. This is @@ -1987,6 +1986,8 @@ static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) memset(&xh->xh_entries[count], 0, sizeof(struct ocfs2_xattr_entry)); } + + xh->xh_count = cpu_to_le16(count); } /* From 4e9f69c062150566de5870536f08a50239724537 Mon Sep 17 00:00:00 2001 From: Prithvi Tambewagh Date: Tue, 16 Dec 2025 00:15:57 +0530 Subject: [PATCH 009/107] ocfs2: add validate function for slot map blocks When the filesystem is being mounted, the kernel panics while the data regarding slot map allocation to the local node, is being written to the disk. This occurs because the value of slot map buffer head block number, which should have been greater than or equal to `OCFS2_SUPER_BLOCK_BLKNO` (evaluating to 2) is less than it, indicative of disk metadata corruption. This triggers BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) in ocfs2_write_block(), causing the kernel to panic. This is fixed by introducing function ocfs2_validate_slot_map_block() to validate slot map blocks. 
It first checks if the buffer head passed to it is up to date and valid, else it panics the kernel at that point itself. Further, it contains an if condition block, which checks if `bh->b_blocknr` is lesser than `OCFS2_SUPER_BLOCK_BLKNO`; if yes, then ocfs2_error is called, which prints the error log, for debugging purposes, and the return value of ocfs2_error() is returned. If the if condition is false, value 0 is returned by ocfs2_validate_slot_map_block(). This function is used as validate function in calls to ocfs2_read_blocks() in ocfs2_refresh_slot_info() and ocfs2_map_slot_buffers(). Link: https://lkml.kernel.org/r/20251215184600.13147-1-activprithvi@gmail.com Signed-off-by: Prithvi Tambewagh Reported-by: syzbot+c818e5c4559444f88aa0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c818e5c4559444f88aa0 Tested-by: Reviewed-by: Heming Zhao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/slot_map.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index e544c704b583..ea4a68abc25b 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -44,6 +44,9 @@ struct ocfs2_slot_info { static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, unsigned int node_num); +static int ocfs2_validate_slot_map_block(struct super_block *sb, + struct buffer_head *bh); + static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si, int slot_num) { @@ -132,7 +135,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) * this is not true, the read of -1 (UINT64_MAX) will fail. 
*/ ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks, - si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL); + si->si_bh, OCFS2_BH_IGNORE_CACHE, + ocfs2_validate_slot_map_block); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); @@ -332,6 +336,24 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num) return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num); } +static int ocfs2_validate_slot_map_block(struct super_block *sb, + struct buffer_head *bh) +{ + int rc; + + BUG_ON(!buffer_uptodate(bh)); + + if (bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) { + rc = ocfs2_error(sb, + "Invalid Slot Map Buffer Head " + "Block Number : %llu, Should be >= %d", + (unsigned long long)bh->b_blocknr, + OCFS2_SUPER_BLOCK_BLKNO); + return rc; + } + return 0; +} + static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, struct ocfs2_slot_info *si) { @@ -383,7 +405,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno, - 1, &bh, OCFS2_BH_IGNORE_CACHE, NULL); + 1, &bh, OCFS2_BH_IGNORE_CACHE, + ocfs2_validate_slot_map_block); if (status < 0) { mlog_errno(status); goto bail; From e0b0f2834c9bec51b1068de5cf5e10c7519143eb Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 19 Dec 2025 16:31:52 +0800 Subject: [PATCH 010/107] ocfs2: fix oob in __ocfs2_find_path syzbot constructed a corrupted image, which resulted in el->l_count from the b-tree extent block being 0. Since the length of the l_recs array depends on l_count, reading its member e_blkno triggered the out-of-bounds access reported by syzbot in [1]. The loop terminates when l_count is 0, similar to when next_free is 0. 
[1] UBSAN: array-index-out-of-bounds in fs/ocfs2/alloc.c:1838:11 index 0 is out of range for type 'struct ocfs2_extent_rec[] __counted_by(l_count)' (aka 'struct ocfs2_extent_rec[]') Call Trace: __ocfs2_find_path+0x606/0xa40 fs/ocfs2/alloc.c:1838 ocfs2_find_leaf+0xab/0x1c0 fs/ocfs2/alloc.c:1946 ocfs2_get_clusters_nocache+0x172/0xc60 fs/ocfs2/extent_map.c:418 ocfs2_get_clusters+0x505/0xa70 fs/ocfs2/extent_map.c:631 ocfs2_extent_map_get_blocks+0x202/0x6a0 fs/ocfs2/extent_map.c:678 ocfs2_read_virt_blocks+0x286/0x930 fs/ocfs2/extent_map.c:1001 ocfs2_read_dir_block fs/ocfs2/dir.c:521 [inline] ocfs2_find_entry_el fs/ocfs2/dir.c:728 [inline] ocfs2_find_entry+0x3e4/0x2090 fs/ocfs2/dir.c:1120 ocfs2_find_files_on_disk+0xdf/0x310 fs/ocfs2/dir.c:2023 ocfs2_lookup_ino_from_name+0x52/0x100 fs/ocfs2/dir.c:2045 _ocfs2_get_system_file_inode fs/ocfs2/sysfile.c:136 [inline] ocfs2_get_system_file_inode+0x326/0x770 fs/ocfs2/sysfile.c:112 ocfs2_init_global_system_inodes+0x319/0x660 fs/ocfs2/super.c:461 ocfs2_initialize_super fs/ocfs2/super.c:2196 [inline] ocfs2_fill_super+0x4432/0x65b0 fs/ocfs2/super.c:993 get_tree_bdev_flags+0x40e/0x4d0 fs/super.c:1691 vfs_get_tree+0x92/0x2a0 fs/super.c:1751 fc_mount fs/namespace.c:1199 [inline] Link: https://lkml.kernel.org/r/tencent_4D99464FA28D9225BE0DBA923F5DF6DD8C07@qq.com Signed-off-by: Edward Adam Davis Reported-by: syzbot+151afab124dfbc5f15e6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=151afab124dfbc5f15e6 Reviewed-by: Heming Zhao Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/alloc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 58bf58b68955..b7db177d17d6 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1812,14 +1812,15 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci, ret = -EROFS; goto out; } - if (le16_to_cpu(el->l_next_free_rec) == 
0) { + if (!el->l_next_free_rec || !el->l_count) { ocfs2_error(ocfs2_metadata_cache_get_super(ci), - "Owner %llu has empty extent list at depth %u\n", + "Owner %llu has empty extent list at depth %u\n" + "(next free=%u count=%u)\n", (unsigned long long)ocfs2_metadata_cache_owner(ci), - le16_to_cpu(el->l_tree_depth)); + le16_to_cpu(el->l_tree_depth), + le16_to_cpu(el->l_next_free_rec), le16_to_cpu(el->l_count)); ret = -EROFS; goto out; - } for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) { From 29300f929eb1f9b3e555b834d05f2e9d73da303f Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 21 Oct 2025 13:55:18 +0300 Subject: [PATCH 011/107] ocfs2: annotate more flexible array members with __counted_by_le() Annotate flexible array members of 'struct ocfs2_local_alloc' and 'struct ocfs2_inline_data' with '__counted_by_le()' attribute to improve array bounds checking when CONFIG_UBSAN_BOUNDS is enabled, and prefer the convenient 'memset()' over an explicit loop to simplify 'ocfs2_clear_local_alloc()'. 
Link: https://lkml.kernel.org/r/20251021105518.119953-1-dmantipov@yandex.ru Signed-off-by: Dmitry Antipov Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/localalloc.c | 4 +--- fs/ocfs2/ocfs2_fs.h | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index d1aa04a5af1b..56be21c695d6 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -905,13 +905,11 @@ bail: static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) { struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); - int i; alloc->id1.bitmap1.i_total = 0; alloc->id1.bitmap1.i_used = 0; la->la_bm_off = 0; - for(i = 0; i < le16_to_cpu(la->la_size); i++) - la->la_bitmap[i] = 0; + memset(la->la_bitmap, 0, le16_to_cpu(la->la_size)); } #if 0 diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index f7763da5c4a2..c501eb3cdcda 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -641,7 +641,7 @@ struct ocfs2_local_alloc __le16 la_size; /* Size of included bitmap, in bytes */ __le16 la_reserved1; __le64 la_reserved2; -/*10*/ __u8 la_bitmap[]; +/*10*/ __u8 la_bitmap[] __counted_by_le(la_size); }; /* @@ -654,7 +654,7 @@ struct ocfs2_inline_data * for data, starting at id_data */ __le16 id_reserved0; __le32 id_reserved1; - __u8 id_data[]; /* Start of user data */ + __u8 id_data[] __counted_by_le(id_count); /* Start of user data */ }; /* From 6dcd539f062d89127cb3a84a7da373a9bd28ba7b Mon Sep 17 00:00:00 2001 From: Ryota Sakamoto Date: Mon, 15 Dec 2025 13:43:22 +0000 Subject: [PATCH 012/107] lib/tests: convert test_uuid module to KUnit Move lib/test_uuid.c to lib/tests/uuid_kunit.c and convert it to use KUnit. This change switches the ad-hoc test code to standard KUnit test cases. The test data remains the same, but the verification logic is updated to use KUNIT_EXPECT_* macros. 
Also remove CONFIG_TEST_UUID from arch/*/configs/* because it is no longer used. The new CONFIG_UUID_KUNIT_TEST will be automatically enabled by CONFIG_KUNIT_ALL_TESTS. [lukas.bulwahn@redhat.com: MAINTAINERS: adjust file entry in UUID HELPERS] Link: https://lkml.kernel.org/r/20251217053907.2778515-1-lukas.bulwahn@redhat.com Link: https://lkml.kernel.org/r/20251215134322.12949-1-sakamo.ryota@gmail.com Signed-off-by: Ryota Sakamoto Signed-off-by: Lukas Bulwahn Acked-by: Geert Uytterhoeven Reviewed-by: David Gow Cc: Andriy Shevchenko Cc: Brendan Higgins Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- MAINTAINERS | 2 +- arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - lib/Kconfig.debug | 14 ++- lib/Makefile | 1 - lib/test_uuid.c | 134 --------------------------- lib/tests/Makefile | 1 + lib/tests/uuid_kunit.c | 106 +++++++++++++++++++++ 19 files changed, 119 insertions(+), 152 deletions(-) delete mode 100644 lib/test_uuid.c create mode 100644 lib/tests/uuid_kunit.c diff --git a/MAINTAINERS b/MAINTAINERS index ebc2f1bc0ade..99407c4c0095 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -27260,7 +27260,7 @@ R: Andy Shevchenko L: linux-kernel@vger.kernel.org S: Maintained F: include/linux/uuid.h -F: lib/test_uuid.c +F: lib/tests/uuid_kunit.c F: lib/uuid.c UV SYSFS DRIVER diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index bfc1ee7c8158..1439abb69f73 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -618,7 +618,6 @@ 
CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index d9d1f3c4c70d..6a4e71866f60 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -575,7 +575,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 523205adccc8..46ad7d57b4fc 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -595,7 +595,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 7b0a4ef0b010..867bfa13a44c 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -567,7 +567,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 089c5c394c62..5dfe602cafd4 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -577,7 +577,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 5f2484c36733..f5d30310a349 100644 --- a/arch/m68k/configs/mac_defconfig +++ 
b/arch/m68k/configs/mac_defconfig @@ -594,7 +594,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 74f0a1f6d871..fe54e9222cc0 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -681,7 +681,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4bee18c820e4..4ff2ff0993ad 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -567,7 +567,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 322c17e55c9a..6bb4738a65aa 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -568,7 +568,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 82f9baab8fea..14166c8fe234 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -584,7 +584,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index f94ad226cb5b..5db924e3caf7 100644 --- 
a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -564,7 +564,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index a5ecfc505ab2..318c9fe42f46 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -565,7 +565,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 2d92c11eea7e..684b3ea80f39 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -443,7 +443,6 @@ CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba36939fda79..4bfca37f313e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2499,9 +2499,6 @@ config TEST_BITMAP If unsure, say N. -config TEST_UUID - tristate "Test functions located in the uuid module at runtime" - config TEST_XARRAY tristate "Test the XArray code at runtime" @@ -3285,6 +3282,17 @@ config RATELIMIT_KUNIT_TEST If unsure, say N. +config UUID_KUNIT_TEST + tristate "KUnit test for UUID" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the uuid library, + which provides functions for generating and parsing UUID and GUID. + The test suite checks parsing of UUID and GUID strings. + + If unsure, say N. 
+ config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index aaf677cf4527..586a9f9b27a9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -89,7 +89,6 @@ ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy) GCOV_PROFILE_test_bitmap.o := n endif -obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o diff --git a/lib/test_uuid.c b/lib/test_uuid.c deleted file mode 100644 index 0124fad5d72c..000000000000 --- a/lib/test_uuid.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Test cases for lib/uuid.c module. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include - -struct test_uuid_data { - const char *uuid; - guid_t le; - uuid_t be; -}; - -static const struct test_uuid_data test_uuid_test_data[] = { - { - .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576", - .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), - .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), - }, - { - .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b", - .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), - .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), - }, - { - .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84", - .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), - .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), - }, -}; - -static const char * const test_uuid_wrong_data[] = { - "c33f4995-3701-450e-9fbf206a2e98e576 ", /* no hyphen(s) */ - "64b4371c-77c1-48f9-8221-29f054XX023b", /* invalid character(s) */ - "0cb4ddff-a545-4401-9d06-688af53e", /* not enough data */ -}; - -static unsigned total_tests 
__initdata; -static unsigned failed_tests __initdata; - -static void __init test_uuid_failed(const char *prefix, bool wrong, bool be, - const char *data, const char *actual) -{ - pr_err("%s test #%u %s %s data: '%s'\n", - prefix, - total_tests, - wrong ? "passed on wrong" : "failed on", - be ? "BE" : "LE", - data); - if (actual && *actual) - pr_err("%s test #%u actual data: '%s'\n", - prefix, - total_tests, - actual); - failed_tests++; -} - -static void __init test_uuid_test(const struct test_uuid_data *data) -{ - guid_t le; - uuid_t be; - char buf[48]; - - /* LE */ - total_tests++; - if (guid_parse(data->uuid, &le)) - test_uuid_failed("conversion", false, false, data->uuid, NULL); - - total_tests++; - if (!guid_equal(&data->le, &le)) { - sprintf(buf, "%pUl", &le); - test_uuid_failed("cmp", false, false, data->uuid, buf); - } - - /* BE */ - total_tests++; - if (uuid_parse(data->uuid, &be)) - test_uuid_failed("conversion", false, true, data->uuid, NULL); - - total_tests++; - if (!uuid_equal(&data->be, &be)) { - sprintf(buf, "%pUb", &be); - test_uuid_failed("cmp", false, true, data->uuid, buf); - } -} - -static void __init test_uuid_wrong(const char *data) -{ - guid_t le; - uuid_t be; - - /* LE */ - total_tests++; - if (!guid_parse(data, &le)) - test_uuid_failed("negative", true, false, data, NULL); - - /* BE */ - total_tests++; - if (!uuid_parse(data, &be)) - test_uuid_failed("negative", true, true, data, NULL); -} - -static int __init test_uuid_init(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++) - test_uuid_test(&test_uuid_test_data[i]); - - for (i = 0; i < ARRAY_SIZE(test_uuid_wrong_data); i++) - test_uuid_wrong(test_uuid_wrong_data[i]); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_err("failed %u out of %u tests\n", failed_tests, total_tests); - - return failed_tests ? 
-EINVAL : 0; -} -module_init(test_uuid_init); - -static void __exit test_uuid_exit(void) -{ - /* do nothing */ -} -module_exit(test_uuid_exit); - -MODULE_AUTHOR("Andy Shevchenko "); -MODULE_DESCRIPTION("Test cases for lib/uuid.c module"); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/tests/Makefile b/lib/tests/Makefile index 601dba4b7d96..9a20608f65f5 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -49,5 +49,6 @@ obj-$(CONFIG_STRING_HELPERS_KUNIT_TEST) += string_helpers_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o obj-$(CONFIG_RATELIMIT_KUNIT_TEST) += test_ratelimit.o +obj-$(CONFIG_UUID_KUNIT_TEST) += uuid_kunit.o obj-$(CONFIG_TEST_RUNTIME_MODULE) += module/ diff --git a/lib/tests/uuid_kunit.c b/lib/tests/uuid_kunit.c new file mode 100644 index 000000000000..de71b2649dac --- /dev/null +++ b/lib/tests/uuid_kunit.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* + * Test cases for lib/uuid.c module. 
+ */ + +#include +#include + +struct test_uuid_data { + const char *uuid; + guid_t le; + uuid_t be; +}; + +static const struct test_uuid_data test_uuid_test_data[] = { + { + .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576", + .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + }, + { + .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b", + .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + }, + { + .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84", + .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + }, +}; + +static const char * const test_uuid_wrong_data[] = { + "c33f4995-3701-450e-9fbf206a2e98e576 ", /* no hyphen(s) */ + "64b4371c-77c1-48f9-8221-29f054XX023b", /* invalid character(s) */ + "0cb4ddff-a545-4401-9d06-688af53e", /* not enough data */ +}; + +static void uuid_test_guid_valid(struct kunit *test) +{ + unsigned int i; + const struct test_uuid_data *data; + guid_t le; + + for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++) { + data = &test_uuid_test_data[i]; + KUNIT_EXPECT_EQ(test, guid_parse(data->uuid, &le), 0); + KUNIT_EXPECT_TRUE(test, guid_equal(&data->le, &le)); + } +} + +static void uuid_test_uuid_valid(struct kunit *test) +{ + unsigned int i; + const struct test_uuid_data *data; + uuid_t be; + + for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++) { + data = &test_uuid_test_data[i]; + KUNIT_EXPECT_EQ(test, uuid_parse(data->uuid, &be), 0); + KUNIT_EXPECT_TRUE(test, uuid_equal(&data->be, &be)); + } +} + +static void uuid_test_guid_invalid(struct kunit *test) +{ + unsigned int i; + const char *uuid; + guid_t le; + + for (i = 0; i < 
ARRAY_SIZE(test_uuid_wrong_data); i++) { + uuid = test_uuid_wrong_data[i]; + KUNIT_EXPECT_EQ(test, guid_parse(uuid, &le), -EINVAL); + } +} + +static void uuid_test_uuid_invalid(struct kunit *test) +{ + unsigned int i; + const char *uuid; + uuid_t be; + + for (i = 0; i < ARRAY_SIZE(test_uuid_wrong_data); i++) { + uuid = test_uuid_wrong_data[i]; + KUNIT_EXPECT_EQ(test, uuid_parse(uuid, &be), -EINVAL); + } +} + +static struct kunit_case uuid_test_cases[] = { + KUNIT_CASE(uuid_test_guid_valid), + KUNIT_CASE(uuid_test_uuid_valid), + KUNIT_CASE(uuid_test_guid_invalid), + KUNIT_CASE(uuid_test_uuid_invalid), + {}, +}; + +static struct kunit_suite uuid_test_suite = { + .name = "uuid", + .test_cases = uuid_test_cases, +}; + +kunit_test_suite(uuid_test_suite); + +MODULE_AUTHOR("Andy Shevchenko "); +MODULE_DESCRIPTION("Test cases for lib/uuid.c module"); +MODULE_LICENSE("Dual BSD/GPL"); From 24c776355f4097316a763005434ffff716aa21a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 14 Dec 2025 16:51:56 -0800 Subject: [PATCH 013/107] kernel.h: drop hex.h and update all hex.h users Remove from and update all users/callers of hex.h interfaces to directly #include as part of the process of putting kernel.h on a diet. Removing hex.h from kernel.h means that 36K C source files don't have to pay the price of parsing hex.h for the roughly 120 C source files that need it. This change has been build-tested with allmodconfig on most ARCHes. Also, all users/callers of in the entire source tree have been updated if needed (if not already #included). 
Link: https://lkml.kernel.org/r/20251215005206.2362276-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reviewed-by: Andy Shevchenko Cc: Ingo Molnar Cc: Yury Norov (NVIDIA) Signed-off-by: Andrew Morton --- arch/mips/kernel/setup.c | 1 + arch/mips/rb532/devices.c | 1 + arch/powerpc/kernel/btext.c | 1 + arch/s390/kernel/alternative.c | 1 + arch/s390/kernel/stackprotector.c | 1 + arch/um/drivers/vector_kern.c | 1 + arch/xtensa/platforms/iss/network.c | 1 + certs/blacklist.c | 1 + crypto/asymmetric_keys/asymmetric_type.c | 1 + crypto/asymmetric_keys/x509_public_key.c | 1 + crypto/krb5/selftest.c | 1 + drivers/atm/nicstar.c | 1 + drivers/auxdisplay/hd44780_common.c | 1 + drivers/auxdisplay/lcd2s.c | 1 + drivers/bus/moxtet.c | 1 + drivers/char/tpm/tpm.h | 1 + drivers/comedi/drivers/jr3_pci.c | 1 + drivers/firmware/broadcom/bcm47xx_sprom.c | 1 + drivers/gpio/gpio-macsmc.c | 1 + drivers/hid/hid-picolcd_debugfs.c | 1 + drivers/hwmon/pmbus/q54sj108a2.c | 1 + drivers/hwmon/pmbus/ucd9000.c | 1 + drivers/infiniband/ulp/srp/ib_srp.c | 1 + drivers/infiniband/ulp/srpt/ib_srpt.c | 1 + drivers/input/touchscreen/iqs5xx.c | 1 + drivers/md/dm-crypt.c | 1 + drivers/md/dm-integrity.c | 1 + drivers/md/dm-verity-target.c | 1 + .../media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c | 1 + drivers/media/cec/usb/rainshadow/rainshadow-cec.c | 1 + drivers/media/i2c/ccs/ccs-reg-access.c | 1 + drivers/media/usb/pvrusb2/pvrusb2-debugifc.c | 1 + drivers/misc/kgdbts.c | 1 + drivers/misc/pch_phub.c | 1 + drivers/net/bonding/bond_options.c | 1 + drivers/net/can/can327.c | 1 + drivers/net/can/slcan/slcan-core.c | 1 + drivers/net/ethernet/chelsio/cxgb3/common.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c | 1 + drivers/net/ethernet/micrel/ksz884x.c | 1 + drivers/net/ethernet/pasemi/pasemi_mac.c | 1 + drivers/net/netconsole.c | 1 + drivers/net/netdevsim/dev.c | 1 + drivers/net/usb/r8152.c | 1 + drivers/net/usb/usbnet.c | 1 + drivers/net/wireless/ath/ath6kl/debug.c | 1 + 
drivers/net/wireless/intel/iwlwifi/fw/debugfs.c | 1 + drivers/net/wireless/intel/iwlwifi/mld/debugfs.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 1 + drivers/net/wireless/realtek/rtw89/debug.c | 1 + drivers/net/wireless/silabs/wfx/fwio.c | 1 + drivers/nvme/target/configfs.c | 1 + drivers/nvme/target/core.c | 1 + drivers/nvmem/brcm_nvram.c | 1 + drivers/nvmem/layouts/u-boot-env.c | 1 + drivers/platform/x86/intel/wmi/thunderbolt.c | 1 + drivers/pnp/support.c | 1 + drivers/ptp/ptp_pch.c | 1 + drivers/s390/cio/blacklist.c | 1 + drivers/s390/crypto/ap_bus.c | 1 + drivers/s390/crypto/zcrypt_cex4.c | 1 + drivers/s390/virtio/virtio_ccw.c | 1 + drivers/scsi/aacraid/rx.c | 1 + drivers/scsi/ips.c | 1 + drivers/scsi/libsas/sas_scsi_host.c | 1 + drivers/scsi/qla2xxx/tcm_qla2xxx.c | 1 + drivers/scsi/scsi_transport_fc.c | 1 + drivers/staging/rtl8723bs/core/rtw_ieee80211.c | 1 + drivers/target/iscsi/iscsi_target_auth.c | 1 + drivers/target/target_core_fabric_lib.c | 1 + drivers/target/target_core_spc.c | 1 + drivers/target/tcm_fc/tfc_conf.c | 1 + drivers/thunderbolt/switch.c | 1 + drivers/tty/vt/vt.c | 1 + drivers/ufs/core/ufshcd.c | 1 + drivers/usb/atm/speedtch.c | 1 + drivers/usb/atm/ueagle-atm.c | 1 + drivers/usb/gadget/function/u_ether.c | 1 + drivers/usb/gadget/function/uvc_configfs.c | 1 + drivers/usb/typec/ucsi/debugfs.c | 1 + drivers/usb/typec/ucsi/ucsi_ccg.c | 1 + drivers/watchdog/hpwdt.c | 1 + fs/adfs/dir.c | 1 + fs/binfmt_misc.c | 1 + fs/ecryptfs/ecryptfs_kernel.h | 1 + fs/efivarfs/vars.c | 1 + fs/fat/dir.c | 1 + fs/fat/namei_vfat.c | 1 + fs/gfs2/lock_dlm.c | 1 + fs/nfsd/nfs4recover.c | 1 + fs/ntfs3/ntfs_fs.h | 1 + fs/overlayfs/namei.c | 1 + fs/proc/array.c | 1 + fs/seq_file.c | 1 + fs/udf/unicode.c | 1 + include/linux/kernel.h | 1 - kernel/audit.c | 1 + kernel/bpf/core.c | 1 + kernel/bpf/syscall.c | 1 + kernel/debug/gdbstub.c | 1 + lib/hexdump.c | 1 
+ lib/string_helpers.c | 1 + lib/uuid.c | 1 + lib/vsprintf.c | 1 + net/bridge/br_sysfs_br.c | 1 + net/core/pktgen.c | 1 + net/core/utils.c | 1 + net/ipv4/arp.c | 1 + net/mac80211/debugfs_netdev.c | 1 + net/sunrpc/cache.c | 1 + net/tipc/core.h | 1 + security/integrity/evm/evm_crypto.c | 1 + security/integrity/ima/ima_api.c | 1 + security/ipe/digest.c | 1 + security/keys/encrypted-keys/encrypted.c | 1 + security/keys/trusted-keys/trusted_core.c | 1 + security/keys/trusted-keys/trusted_tpm1.c | 1 + security/loadpin/loadpin.c | 1 + security/selinux/selinuxfs.c | 1 + sound/pci/riptide/riptide.c | 1 + sound/usb/6fire/firmware.c | 1 + 123 files changed, 122 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 11b9b6b63e19..c540431ed332 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index b7f6f782d9a1..8ecb56be81ac 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index ca00c4824e31..b23dddfce26d 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 90c0e6408992..02d04ae621ba 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -4,6 +4,7 @@ #define pr_fmt(fmt) "alt: " fmt #endif +#include #include #include #include diff --git a/arch/s390/kernel/stackprotector.c b/arch/s390/kernel/stackprotector.c index d4e40483f008..8bd3ecf9200a 100644 --- a/arch/s390/kernel/stackprotector.c +++ b/arch/s390/kernel/stackprotector.c @@ -5,6 +5,7 @@ #endif #include +#include #include #include #include diff 
--git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 25d9258fa592..28cfe1c700f0 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index f0a63b2f85cc..832579143891 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "%s: " fmt, __func__ +#include #include #include #include diff --git a/certs/blacklist.c b/certs/blacklist.c index 675dd7a8f07a..11fc858b2921 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index 348966ea2175..b7a08de58064 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 12e3341e806b..0499b13ba3ef 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/krb5/selftest.c b/crypto/krb5/selftest.c index 4519c572d37e..67c4accd8cbd 100644 --- a/crypto/krb5/selftest.c +++ b/crypto/krb5/selftest.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 45952cfea06b..bc8dbba77b87 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/auxdisplay/hd44780_common.c b/drivers/auxdisplay/hd44780_common.c 
index 1792fe2a4460..b71db39f9249 100644 --- a/drivers/auxdisplay/hd44780_common.c +++ b/drivers/auxdisplay/hd44780_common.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include #include diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c index 045dbef49dee..defb0573e43c 100644 --- a/drivers/auxdisplay/lcd2s.c +++ b/drivers/auxdisplay/lcd2s.c @@ -11,6 +11,7 @@ * Author: Lars Pöschel * All rights reserved. */ +#include #include #include #include diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 7ce61d629a87..5a53bfab470a 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 02c07fef41ba..87d68ddf270a 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/drivers/comedi/drivers/jr3_pci.c b/drivers/comedi/drivers/jr3_pci.c index 61792d940a3d..51287cbc3e48 100644 --- a/drivers/comedi/drivers/jr3_pci.c +++ b/drivers/comedi/drivers/jr3_pci.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/firmware/broadcom/bcm47xx_sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c index fdcd3a07abcd..bca03fd85808 100644 --- a/drivers/firmware/broadcom/bcm47xx_sprom.c +++ b/drivers/firmware/broadcom/bcm47xx_sprom.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpio/gpio-macsmc.c b/drivers/gpio/gpio-macsmc.c index 30ef258e7655..b0952d066a9d 100644 --- a/drivers/gpio/gpio-macsmc.c +++ b/drivers/gpio/gpio-macsmc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/drivers/hid/hid-picolcd_debugfs.c b/drivers/hid/hid-picolcd_debugfs.c index d01176da8896..085847a92e07 100644 --- a/drivers/hid/hid-picolcd_debugfs.c +++ 
b/drivers/hid/hid-picolcd_debugfs.c @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/drivers/hwmon/pmbus/q54sj108a2.c b/drivers/hwmon/pmbus/q54sj108a2.c index 4d7086d83aa3..fc030ca34480 100644 --- a/drivers/hwmon/pmbus/q54sj108a2.c +++ b/drivers/hwmon/pmbus/q54sj108a2.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 55e7af3a5f98..9b5d34a110ba 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 23ed2fc688f0..2012ba22a7af 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -33,6 +33,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 71269446353d..e314e6a84d96 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -33,6 +33,7 @@ */ #include +#include #include #include #include diff --git a/drivers/input/touchscreen/iqs5xx.c b/drivers/input/touchscreen/iqs5xx.c index 4ebd7565ae6e..c63819abaf9b 100644 --- a/drivers/input/touchscreen/iqs5xx.c +++ b/drivers/input/touchscreen/iqs5xx.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 79704fbc523b..cbeb5f918d09 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 170bf67a2edd..b41424a4c139 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include 
#include diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 5c17472d7896..ca094f14a287 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -17,6 +17,7 @@ #include "dm-verity-fec.h" #include "dm-verity-verify-sig.h" #include "dm-audit.h" +#include #include #include #include diff --git a/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c b/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c index e2eff17952ab..bf92576bb2fc 100644 --- a/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c +++ b/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/cec/usb/rainshadow/rainshadow-cec.c b/drivers/media/cec/usb/rainshadow/rainshadow-cec.c index 08f58456d682..6c0cee4b066f 100644 --- a/drivers/media/cec/usb/rainshadow/rainshadow-cec.c +++ b/drivers/media/cec/usb/rainshadow/rainshadow-cec.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/i2c/ccs/ccs-reg-access.c b/drivers/media/i2c/ccs/ccs-reg-access.c index fd36889ccc1d..a0181a5d2f34 100644 --- a/drivers/media/i2c/ccs/ccs-reg-access.c +++ b/drivers/media/i2c/ccs/ccs-reg-access.c @@ -12,6 +12,7 @@ #include #include +#include #include #include "ccs.h" diff --git a/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c b/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c index 81d711269ab5..9f936085acbb 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c @@ -4,6 +4,7 @@ * Copyright (C) 2005 Mike Isely */ +#include #include #include "pvrusb2-debugifc.h" #include "pvrusb2-hdw.h" diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 0cf31164b470..3b7a041ea351 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include 
diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c index 7bee179841bc..0d63e834dbe7 100644 --- a/drivers/misc/pch_phub.c +++ b/drivers/misc/pch_phub.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 384499c869b8..fa65a0e92b8e 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index b66fc16aedd2..90f5e35f3c8f 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index cd789e178d34..7439849d5c84 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h index ecd025dda8d6..14000977730c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/common.h +++ b/drivers/net/ethernet/chelsio/cxgb3/common.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c index 030a5776c937..8803fa071c50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c @@ -2,6 +2,7 @@ // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
#include +#include #include #include #include diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index cdde19b8edc4..4980a6e44607 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index fe58024b5901..00909372ea61 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 9cb4dfc242f5..bbf9c02e09d4 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 2683a989873e..351ff4ed3eac 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index fa5192583860..29179e582067 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 36742e64cff7..960f200cd52c 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c index b837d31416df..84403aab21c0 100644 --- a/drivers/net/wireless/ath/ath6kl/debug.c +++ b/drivers/net/wireless/ath/ath6kl/debug.c @@ -19,6 +19,7 @@ #include #include +#include #include #include diff --git 
a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c index 3b0e8c43ba4a..3c4bee85b825 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c @@ -7,6 +7,7 @@ #include "api/commands.h" #include "debugfs.h" #include "dbg.h" +#include #include #define FWRT_DEBUGFS_OPEN_WRAPPER(name, buflen, argtype) \ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c index b9c9cd3f44e4..ce2fc98782c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c @@ -24,6 +24,7 @@ #include "fw/api/rfi.h" #include "fw/dhc-utils.h" #include +#include #define MLD_DEBUGFS_READ_FILE_OPS(name, bufsz) \ _MLD_DEBUGFS_READ_FILE_OPS(name, bufsz, struct iwl_mld) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 683c0ba5fb39..e6b9896dc4ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 301d590fe0bd..d7e9c2b7980e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #ifdef CONFIG_THERMAL diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index e16865dd8e52..c93fd245c90f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -5,6 +5,7 @@ #define __MT7615_H #include +#include #include #include #include diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 
1264c2f82600..8666e26c8c80 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -2,6 +2,7 @@ /* Copyright(c) 2019-2020 Realtek Corporation */ +#include #include #include "coex.h" diff --git a/drivers/net/wireless/silabs/wfx/fwio.c b/drivers/net/wireless/silabs/wfx/fwio.c index 52c7f560b062..edd5ac30ed19 100644 --- a/drivers/net/wireless/silabs/wfx/fwio.c +++ b/drivers/net/wireless/silabs/wfx/fwio.c @@ -6,6 +6,7 @@ * Copyright (c) 2010, ST-Ericsson */ #include +#include #include #include #include diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e44ef69dffc2..127dae51fec1 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -4,6 +4,7 @@ * Copyright (c) 2015-2016 HGST, a Western Digital Company. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cc88e5a28c8a..eab3e4fc0f74 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -4,6 +4,7 @@ * Copyright (c) 2015-2016 HGST, a Western Digital Company. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include diff --git a/drivers/nvmem/brcm_nvram.c b/drivers/nvmem/brcm_nvram.c index b4cf245fb246..2dce6a7b8039 100644 --- a/drivers/nvmem/brcm_nvram.c +++ b/drivers/nvmem/brcm_nvram.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/drivers/nvmem/layouts/u-boot-env.c b/drivers/nvmem/layouts/u-boot-env.c index ab32bf1291af..f27f387bb52a 100644 --- a/drivers/nvmem/layouts/u-boot-env.c +++ b/drivers/nvmem/layouts/u-boot-env.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/platform/x86/intel/wmi/thunderbolt.c b/drivers/platform/x86/intel/wmi/thunderbolt.c index 08df560a2c7a..15e5763a20dd 100644 --- a/drivers/platform/x86/intel/wmi/thunderbolt.c +++ b/drivers/platform/x86/intel/wmi/thunderbolt.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index a6073db10ec6..f6c866851769 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -9,6 +9,7 @@ #include #include +#include #include #include "base.h" diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index b8a9a54a176c..f854da2fd812 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 738d5e2d5304..020d210bde9f 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "cio: " fmt +#include #include #include #include diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index a445494fd2be..6b1b7b014816 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/crypto/zcrypt_cex4.c 
b/drivers/s390/crypto/zcrypt_cex4.c index 6ba7fbddd3f7..e9a984903bff 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 4904b831c0a7..1653cc668dcf 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index e06ff83b69ce..ba9f3256c258 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 3393a288fd23..40af961382dc 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -167,6 +167,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index ffa5b49aaf08..da02457f0b09 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "sas_internal.h" diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 2fff68935338..9f16164faa1e 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 987befb02408..6bd68f493f20 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c index 8fdeeda88a6d..e89b24fa5e05 100644 --- 
a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c +++ b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c @@ -6,6 +6,7 @@ ******************************************************************************/ #include +#include #include #include diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index c8a248bd11be..2c4d583fe3e6 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index ec7bc6e30228..87c5d26a5089 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -16,6 +16,7 @@ * on the formats implemented in this file. */ +#include #include #include #include diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index fe2b888bcb43..6360b66c7445 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -7,6 +7,7 @@ * Nicholas A. 
Bellinger */ +#include #include #include #include diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index f686d95d3273..a29b20b5f78e 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index b3948aad0b95..e2732c575bad 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 59b4b5e126ba..edda91bfdf62 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 604043a7533d..31950fc51a4c 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c index 773ac2725532..e6b610a87482 100644 --- a/drivers/usb/atm/speedtch.c +++ b/drivers/usb/atm/speedtch.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index cd0f7b4bd82a..78a2585f33ec 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index f58590bf5e02..c47965d850d4 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 
a4a2d3dcb0d6..5a87516ddb31 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -12,6 +12,7 @@ #include "uvc_configfs.h" +#include #include #include #include diff --git a/drivers/usb/typec/ucsi/debugfs.c b/drivers/usb/typec/ucsi/debugfs.c index f3684ab787fe..d1f5832165c3 100644 --- a/drivers/usb/typec/ucsi/debugfs.c +++ b/drivers/usb/typec/ucsi/debugfs.c @@ -8,6 +8,7 @@ * Gopal Saranya */ #include +#include #include #include #include diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index d83a0051c737..199799b319c2 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index ae30e394d176..2a848c35c14d 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 77fbd196008f..4f9dc276da6f 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -6,6 +6,7 @@ * * Common directory handling for ADFS */ +#include #include #include "adfs.h" diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 8cb1a94339b8..2b772613a74c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 62a2ea7f59ed..0acc1e638454 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 6edc10958ecf..798a1bc36022 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fat/dir.c 
b/fs/fat/dir.c index 92b091783966..af7dedf8adcb 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include "fat.h" diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 5dbc4cbb8fce..4f3cc2b3089e 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "fat.h" diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index b8d249925395..065ade6a1192 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 441dfbfe2d2b..1e6b2dd47ba7 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index a4559c9f64e6..f18349689458 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index e9a69c95be91..cda26bdef3b9 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index 42932f88141a..39e9246f6e4a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -55,6 +55,7 @@ #include #include +#include #include #include #include diff --git a/fs/seq_file.c b/fs/seq_file.c index 8bbb1ad46335..8894cbde8d3a 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 32c7f3d27f74..87580ff827ee 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -16,6 +16,7 @@ #include "udfdecl.h" +#include #include #include /* for memset */ #include diff --git a/include/linux/kernel.h 
b/include/linux/kernel.h index 5b46924fdff5..35b8f2a5aca5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/audit.c b/kernel/audit.c index 26a332ffb1b8..2f2db2907055 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -32,6 +32,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1b9b18e5b03c..f1c5fc66ef01 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4ff82144f885..4216de60e371 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 22fe969c5d2e..f586afd76c80 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/lib/hexdump.c b/lib/hexdump.c index c3db7c3a7643..2e5cd8c24769 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/string_helpers.c b/lib/string_helpers.c index ffb8ead6d4cd..8cb6f66c9c2b 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/uuid.c b/lib/uuid.c index e309b4c5be3d..e8543c668dc7 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a3790c43a0ab..800b8ac49f53 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/net/bridge/br_sysfs_br.c 
b/net/bridge/br_sysfs_br.c index cb4855ed9500..dcd727345cac 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d41b03fd1f63..8e185b318288 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -126,6 +126,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/utils.c b/net/core/utils.c index 5e63b0ea21f3..dd86913988f4 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c8c3e1713c0e..51d70180e1cc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 30a5a978a678..f3c6a41e4911 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 131090f31e6a..d808c0b63f30 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/tipc/core.h b/net/tipc/core.h index 7f3fe3401c45..9ce5f9ff6cc0 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index a5e730ffda57..465a32f59c11 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "EVM: "fmt #include +#include #include #include #include diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index c35ea613c9f8..c6d1c7be8a3e 100644 --- 
a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/security/ipe/digest.c b/security/ipe/digest.c index 493716370570..5006366837ba 100644 --- a/security/ipe/digest.c +++ b/security/ipe/digest.c @@ -3,6 +3,7 @@ * Copyright (C) 2020-2024 Microsoft Corporation. All rights reserved. */ +#include #include "digest.h" /** diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 596e7a30bd3c..56b531587a1e 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index b1680ee53f86..16168ba5c83e 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c index 636acb66a4f6..c865c97aa1b4 100644 --- a/security/keys/trusted-keys/trusted_tpm1.c +++ b/security/keys/trusted-keys/trusted_tpm1.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 273ffbd6defe..019840006096 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 896acad1f5f7..4d58c7ad1a23 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index e983cd657e28..f91fe64bf4f9 100644 --- 
a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -75,6 +75,7 @@ */ #include +#include #include #include #include diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index c51abc54d2f8..cc8caec946cc 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "firmware.h" From 436debc9cad892576d4f3287446b64474922764c Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Thu, 11 Dec 2025 11:43:49 +0100 Subject: [PATCH 014/107] array_size.h: add ARRAY_END() Patch series "Add ARRAY_END(), and use it to fix off-by-one bugs", v6. Add ARRAY_END(), and use it to fix off-by-one bugs ARRAY_END() is a macro to calculate a pointer to one past the last element of an array argument. This is a very common pointer, which is used to iterate over all elements of an array: for (T *p = a; p < ARRAY_END(a); p++) ... Of course, this pointer should never be dereferenced. A pointer one past the last element of an array should not be dereferenced; it's perfectly fine to hold such a pointer --and a good thing to do--, but the only thing it should be used for is comparing it with other pointers derived from the same array. Due to how special these pointers are, it would be good to use consistent naming. It's common to name such a pointer 'end' --in fact, we have many such cases in the kernel--. C++ even standardized this name with std::end(). Let's try naming such pointers 'end', and try also avoid using 'end' for pointers that are not the result of ARRAY_END(). It has been incorrectly suggested that these pointers are dangerous, and that they should never be used, suggesting to use something like #define ARRAY_LAST(a) ((a) + ARRAY_SIZE(a) - 1) for (T *p = a; p <= ARRAY_LAST(a); p++) ... This is bogus, as it doesn't scale down to arrays of 0 elements. 
In the case of an array of 0 elements, ARRAY_LAST() would underflow the pointer, which not only can't be dereferenced, it can't even be held (it produces Undefined Behavior). That would be a footgun. Such arrays don't exist per the ISO C standard; however, GCC supports them as an extension (with partial support, though; GCC has a few bugs which need to be fixed). This patch set fixes a few places where it was intended to use the array end (that is, one past the last element), but accidentally a pointer to the last element was used instead, thus wasting one byte. It also replaces other places where the array end was correctly calculated with ARRAY_SIZE(), by using the simpler ARRAY_END(). Also, there was one drivers/ file that already defined this macro. We remove that definition, to not conflict with this one. This patch (of 4): ARRAY_END() returns a pointer one past the last element in the array argument. This pointer is useful for iterating over the elements of an array: for (T *p = a; p < ARRAY_END(a); p++) ... Link: https://lkml.kernel.org/r/cover.1765449750.git.alx@kernel.org Link: https://lkml.kernel.org/r/5973cfb674192bc8e533485dbfb54e3062896be1.1765449750.git.alx@kernel.org Signed-off-by: Alejandro Colomar Cc: Kees Cook Cc: Christopher Bazley Cc: Rasmus Villemoes Cc: Marco Elver Cc: Michal Hocko Cc: Linus Torvalds Cc: Al Viro Cc: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Jann Horn Cc: Maciej W. 
Rozycki Signed-off-by: Andrew Morton --- drivers/block/floppy.c | 2 -- include/linux/array_size.h | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c28786e0fe1c..92e446a64371 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4802,8 +4802,6 @@ static void floppy_release_allocated_regions(int fdc, const struct io_region *p) } } -#define ARRAY_END(X) (&((X)[ARRAY_SIZE(X)])) - static int floppy_request_regions(int fdc) { const struct io_region *p; diff --git a/include/linux/array_size.h b/include/linux/array_size.h index 06d7d83196ca..0c4fec98822e 100644 --- a/include/linux/array_size.h +++ b/include/linux/array_size.h @@ -10,4 +10,10 @@ */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) +/** + * ARRAY_END - get a pointer to one past the last element in array @arr + * @arr: array + */ +#define ARRAY_END(arr) (&(arr)[ARRAY_SIZE(arr)]) + #endif /* _LINUX_ARRAY_SIZE_H */ From 8118f197b7b738285eb4b66a80d01a5c8f35e231 Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Thu, 11 Dec 2025 11:43:54 +0100 Subject: [PATCH 015/107] mm: fix benign off-by-one bugs We were wasting a byte due to an off-by-one bug. s[c]nprintf() doesn't write more than $2 bytes including the null byte, so trying to pass 'size-1' there is wasting one byte. Link: https://lkml.kernel.org/r/9c38dd009c17b0219889c7089d9bdde5aaf28a8e.1765449750.git.alx@kernel.org Signed-off-by: Alejandro Colomar Acked-by: Marco Elver Cc: Kees Cook Cc: Christopher Bazley Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Cc: Linus Torvalds Cc: Rasmus Villemoes Cc: Michal Hocko Cc: Al Viro Cc: Maciej W. 
Rozycki Signed-off-by: Andrew Morton --- mm/kfence/kfence_test.c | 4 ++-- mm/kmsan/kmsan_test.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 00034e37bc9f..5725a367246d 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -110,7 +110,7 @@ static bool report_matches(const struct expect_report *r) /* Title */ cur = expect[0]; - end = &expect[0][sizeof(expect[0]) - 1]; + end = ARRAY_END(expect[0]); switch (r->type) { case KFENCE_ERROR_OOB: cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds %s", @@ -140,7 +140,7 @@ static bool report_matches(const struct expect_report *r) /* Access information */ cur = expect[1]; - end = &expect[1][sizeof(expect[1]) - 1]; + end = ARRAY_END(expect[1]); switch (r->type) { case KFENCE_ERROR_OOB: diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c index 902ec48b1e3e..b5ad5dfb2c00 100644 --- a/mm/kmsan/kmsan_test.c +++ b/mm/kmsan/kmsan_test.c @@ -105,7 +105,7 @@ static bool report_matches(const struct expect_report *r) /* Title */ cur = expected_header; - end = &expected_header[sizeof(expected_header) - 1]; + end = ARRAY_END(expected_header); cur += scnprintf(cur, end - cur, "BUG: KMSAN: %s", r->error_type); From a9e5620c9a9e237b3344702dec0839b89159a060 Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Thu, 11 Dec 2025 11:44:00 +0100 Subject: [PATCH 016/107] kernel: fix off-by-one benign bugs We were wasting a byte due to an off-by-one bug. s[c]nprintf() doesn't write more than $2 bytes including the null byte, so trying to pass 'size-1' there is wasting one byte. This is essentially the same as the previous commit, in a different file. 
Link: https://lkml.kernel.org/r/b4a945a4d40b7104364244f616eb9fb9f1fa691f.1765449750.git.alx@kernel.org Signed-off-by: Alejandro Colomar Cc: Marco Elver Cc: Kees Cook Cc: Christopher Bazley Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Cc: Linus Torvalds Cc: Rasmus Villemoes Cc: Marco Elver Cc: Michal Hocko Cc: Al Viro Cc: Maciej W. Rozycki Signed-off-by: Andrew Morton --- kernel/kcsan/kcsan_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index 219d22857c98..8ef8167be745 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -176,7 +176,7 @@ static bool __report_matches(const struct expect_report *r) /* Title */ cur = expect[0]; - end = &expect[0][sizeof(expect[0]) - 1]; + end = ARRAY_END(expect[0]); cur += scnprintf(cur, end - cur, "BUG: KCSAN: %s in ", is_assert ? "assert: race" : "data-race"); if (r->access[1].fn) { @@ -200,7 +200,7 @@ static bool __report_matches(const struct expect_report *r) /* Access 1 */ cur = expect[1]; - end = &expect[1][sizeof(expect[1]) - 1]; + end = ARRAY_END(expect[1]); if (!r->access[1].fn) cur += scnprintf(cur, end - cur, "race at unknown origin, with "); From 61e9210e23921cf1176af23b426b9bad8b08ffff Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Thu, 11 Dec 2025 11:44:04 +0100 Subject: [PATCH 017/107] mm: use ARRAY_END() instead of open-coding it There aren't any bugs in this code; it's purely cosmetic. By using ARRAY_END(), we prevent future issues, in case the code is modified; it has less moving parts. Also, it should be more readable (and perhaps more importantly, greppable), as there are several ways of writing an expression that gets the end of an array, which are unified by this API name. 
Link: https://lkml.kernel.org/r/2335917d123891fec074ab1b3acfb517cf14b5a7.1765449750.git.alx@kernel.org Signed-off-by: Alejandro Colomar Cc: Kees Cook Cc: Linus Torvalds Cc: Alexander Potapenko Cc: Al Viro Cc: Christopher Bazley Cc: Dmitriy Vyukov Cc: Jann Horn Cc: Maciej W. Rozycki Cc: Marco Elver Cc: Michal Hocko Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- mm/kmemleak.c | 2 +- mm/memcontrol-v1.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 1ac56ceb29b6..fe33f2edfe07 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -510,7 +510,7 @@ static void mem_pool_free(struct kmemleak_object *object) { unsigned long flags; - if (object < mem_pool || object >= mem_pool + ARRAY_SIZE(mem_pool)) { + if (object < mem_pool || object >= ARRAY_END(mem_pool)) { kmem_cache_free(object_cache, object); return; } diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 6eed14bff742..b2f37bd939fa 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1794,7 +1794,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) mem_cgroup_flush_stats(memcg); - for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { + for (stat = stats; stat < ARRAY_END(stats); stat++) { seq_printf(m, "%s=%lu", stat->name, mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, false)); @@ -1805,7 +1805,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) seq_putc(m, '\n'); } - for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { + for (stat = stats; stat < ARRAY_END(stats); stat++) { seq_printf(m, "hierarchical_%s=%lu", stat->name, mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, From 76103d1b268e6f45735aa92d70bea5b5e8174a70 Mon Sep 17 00:00:00 2001 From: Pnina Feder Date: Tue, 16 Dec 2025 15:28:00 +0200 Subject: [PATCH 018/107] kernel: vmcoreinfo: allocate vmcoreinfo_data based on VMCOREINFO_BYTES Patch series "vmcoreinfo: support VMCOREINFO_BYTES larger than PAGE_SIZE". 
VMCOREINFO_BYTES is defined as a configurable size, but multiple code paths implicitly assume it always fits into a single page. This series removes that assumption by allocating and mapping vmcoreinfo based on its actual size. Patch 1 updates vmcoreinfo allocation to use get_order(VMCOREINFO_BYTES). Patch 2 updates crash kernel handling to correctly allocate and map multiple pages when copying vmcoreinfo. This makes vmcoreinfo size consistent across the kernel and avoids future breakage if VMCOREINFO_BYTES grows. (No functional change when VMCOREINFO_BYTES == PAGE_SIZE.) This patch (of 2): VMCOREINFO_BYTES defines the size of vmcoreinfo data, but the current implementation assumes a single page allocation. Allocate vmcoreinfo_data using get_order(VMCOREINFO_BYTES) so that vmcoreinfo can safely grow beyond PAGE_SIZE. This avoids hidden assumptions and keeps vmcoreinfo size consistent across the kernel. Link: https://lkml.kernel.org/r/20251216132801.807260-1-pnina.feder@mobileye.com Link: https://lkml.kernel.org/r/20251216132801.807260-2-pnina.feder@mobileye.com Signed-off-by: Pnina Feder Reviewed-by: Andrew Morton Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/vmcore_info.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index fe9bf8db1922..22b3205dd4dc 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -137,7 +137,9 @@ EXPORT_SYMBOL_GPL(hwerr_log_error_type); static int __init crash_save_vmcoreinfo_init(void) { - vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); + int order; + order = get_order(VMCOREINFO_BYTES); + vmcoreinfo_data = (unsigned char *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!vmcoreinfo_data) { pr_warn("Memory allocation for vmcoreinfo_data failed\n"); return -ENOMEM; @@ -146,7 +148,7 @@ static int __init crash_save_vmcoreinfo_init(void) vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, GFP_KERNEL 
| __GFP_ZERO); if (!vmcoreinfo_note) { - free_page((unsigned long)vmcoreinfo_data); + free_pages((unsigned long)vmcoreinfo_data, order); vmcoreinfo_data = NULL; pr_warn("Memory allocation for vmcoreinfo_note failed\n"); return -ENOMEM; From b5bfcc1ffe512c7879cb90befdeabaa43d9f07ca Mon Sep 17 00:00:00 2001 From: Pnina Feder Date: Tue, 16 Dec 2025 15:28:01 +0200 Subject: [PATCH 019/107] kernel/crash: handle multi-page vmcoreinfo in crash kernel copy kimage_crash_copy_vmcoreinfo() currently assumes vmcoreinfo fits in a single page. This breaks if VMCOREINFO_BYTES exceeds PAGE_SIZE. Allocate the required order of control pages and vmap all pages needed to safely copy vmcoreinfo into the crash kernel image. Link: https://lkml.kernel.org/r/20251216132801.807260-3-pnina.feder@mobileye.com Signed-off-by: Pnina Feder Reviewed-by: Andrew Morton Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_core.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 99dac1aa972a..3952b3e102e0 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -44,9 +44,15 @@ note_buf_t __percpu *crash_notes; int kimage_crash_copy_vmcoreinfo(struct kimage *image) { - struct page *vmcoreinfo_page; + struct page *vmcoreinfo_base; + struct page *vmcoreinfo_pages[DIV_ROUND_UP(VMCOREINFO_BYTES, PAGE_SIZE)]; + unsigned int order, nr_pages; + int i; void *safecopy; + nr_pages = DIV_ROUND_UP(VMCOREINFO_BYTES, PAGE_SIZE); + order = get_order(VMCOREINFO_BYTES); + if (!IS_ENABLED(CONFIG_CRASH_DUMP)) return 0; if (image->type != KEXEC_TYPE_CRASH) @@ -61,12 +67,15 @@ int kimage_crash_copy_vmcoreinfo(struct kimage *image) * happens to generate vmcoreinfo note, hereby we rely on * vmap for this purpose. 
*/ - vmcoreinfo_page = kimage_alloc_control_pages(image, 0); - if (!vmcoreinfo_page) { + vmcoreinfo_base = kimage_alloc_control_pages(image, order); + if (!vmcoreinfo_base) { pr_warn("Could not allocate vmcoreinfo buffer\n"); return -ENOMEM; } - safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL); + for (i = 0; i < nr_pages; i++) + vmcoreinfo_pages[i] = vmcoreinfo_base + i; + + safecopy = vmap(vmcoreinfo_pages, nr_pages, VM_MAP, PAGE_KERNEL); if (!safecopy) { pr_warn("Could not vmap vmcoreinfo buffer\n"); return -ENOMEM; From e700f5d1560798aacf0e56fdcc70ee2c20bf56ec Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 16 Dec 2025 02:45:21 -0500 Subject: [PATCH 020/107] watchdog: softlockup: panic when lockup duration exceeds N thresholds The softlockup_panic sysctl is currently a binary option: panic immediately or never panic on soft lockups. Panicking on any soft lockup, regardless of duration, can be overly aggressive for brief stalls that may be caused by legitimate operations. Conversely, never panicking may allow severe system hangs to persist undetected. Extend softlockup_panic to accept an integer threshold, allowing the kernel to panic only when the normalized lockup duration exceeds N watchdog threshold periods. This provides finer-grained control to distinguish between transient delays and persistent system failures. The accepted values are: - 0: Don't panic (unchanged) - 1: Panic when duration >= 1 * threshold (20s default, original behavior) - N > 1: Panic when duration >= N * threshold (e.g., 2 = 40s, 3 = 60s.) The original behavior is preserved for values 0 and 1, maintaining full backward compatibility while allowing systems to tolerate brief lockups while still catching severe, persistent hangs. 
[lirongqing@baidu.com: v2] Link: https://lkml.kernel.org/r/20251218074300.4080-1-lirongqing@baidu.com Link: https://lkml.kernel.org/r/20251216074521.2796-1-lirongqing@baidu.com Signed-off-by: Li RongQing Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Lance Yang Cc: Martin KaFai Lau Cc: Nicholas Piggin Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 10 +++++----- arch/arm/configs/aspeed_g5_defconfig | 2 +- arch/arm/configs/pxa3xx_defconfig | 2 +- arch/openrisc/configs/or1klitex_defconfig | 2 +- arch/powerpc/configs/skiroot_defconfig | 2 +- drivers/gpu/drm/ci/arm.config | 2 +- drivers/gpu/drm/ci/arm64.config | 2 +- drivers/gpu/drm/ci/x86_64.config | 2 +- kernel/configs/debug.config | 2 +- kernel/watchdog.c | 10 ++++++---- lib/Kconfig.debug | 13 +++++++------ tools/testing/selftests/bpf/config | 2 +- .../testing/selftests/wireguard/qemu/kernel.config | 2 +- 13 files changed, 28 insertions(+), 25 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1058f2a6d6a8..73d846211144 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6969,12 +6969,12 @@ Kernel parameters softlockup_panic= [KNL] Should the soft-lockup detector generate panics. - Format: 0 | 1 + Format: - A value of 1 instructs the soft-lockup detector - to panic the machine when a soft-lockup occurs. It is - also controlled by the kernel.softlockup_panic sysctl - and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the + A value of non-zero instructs the soft-lockup detector + to panic the machine when a soft-lockup duration exceeds + N thresholds. It is also controlled by the kernel.softlockup_panic + sysctl and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the respective build-time switch to that functionality. 
softlockup_all_cpu_backtrace= diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index 2e6ea13c1e9b..ec558e57d081 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig @@ -306,7 +306,7 @@ CONFIG_SCHED_STACK_END_CHECK=y CONFIG_PANIC_ON_OOPS=y CONFIG_PANIC_TIMEOUT=-1 CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1 CONFIG_WQ_WATCHDOG=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/arm/configs/pxa3xx_defconfig b/arch/arm/configs/pxa3xx_defconfig index 07d422f0ff34..fb272e3a2337 100644 --- a/arch/arm/configs/pxa3xx_defconfig +++ b/arch/arm/configs/pxa3xx_defconfig @@ -100,7 +100,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SHIRQ=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 # CONFIG_SCHED_DEBUG is not set CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_SPINLOCK_SLEEP=y diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index fb1eb9a68bd6..984b0e3b2768 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -52,5 +52,5 @@ CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,bpf" CONFIG_PRINTK_TIME=y CONFIG_PANIC_ON_OOPS=y CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BUG_ON_DATA_CORRUPTION=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 2b71a6dc399e..a4114fca5a39 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -289,7 +289,7 @@ CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_PANIC_ON_OOPS=y CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y 
CONFIG_WQ_WATCHDOG=y diff --git a/drivers/gpu/drm/ci/arm.config b/drivers/gpu/drm/ci/arm.config index 411e814819a8..d7c51670da2f 100644 --- a/drivers/gpu/drm/ci/arm.config +++ b/drivers/gpu/drm/ci/arm.config @@ -52,7 +52,7 @@ CONFIG_TMPFS=y CONFIG_PROVE_LOCKING=n CONFIG_DEBUG_LOCKDEP=n CONFIG_SOFTLOCKUP_DETECTOR=n -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=n +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0 CONFIG_FW_LOADER_COMPRESS=y diff --git a/drivers/gpu/drm/ci/arm64.config b/drivers/gpu/drm/ci/arm64.config index fddfbd4d2493..ea0e30737c4d 100644 --- a/drivers/gpu/drm/ci/arm64.config +++ b/drivers/gpu/drm/ci/arm64.config @@ -161,7 +161,7 @@ CONFIG_TMPFS=y CONFIG_PROVE_LOCKING=n CONFIG_DEBUG_LOCKDEP=n CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_DETECT_HUNG_TASK=y diff --git a/drivers/gpu/drm/ci/x86_64.config b/drivers/gpu/drm/ci/x86_64.config index 8eaba388b141..7ac98a78691e 100644 --- a/drivers/gpu/drm/ci/x86_64.config +++ b/drivers/gpu/drm/ci/x86_64.config @@ -47,7 +47,7 @@ CONFIG_TMPFS=y CONFIG_PROVE_LOCKING=n CONFIG_DEBUG_LOCKDEP=n CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_DETECT_HUNG_TASK=y diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 9f6ab7dabf67..774702591d26 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -84,7 +84,7 @@ CONFIG_SLUB_DEBUG_ON=y # Debug Oops, Lockups and Hangs # CONFIG_BOOTPARAM_HUNG_TASK_PANIC=0 -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0 CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PANIC_ON_OOPS=y diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 366122f4a0f8..b4d5fbdb933a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -363,7 +363,7 @@ static struct cpumask watchdog_allowed_mask __read_mostly; /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = - 
IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC); + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC; static bool softlockup_initialized __read_mostly; static u64 __read_mostly sample_period; @@ -774,8 +774,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { unsigned long touch_ts, period_ts, now; struct pt_regs *regs = get_irq_regs(); - int duration; int softlockup_all_cpu_backtrace; + int duration, thresh_count; unsigned long flags; if (!watchdog_enabled) @@ -879,7 +879,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); sys_info(softlockup_si_mask & ~SYS_INFO_ALL_BT); - if (softlockup_panic) + thresh_count = duration / get_softlockup_thresh(); + + if (softlockup_panic && thresh_count >= softlockup_panic) panic("softlockup: hung tasks"); } @@ -1228,7 +1230,7 @@ static const struct ctl_table watchdog_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "softlockup_sys_info", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4bfca37f313e..947e62e92da8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1110,13 +1110,14 @@ config SOFTLOCKUP_DETECTOR_INTR_STORM the CPU stats and the interrupt counts during the "soft lockups". config BOOTPARAM_SOFTLOCKUP_PANIC - bool "Panic (Reboot) On Soft Lockups" + int "Panic (Reboot) On Soft Lockups" depends on SOFTLOCKUP_DETECTOR + default 0 help - Say Y here to enable the kernel to panic on "soft lockups", - which are bugs that cause the kernel to loop in kernel - mode for more than 20 seconds (configurable using the watchdog_thresh - sysctl), without giving other tasks a chance to run. 
+ Set to a non-zero value N to enable the kernel to panic on "soft + lockups", which are bugs that cause the kernel to loop in kernel + mode for more than (N * 20 seconds) (configurable using the + watchdog_thresh sysctl), without giving other tasks a chance to run. The panic can be used in combination with panic_timeout, to cause the system to reboot automatically after a @@ -1124,7 +1125,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC high-availability systems that have uptime guarantees and where a lockup must be resolved ASAP. - Say N if unsure. + Say 0 if unsure. config HAVE_HARDLOCKUP_DETECTOR_BUDDY bool diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 558839e3c185..24855381290d 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,6 +1,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 0504c11c2de6..bb89d2dfaa2a 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -80,7 +80,7 @@ CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1 CONFIG_PANIC_TIMEOUT=-1 CONFIG_STACKTRACE=y From b8f690f6d1d9008ffab1f58fffc769ba813da373 Mon Sep 17 00:00:00 2001 From: Lalit Shankar Chowdhury Date: Tue, 2 Dec 2025 03:14:04 +0530 Subject: [PATCH 021/107] fat: remove unused parameter Remove unused inode parameter from fat_cache_alloc(). 
Link: https://lkml.kernel.org/r/20251201214403.90604-2-lalitshankarch@gmail.com Signed-off-by: Lalit Shankar Chowdhury Acked-by: OGAWA Hirofumi Cc: Christian Brauner Signed-off-by: Andrew Morton --- fs/fat/cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 630f3056658e..1b87354e24ba 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -54,7 +54,7 @@ void fat_cache_destroy(void) kmem_cache_destroy(fat_cache_cachep); } -static inline struct fat_cache *fat_cache_alloc(struct inode *inode) +static inline struct fat_cache *fat_cache_alloc(void) { return kmem_cache_alloc(fat_cache_cachep, GFP_NOFS); } @@ -144,7 +144,7 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new) MSDOS_I(inode)->nr_caches++; spin_unlock(&MSDOS_I(inode)->cache_lru_lock); - tmp = fat_cache_alloc(inode); + tmp = fat_cache_alloc(); if (!tmp) { spin_lock(&MSDOS_I(inode)->cache_lru_lock); MSDOS_I(inode)->nr_caches--; From 7c5b0f6a9ff5041ea6f4213c9827170c60a376f0 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 17 Dec 2025 12:23:27 +0800 Subject: [PATCH 022/107] .editorconfig: respect .editorconfig settings from parent directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting 'root' to 'true' prevents the editor from searching for other .editorconfig files in parent directories. However, a common workflow involves generating a patch with 'git format-patch' and opening it in an editor within the kernel source directory. In such cases, we want any specific settings for patch files defined in an .editorconfig located above the kernel source directory to remain effective. Therefore, remove the 'root' setting from the kernel .editorconfig. 
Link: https://lkml.kernel.org/r/20251217-editconfig-v1-1-883e6dd6dbfa@gmail.com Signed-off-by: Kevin Hao Cc: Íñigo Huguet Cc: Danny Lin Cc: Mickaël Salaün Cc: Masahiro Yamada Signed-off-by: Andrew Morton --- .editorconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/.editorconfig b/.editorconfig index 29a30ccfc07b..b5ea32b6954b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -root = true - [{*.{awk,c,dts,dtsi,dtso,h,mk,s,S},Kconfig,Makefile,Makefile.*}] charset = utf-8 end_of_line = lf From 426295ef18c5d5f0b7f75ac89d09022fcfafd25c Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 28 Nov 2025 14:59:14 +0100 Subject: [PATCH 023/107] kallsyms: clean up @namebuf initialization in kallsyms_lookup_buildid() Patch series "kallsyms: Prevent invalid access when showing module buildid", v3. We have seen nested crashes in __sprint_symbol(), see below. They seem to be caused by an invalid pointer to "buildid". This patchset cleans up kallsyms code related to module buildid and fixes this invalid access when printing backtraces. I made an audit of __sprint_symbol() and found several situations when the buildid might be wrong: + bpf_address_lookup() does not set @modbuildid + ftrace_mod_address_lookup() does not set @modbuildid + __sprint_symbol() does not take rcu_read_lock and the related struct module might get removed before mod->build_id is printed. This patchset solves these problems: + 1st, 2nd patches are preparatory + 3rd, 4th, 6th patches fix the above problems + 5th patch cleans up a suspicious initialization code. This is the backtrace, we have seen. But it is not really important. 
The problems fixed by the patchset are obvious: crash64> bt [62/2029] PID: 136151 TASK: ffff9f6c981d4000 CPU: 367 COMMAND: "btrfs" #0 [ffffbdb687635c28] machine_kexec at ffffffffb4c845b3 #1 [ffffbdb687635c80] __crash_kexec at ffffffffb4d86a6a #2 [ffffbdb687635d08] hex_string at ffffffffb51b3b61 #3 [ffffbdb687635d40] crash_kexec at ffffffffb4d87964 #4 [ffffbdb687635d50] oops_end at ffffffffb4c41fc8 #5 [ffffbdb687635d70] do_trap at ffffffffb4c3e49a #6 [ffffbdb687635db8] do_error_trap at ffffffffb4c3e6a4 #7 [ffffbdb687635df8] exc_stack_segment at ffffffffb5666b33 #8 [ffffbdb687635e20] asm_exc_stack_segment at ffffffffb5800cf9 ... This patch (of 7) The function kallsyms_lookup_buildid() initializes the given @namebuf by clearing the first and the last byte. It is not clear why. The 1st byte makes sense because some callers ignore the return code and expect that the buffer contains a valid string, for example: - function_stat_show() - kallsyms_lookup() - kallsyms_lookup_buildid() The initialization of the last byte does not make much sense because it can later be overwritten. Fortunately, it seems that all called functions behave correctly: - kallsyms_expand_symbol() explicitly adds the trailing '\0' at the end of the function. - All *__address_lookup() functions either use the safe strscpy() or they do not touch the buffer at all. Document the reason for clearing the first byte. And remove the useless initialization of the last byte. 
Link: https://lkml.kernel.org/r/20251128135920.217303-2-pmladek@suse.com Signed-off-by: Petr Mladek Reviewed-by: Aaron Tomlin Cc: Alexei Starovoitov Cc: Daniel Borkman Cc: John Fastabend Cc: Kees Cook Cc: Luis Chamberalin Cc: Marc Rutland Cc: "Masami Hiramatsu (Google)" Cc: Petr Pavlu Cc: Sami Tolvanen Cc: Steven Rostedt Cc: Daniel Gomez Signed-off-by: Andrew Morton --- kernel/kallsyms.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 049e296f586c..9559bf947c6b 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -355,7 +355,12 @@ static int kallsyms_lookup_buildid(unsigned long addr, { int ret; - namebuf[KSYM_NAME_LEN - 1] = 0; + /* + * kallsyms_lookup() returns pointer to namebuf on success and + * NULL on error. But some callers ignore the return value. + * Instead they expect @namebuf filled either with valid + * or empty string. + */ namebuf[0] = 0; if (is_ksym_addr(addr)) { From fda024fb64769e9d6b3916d013c78d6b189129f8 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 28 Nov 2025 14:59:15 +0100 Subject: [PATCH 024/107] kallsyms: clean up modname and modbuildid initialization in kallsyms_lookup_buildid() The @modname and @modbuildid optional return parameters are set only when the symbol is in a module. Always initialize them so that they do not need to be cleared when the symbol is not in a module. It simplifies the logic and makes the code even slightly more safe. Note that bpf_address_lookup() function will get updated in a separate patch. 
Link: https://lkml.kernel.org/r/20251128135920.217303-3-pmladek@suse.com Signed-off-by: Petr Mladek Cc: Aaron Tomlin Cc: Alexei Starovoitov Cc: Daniel Borkman Cc: Daniel Gomez Cc: John Fastabend Cc: Kees Cook Cc: Luis Chamberalin Cc: Marc Rutland Cc: "Masami Hiramatsu (Google)" Cc: Petr Pavlu Cc: Sami Tolvanen Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- kernel/kallsyms.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 9559bf947c6b..66ad899124c5 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -362,6 +362,14 @@ static int kallsyms_lookup_buildid(unsigned long addr, * or empty string. */ namebuf[0] = 0; + /* + * Initialize the module-related return values. They are not set + * when the symbol is in vmlinux or it is a bpf address. + */ + if (modname) + *modname = NULL; + if (modbuildid) + *modbuildid = NULL; if (is_ksym_addr(addr)) { unsigned long pos; @@ -370,10 +378,6 @@ static int kallsyms_lookup_buildid(unsigned long addr, /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), namebuf, KSYM_NAME_LEN); - if (modname) - *modname = NULL; - if (modbuildid) - *modbuildid = NULL; return strlen(namebuf); } From acfdbb4ab2910ff6f03becb569c23ac7b2223913 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 28 Nov 2025 14:59:16 +0100 Subject: [PATCH 025/107] module: add helper function for reading module_buildid() Add a helper function for reading the optional "build_id" member of struct module. It is going to be used also in ftrace_mod_address_lookup(). Use "#ifdef" instead of "#if IS_ENABLED()" to match the declaration of the optional field in struct module. 
Link: https://lkml.kernel.org/r/20251128135920.217303-4-pmladek@suse.com Signed-off-by: Petr Mladek Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Cc: Aaron Tomlin Cc: Alexei Starovoitov Cc: Daniel Borkman Cc: John Fastabend Cc: Kees Cook Cc: Luis Chamberalin Cc: Marc Rutland Cc: "Masami Hiramatsu (Google)" Cc: Sami Tolvanen Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/module.h | 9 +++++++++ kernel/module/kallsyms.c | 9 ++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index d80c3ea57472..ac254525014c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -748,6 +748,15 @@ static inline void __module_get(struct module *module) __mod ? __mod->name : "kernel"; \ }) +static inline const unsigned char *module_buildid(struct module *mod) +{ +#ifdef CONFIG_STACKTRACE_BUILD_ID + return mod->build_id; +#else + return NULL; +#endif +} + /* Dereference module function descriptor */ void *dereference_module_function_descriptor(struct module *mod, void *ptr); diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 00a60796327c..0fc11e45df9b 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -334,13 +334,8 @@ int module_address_lookup(unsigned long addr, if (mod) { if (modname) *modname = mod->name; - if (modbuildid) { -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) - *modbuildid = mod->build_id; -#else - *modbuildid = NULL; -#endif - } + if (modbuildid) + *modbuildid = module_buildid(mod); sym = find_kallsyms_symbol(mod, addr, size, offset); From 8e81dac4cd5477731169b92cff7c24f8f6635950 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 28 Nov 2025 14:59:17 +0100 Subject: [PATCH 026/107] kallsyms: cleanup code for appending the module buildid Put the code for appending the optional "buildid" into a helper function. It makes __sprint_symbol() more readable. 
Also print a warning when the "modname" is set and the "buildid" isn't. It might catch a situation when some lookup function in kallsyms_lookup_buildid() does not handle the "buildid". Use pr_*_once() to avoid an infinite recursion when the function is called from printk(). The recursion is rather theoretical but better be on the safe side. Link: https://lkml.kernel.org/r/20251128135920.217303-5-pmladek@suse.com Signed-off-by: Petr Mladek Cc: Aaron Tomlin Cc: Alexei Starovoitov Cc: Daniel Borkman Cc: Daniel Gomez Cc: John Fastabend Cc: Kees Cook Cc: Luis Chamberalin Cc: Marc Rutland Cc: "Masami Hiramatsu (Google)" Cc: Petr Pavlu Cc: Sami Tolvanen Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- kernel/kallsyms.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 66ad899124c5..c0898327836c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -435,6 +435,37 @@ int lookup_symbol_name(unsigned long addr, char *symname) return lookup_module_symbol_name(addr, symname); } +#ifdef CONFIG_STACKTRACE_BUILD_ID + +static int append_buildid(char *buffer, const char *modname, + const unsigned char *buildid) +{ + if (!modname) + return 0; + + if (!buildid) { + pr_warn_once("Undefined buildid for the module %s\n", modname); + return 0; + } + + /* build ID should match length of sprintf */ +#ifdef CONFIG_MODULES + static_assert(sizeof(typeof_member(struct module, build_id)) == 20); +#endif + + return sprintf(buffer, " %20phN", buildid); +} + +#else /* CONFIG_STACKTRACE_BUILD_ID */ + +static int append_buildid(char *buffer, const char *modname, + const unsigned char *buildid) +{ + return 0; +} + +#endif /* CONFIG_STACKTRACE_BUILD_ID */ + /* Look up a kernel symbol and return it in a text buffer. 
*/ static int __sprint_symbol(char *buffer, unsigned long address, int symbol_offset, int add_offset, int add_buildid) @@ -457,15 +488,8 @@ static int __sprint_symbol(char *buffer, unsigned long address, if (modname) { len += sprintf(buffer + len, " [%s", modname); -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) - if (add_buildid && buildid) { - /* build ID should match length of sprintf */ -#if IS_ENABLED(CONFIG_MODULES) - static_assert(sizeof(typeof_member(struct module, build_id)) == 20); -#endif - len += sprintf(buffer + len, " %20phN", buildid); - } -#endif + if (add_buildid) + len += append_buildid(buffer + len, modname, buildid); len += sprintf(buffer + len, "]"); } From cd6735896d0343942cf3dafb48ce32eb79341990 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 28 Nov 2025 14:59:18 +0100 Subject: [PATCH 027/107] kallsyms/bpf: rename __bpf_address_lookup() to bpf_address_lookup() bpf_address_lookup() has been used only in kallsyms_lookup_buildid(). It was supposed to set @modname and @modbuildid when the symbol was in a module. But it always just cleared @modname because BPF symbols were never in a module. And it did not clear @modbuildid because the pointer was not passed. The wrapper is no longer needed. Both @modname and @modbuildid are now always initialized to NULL in kallsyms_lookup_buildid(). Remove the wrapper and rename __bpf_address_lookup() to bpf_address_lookup() because this variant is used everywhere. 
[akpm@linux-foundation.org: fix loongarch] Link: https://lkml.kernel.org/r/20251128135920.217303-6-pmladek@suse.com Fixes: 9294523e3768 ("module: add printk formats to add module build ID to stacktraces") Signed-off-by: Petr Mladek Acked-by: Alexei Starovoitov Cc: Aaron Tomlin Cc: Daniel Borkman Cc: Daniel Gomez Cc: John Fastabend Cc: Kees Cook Cc: Luis Chamberalin Cc: Marc Rutland Cc: "Masami Hiramatsu (Google)" Cc: Petr Pavlu Cc: Sami Tolvanen Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/arm64/net/bpf_jit_comp.c | 2 +- arch/loongarch/net/bpf_jit.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 26 ++++---------------------- kernel/bpf/core.c | 4 ++-- kernel/kallsyms.c | 5 ++--- 6 files changed, 11 insertions(+), 30 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b6eb7a465ad2..1d657bd3ce65 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2951,7 +2951,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, u64 plt_target = 0ULL; bool poking_bpf_entry; - if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) /* Only poking bpf text is supported. Since kernel function * entry is set up by ftrace, we reply on ftrace to poke kernel * functions. diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index d1d5a65308b9..3b63bc5b99d9 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1319,7 +1319,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, /* Only poking bpf text is supported. Since kernel function entry * is set up by ftrace, we rely on ftrace to poke kernel functions. 
*/ - if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) return -ENOTSUPP; image = ip - offset; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 5e976730b2f5..e199976e410a 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -1122,7 +1122,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, bpf_func = (unsigned long)ip; /* We currently only support poking bpf programs */ - if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) { + if (!bpf_address_lookup(bpf_func, &size, &offset, name)) { pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func); return -EOPNOTSUPP; } diff --git a/include/linux/filter.h b/include/linux/filter.h index fd54fed8f95f..7452817d707d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1375,24 +1375,13 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -int __bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char *sym); +int bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym); bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); -static inline int -bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) -{ - int ret = __bpf_address_lookup(addr, size, off, sym); - - if (ret && modname) - *modname = NULL; - return ret; -} - void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); @@ -1431,8 +1420,8 @@ static inline bool bpf_jit_kallsyms_enabled(void) } static inline int -__bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char *sym) +bpf_address_lookup(unsigned long addr, unsigned 
long *size, + unsigned long *off, char *sym) { return 0; } @@ -1453,13 +1442,6 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) return NULL; } -static inline int -bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) -{ - return 0; -} - static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) { } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f1c5fc66ef01..8f6d8f1c4946 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -714,8 +714,8 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr) return n ? container_of(n, struct bpf_ksym, tnode) : NULL; } -int __bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char *sym) +int bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) { struct bpf_ksym *ksym; int ret = 0; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index c0898327836c..a37cafdf52ca 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -345,7 +345,7 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, return 1; } return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) || - !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); + !!bpf_address_lookup(addr, symbolsize, offset, namebuf); } static int kallsyms_lookup_buildid(unsigned long addr, @@ -386,8 +386,7 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = module_address_lookup(addr, symbolsize, offset, modname, modbuildid, namebuf); if (!ret) - ret = bpf_address_lookup(addr, symbolsize, - offset, modname, namebuf); + ret = bpf_address_lookup(addr, symbolsize, offset, namebuf); if (!ret) ret = ftrace_mod_address_lookup(addr, symbolsize, From e8a1e7eaa19d0b757b06a2f913e3eeb4b1c002c6 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 28 Nov 2025 14:59:19 +0100 Subject: [PATCH 028/107] kallsyms/ftrace: set module buildid in ftrace_mod_address_lookup() 
__sprint_symbol() might access an invalid pointer when kallsyms_lookup_buildid() returns a symbol found by ftrace_mod_address_lookup(). The ftrace lookup function must set both @modname and @modbuildid the same way as module_address_lookup(). Link: https://lkml.kernel.org/r/20251128135920.217303-7-pmladek@suse.com Fixes: 9294523e3768 ("module: add printk formats to add module build ID to stacktraces") Signed-off-by: Petr Mladek Reviewed-by: Aaron Tomlin Acked-by: Steven Rostedt (Google) Cc: Alexei Starovoitov Cc: Daniel Borkman Cc: Daniel Gomez Cc: John Fastabend Cc: Kees Cook Cc: Luis Chamberalin Cc: Marc Rutland Cc: "Masami Hiramatsu (Google)" Cc: Petr Pavlu Cc: Sami Tolvanen Signed-off-by: Andrew Morton --- include/linux/ftrace.h | 6 ++++-- kernel/kallsyms.c | 4 ++-- kernel/trace/ftrace.c | 5 ++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3a8989e3268..dc844d7e693d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -87,11 +87,13 @@ struct ftrace_hash; defined(CONFIG_DYNAMIC_FTRACE) int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym); + unsigned long *off, char **modname, + const unsigned char **modbuildid, char *sym); #else static inline int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) + unsigned long *off, char **modname, + const unsigned char **modbuildid, char *sym) { return 0; } diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index a37cafdf52ca..0f639c907336 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -389,8 +389,8 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = bpf_address_lookup(addr, symbolsize, offset, namebuf); if (!ret) - ret = ftrace_mod_address_lookup(addr, symbolsize, - offset, modname, namebuf); + ret = ftrace_mod_address_lookup(addr, symbolsize, offset, + modname, modbuildid, namebuf); 
return ret; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index aa758efc3731..304505c11686 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7753,7 +7753,8 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) + unsigned long *off, char **modname, + const unsigned char **modbuildid, char *sym) { struct ftrace_mod_map *mod_map; int ret = 0; @@ -7765,6 +7766,8 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, if (ret) { if (modname) *modname = mod_map->mod->name; + if (modbuildid) + *modbuildid = module_buildid(mod_map->mod); break; } } From 3b07086444f80c844351255fd94c2cb0a7224df2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 28 Nov 2025 14:59:20 +0100 Subject: [PATCH 029/107] kallsyms: prevent module removal when printing module name and buildid kallsyms_lookup_buildid() copies the symbol name into the given buffer so that it can be safely read anytime later. But it just copies pointers to mod->name and mod->build_id which might get reused after the related struct module gets removed. The lifetime of struct module is synchronized using RCU. Take the rcu read lock for the entire __sprint_symbol(). 
Link: https://lkml.kernel.org/r/20251128135920.217303-8-pmladek@suse.com Signed-off-by: Petr Mladek Reviewed-by: Aaron Tomlin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Daniel Gomez Cc: John Fastabend Cc: Kees Cook Cc: Luis Chamberlain Cc: Mark Rutland Cc: "Masami Hiramatsu (Google)" Cc: Petr Pavlu Cc: Sami Tolvanen Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- kernel/kallsyms.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 0f639c907336..e0813ca9469a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -474,6 +474,9 @@ static int __sprint_symbol(char *buffer, unsigned long address, unsigned long offset, size; int len; + /* Prevent module removal until modname and modbuildid are printed */ + guard(rcu)(); + address += symbol_offset; len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid, buffer); From f34e19c34e4e92338d2ceaab2b95dd7790d262de Mon Sep 17 00:00:00 2001 From: Minu Jin Date: Tue, 25 Nov 2025 09:04:07 +0900 Subject: [PATCH 030/107] fork-comment-fix: remove ambiguous question mark in CLONE_CHILD_CLEARTID comment The current comment "Clear TID on mm_release()?" ends with a question mark, implying uncertainty about whether the TID is actually cleared in mm_release(). However, the code flow is deterministic. When a task exits, mm_release() explicitly checks 'tsk->clear_child_tid' and clears it. Since this behavior is unambiguous, remove the confusing question mark and rephrase the comment to clearly state that TID is cleared in mm_release(). 
Link: https://lkml.kernel.org/r/20251125000407.24470-1-s9430939@naver.com Signed-off-by: Minu Jin Cc: Ben Segall Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Kees Cook Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Cc: Oleg Nesterov Signed-off-by: Andrew Morton --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index b1f3915d5f8e..b21eccc9e11c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2071,7 +2071,7 @@ __latent_entropy struct task_struct *copy_process( p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; /* - * Clear TID on mm_release()? + * TID is cleared in mm_release() when the task exits */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; From c243413740b39b0cf0a88732de5efc2b45716d81 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 19 Dec 2025 21:45:41 -0800 Subject: [PATCH 031/107] kfifo: fix kmalloc_array_node() argument order To be consistent, pass the kmalloc_array_node() parameters in the order (number_of_elements, element_size). Since only the product of the two values is used, this is not a bug fix. 
Link: https://lkml.kernel.org/r/20251220054541.2295599-1-rdunlap@infradead.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216015 Signed-off-by: Randy Dunlap Cc: Stefani Seibold Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/kfifo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kfifo.c b/lib/kfifo.c index 525e66f8294c..2633f9cc336c 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -41,7 +41,7 @@ int __kfifo_alloc_node(struct __kfifo *fifo, unsigned int size, return -EINVAL; } - fifo->data = kmalloc_array_node(esize, size, gfp_mask, node); + fifo->data = kmalloc_array_node(size, esize, gfp_mask, node); if (!fifo->data) { fifo->mask = 0; From 0e7fd23f9293cee3c7f341498a0011d09c491510 Mon Sep 17 00:00:00 2001 From: Kari Argillander Date: Fri, 19 Dec 2025 18:25:11 +0200 Subject: [PATCH 032/107] editorconfig: add rst extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a lot of .rst documentation; use editorconfig rules for those. This sets the default tab width to 8, which makes indentation consistent and avoids requiring developers to adjust editor settings manually. 
Link: https://lkml.kernel.org/r/20251219-editorconfig-rst-v1-1-58d4fa397664@gmail.com Signed-off-by: Kari Argillander Cc: Danny Lin Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Mickael Salaun Cc: Íñigo Huguet Signed-off-by: Andrew Morton --- .editorconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index b5ea32b6954b..69718ac91747 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -[{*.{awk,c,dts,dtsi,dtso,h,mk,s,S},Kconfig,Makefile,Makefile.*}] +[{*.{awk,c,dts,dtsi,dtso,h,mk,rst,s,S},Kconfig,Makefile,Makefile.*}] charset = utf-8 end_of_line = lf insert_final_newline = true From d30aca3eeffc18452e5cc5c4e59f1a4da2bd2f12 Mon Sep 17 00:00:00 2001 From: Ryota Sakamoto Date: Sun, 21 Dec 2025 13:35:16 +0000 Subject: [PATCH 033/107] lib/tests: convert test_min_heap module to KUnit Move lib/test_min_heap.c to lib/tests/min_heap_kunit.c and convert it to use KUnit. This change switches the ad-hoc test code to standard KUnit test cases. The test data remains the same, but the verification logic is updated to use KUNIT_EXPECT_* macros. Also remove CONFIG_TEST_MIN_HEAP from arch/*/configs/* because it is no longer used. The new CONFIG_MIN_HEAP_KUNIT_TEST will be automatically enabled by CONFIG_KUNIT_ALL_TESTS. The reasons for converting to KUnit are: 1. Standardization: Switching from ad-hoc printk-based reporting to the standard KTAP format makes it easier for CI systems to parse and report test results 2. Better Diagnostics: Using KUNIT_EXPECT_* macros automatically provides detailed diagnostics on failure. 3. Tooling Integration: It allows the test to be managed and executed using standard KUnit tools. 
Link: https://lkml.kernel.org/r/20251221133516.321846-1-sakamo.ryota@gmail.com Signed-off-by: Ryota Sakamoto Acked-by: Kuan-Wei Chiu Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: David Gow Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- MAINTAINERS | 2 +- arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/s390/configs/debug_defconfig | 2 +- lib/Kconfig.debug | 21 +-- lib/Makefile | 1 - lib/tests/Makefile | 1 + .../min_heap_kunit.c} | 145 ++++++++---------- 19 files changed, 80 insertions(+), 105 deletions(-) rename lib/{test_min_heap.c => tests/min_heap_kunit.c} (58%) diff --git a/MAINTAINERS b/MAINTAINERS index 99407c4c0095..4dcbcb5c14f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17456,7 +17456,7 @@ S: Maintained F: Documentation/core-api/min_heap.rst F: include/linux/min_heap.h F: lib/min_heap.c -F: lib/test_min_heap.c +F: lib/tests/min_heap_kunit.c MIPI CCS, SMIA AND SMIA++ IMAGE SENSOR DRIVER M: Sakari Ailus diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 1439abb69f73..46598efbea54 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -609,7 +609,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/apollo_defconfig 
b/arch/m68k/configs/apollo_defconfig index 6a4e71866f60..63bef7a6d858 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -566,7 +566,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 46ad7d57b4fc..1342adfbd855 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -586,7 +586,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 867bfa13a44c..484f21a2da37 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -558,7 +558,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5dfe602cafd4..ce97c816aa21 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -568,7 +568,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index f5d30310a349..f5b57ea2d681 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -585,7 +585,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/multi_defconfig 
b/arch/m68k/configs/multi_defconfig index fe54e9222cc0..85efdb31c898 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -672,7 +672,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4ff2ff0993ad..7102579b83d3 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -558,7 +558,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 6bb4738a65aa..18c0493ed0ff 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -559,7 +559,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 14166c8fe234..1b3a34ab1c74 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -575,7 +575,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5db924e3caf7..1a41a1c6bde1 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -555,7 +555,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/m68k/configs/sun3x_defconfig 
b/arch/m68k/configs/sun3x_defconfig index 318c9fe42f46..8f182684e54b 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -556,7 +556,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 684b3ea80f39..f1e937222a83 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -426,7 +426,6 @@ CONFIG_BOOTX_TEXT=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_LKDTM=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_BACKTRACE_SELF_TEST=m CONFIG_TEST_REF_TRACKER=m diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0713914b25b4..4be3a7540909 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -921,7 +921,7 @@ CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_CONFIGFS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m -CONFIG_TEST_MIN_HEAP=y +CONFIG_MIN_HEAP_KUNIT_TEST=m CONFIG_KPROBES_SANITY_TEST=m CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 947e62e92da8..3a31bbf53425 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2278,16 +2278,6 @@ config TEST_LIST_SORT If unsure, say N. -config TEST_MIN_HEAP - tristate "Min heap test" - depends on DEBUG_KERNEL || m - help - Enable this to turn on min heap function tests. This test is - executed only once during system boot (so affects only boot time), - or at module load time. - - If unsure, say N. - config TEST_SORT tristate "Array-based sort test" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2878,6 +2868,17 @@ config MEMCPY_KUNIT_TEST If unsure, say N. 
+config MIN_HEAP_KUNIT_TEST + tristate "Min heap test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the min heap library + which provides functions for creating and managing min heaps. + The test suite checks the functionality of the min heap library. + + If unsure, say N + config IS_SIGNED_TYPE_KUNIT_TEST tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 586a9f9b27a9..1f87a174a317 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -75,7 +75,6 @@ obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o CFLAGS_test_ubsan.o += $(call cc-disable-warning, unused-but-set-variable) UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o -obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o diff --git a/lib/tests/Makefile b/lib/tests/Makefile index 9a20608f65f5..088b80d16383 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -33,6 +33,7 @@ CFLAGS_longest_symbol_kunit.o += $(call cc-disable-warning, missing-prototypes) obj-$(CONFIG_LONGEST_SYM_KUNIT_TEST) += longest_symbol_kunit.o obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o +obj-$(CONFIG_MIN_HEAP_KUNIT_TEST) += min_heap_kunit.o CFLAGS_overflow_kunit.o = $(call cc-disable-warning, tautological-constant-out-of-range-compare) obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o obj-$(CONFIG_PRINTF_KUNIT_TEST) += printf_kunit.o diff --git a/lib/test_min_heap.c b/lib/tests/min_heap_kunit.c similarity index 58% rename from lib/test_min_heap.c rename to lib/tests/min_heap_kunit.c index a9c4a74d3898..9c1122661698 100644 --- a/lib/test_min_heap.c +++ b/lib/tests/min_heap_kunit.c @@ -1,60 +1,66 @@ // SPDX-License-Identifier: GPL-2.0-only -#define pr_fmt(fmt) "min_heap_test: " fmt - /* * Test cases for the min max heap. 
*/ -#include +#include #include #include -#include #include +struct min_heap_test_case { + const char *str; + bool min_heap; +}; + +static struct min_heap_test_case min_heap_cases[] = { + { + .str = "min", + .min_heap = true, + }, + { + .str = "max", + .min_heap = false, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(min_heap, min_heap_cases, str); + DEFINE_MIN_HEAP(int, min_heap_test); -static __init bool less_than(const void *lhs, const void *rhs, void __always_unused *args) +static bool less_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs < *(int *)rhs; } -static __init bool greater_than(const void *lhs, const void *rhs, void __always_unused *args) +static bool greater_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs > *(int *)rhs; } -static __init int pop_verify_heap(bool min_heap, - struct min_heap_test *heap, - const struct min_heap_callbacks *funcs) +static void pop_verify_heap(struct kunit *test, + bool min_heap, + struct min_heap_test *heap, + const struct min_heap_callbacks *funcs) { int *values = heap->data; - int err = 0; int last; last = values[0]; min_heap_pop_inline(heap, funcs, NULL); while (heap->nr > 0) { - if (min_heap) { - if (last > values[0]) { - pr_err("error: expected %d <= %d\n", last, - values[0]); - err++; - } - } else { - if (last < values[0]) { - pr_err("error: expected %d >= %d\n", last, - values[0]); - err++; - } - } + if (min_heap) + KUNIT_EXPECT_LE(test, last, values[0]); + else + KUNIT_EXPECT_GE(test, last, values[0]); last = values[0]; min_heap_pop_inline(heap, funcs, NULL); } - return err; } -static __init int test_heapify_all(bool min_heap) +static void test_heapify_all(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; struct min_heap_test heap = { @@ -63,15 +69,14 @@ static __init int test_heapify_all(bool min_heap) .size = 
ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, err; + int i; /* Test with known set of values. */ min_heapify_all_inline(&heap, &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); - + pop_verify_heap(test, params->min_heap, &heap, &funcs); /* Test with randomly generated values. */ heap.nr = ARRAY_SIZE(values); @@ -79,13 +84,12 @@ static __init int test_heapify_all(bool min_heap) values[i] = get_random_u32(); min_heapify_all_inline(&heap, &funcs, NULL); - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static __init int test_heap_push(bool min_heap) +static void test_heap_push(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; @@ -95,29 +99,28 @@ static __init int test_heap_push(bool min_heap) .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, temp, err; + int i, temp; /* Test with known set of values copied from data. */ for (i = 0; i < ARRAY_SIZE(data); i++) min_heap_push_inline(&heap, &data[i], &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); + pop_verify_heap(test, params->min_heap, &heap, &funcs); /* Test with randomly generated values. 
*/ while (heap.nr < heap.size) { temp = get_random_u32(); min_heap_push_inline(&heap, &temp, &funcs, NULL); } - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static __init int test_heap_pop_push(bool min_heap) +static void test_heap_pop_push(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; @@ -127,13 +130,13 @@ static __init int test_heap_pop_push(bool min_heap) .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, temp, err; + int i, temp; /* Fill values with data to pop and replace. */ - temp = min_heap ? 0x80000000 : 0x7FFFFFFF; + temp = params->min_heap ? 0x80000000 : 0x7FFFFFFF; for (i = 0; i < ARRAY_SIZE(data); i++) min_heap_push_inline(&heap, &temp, &funcs, NULL); @@ -141,7 +144,7 @@ static __init int test_heap_pop_push(bool min_heap) for (i = 0; i < ARRAY_SIZE(data); i++) min_heap_pop_push_inline(&heap, &data[i], &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); + pop_verify_heap(test, params->min_heap, &heap, &funcs); heap.nr = 0; for (i = 0; i < ARRAY_SIZE(data); i++) @@ -152,13 +155,12 @@ static __init int test_heap_pop_push(bool min_heap) temp = get_random_u32(); min_heap_pop_push_inline(&heap, &temp, &funcs, NULL); } - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static __init int test_heap_del(bool min_heap) +static void test_heap_del(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; struct min_heap_test heap; @@ -166,17 +168,16 @@ static __init int 
test_heap_del(bool min_heap) min_heap_init_inline(&heap, values, ARRAY_SIZE(values)); heap.nr = ARRAY_SIZE(values); struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, err; + int i; /* Test with known set of values. */ min_heapify_all_inline(&heap, &funcs, NULL); for (i = 0; i < ARRAY_SIZE(values) / 2; i++) min_heap_del_inline(&heap, get_random_u32() % heap.nr, &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); - + pop_verify_heap(test, params->min_heap, &heap, &funcs); /* Test with randomly generated values. */ heap.nr = ARRAY_SIZE(values); @@ -186,37 +187,23 @@ static __init int test_heap_del(bool min_heap) for (i = 0; i < ARRAY_SIZE(values) / 2; i++) min_heap_del_inline(&heap, get_random_u32() % heap.nr, &funcs, NULL); - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static int __init test_min_heap_init(void) -{ - int err = 0; +static struct kunit_case min_heap_test_cases[] = { + KUNIT_CASE_PARAM(test_heapify_all, min_heap_gen_params), + KUNIT_CASE_PARAM(test_heap_push, min_heap_gen_params), + KUNIT_CASE_PARAM(test_heap_pop_push, min_heap_gen_params), + KUNIT_CASE_PARAM(test_heap_del, min_heap_gen_params), + {}, +}; - err += test_heapify_all(true); - err += test_heapify_all(false); - err += test_heap_push(true); - err += test_heap_push(false); - err += test_heap_pop_push(true); - err += test_heap_pop_push(false); - err += test_heap_del(true); - err += test_heap_del(false); - if (err) { - pr_err("test failed with %d errors\n", err); - return -EINVAL; - } - pr_info("test passed\n"); - return 0; -} -module_init(test_min_heap_init); +static struct kunit_suite min_heap_test_suite = { + .name = "min_heap", + .test_cases = min_heap_test_cases, +}; -static void __exit test_min_heap_exit(void) -{ - /* do nothing */ -} -module_exit(test_min_heap_exit); 
+kunit_test_suite(min_heap_test_suite); MODULE_DESCRIPTION("Test cases for the min max heap"); MODULE_LICENSE("GPL"); From 1965bbb8f3c72e5f1972b5eeb6f19a36664a676d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 22 Dec 2025 08:55:10 +0100 Subject: [PATCH 034/107] ipc/shm: uapi: remove dependency on libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. shm.h does not even use any symbols from the libc header as the usage of getpagesize() was removed a decade ago in commit 060028bac94b ("ipc/shm.c: increase the defaults for SHMALL, SHMMAX") Drop the unnecessary inclusion. Link: https://lkml.kernel.org/r/20251222-uapi-shm-v1-1-270bb7f75d97@linutronix.de Signed-off-by: Thomas Weißschuh Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- include/uapi/linux/shm.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index 8d1f17a4e08e..7269f9f402e3 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -5,9 +5,6 @@ #include #include #include -#ifndef __KERNEL__ -#include -#endif /* * SHMMNI, SHMMAX and SHMALL are default upper limits which can be From ad533a740c7ccb801619ed962807605254fe7545 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 13 Dec 2025 12:53:09 +0100 Subject: [PATCH 035/107] resource: provide 0args DEFINE_RES variant for unset resource desc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a variant of DEFINE_RES that takes 0 arguments to initialize an "unset" resource descriptor. This should be used for the improper case of struct resource res = {}; where DEFINE_RES() should be used. 
With this new helper variant, it would result in: struct resource res = DEFINE_RES(); instead of having to define the full 3 arguments: struct resource res = DEFINE_RES(0, 0, IORESOURCE_UNSET); DEFINE_RES() with no args, will set the flags to IORESOURCE_UNSET signaling the resource descriptor is UNSET and doesn't reflect an actual resource currently. Link: https://lkml.kernel.org/r/20251213115314.16700-1-ansuelsmth@gmail.com Signed-off-by: Christian Marangi Suggested-by: Ilpo Järvinen Reviewed-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/ioport.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 9afa30f9346f..e974fc087059 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -10,6 +10,7 @@ #define _LINUX_IOPORT_H #ifndef __ASSEMBLY__ +#include #include #include #include @@ -165,8 +166,12 @@ enum { #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, IORES_DESC_NONE) -#define DEFINE_RES(_start, _size, _flags) \ +#define __DEFINE_RES0() \ + DEFINE_RES_NAMED(0, 0, NULL, IORESOURCE_UNSET) +#define __DEFINE_RES3(_start, _size, _flags) \ DEFINE_RES_NAMED(_start, _size, NULL, _flags) +#define DEFINE_RES(...) \ + CONCATENATE(__DEFINE_RES, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) From 1921044eebf1d6861a6de1a76e3f63729a45e712 Mon Sep 17 00:00:00 2001 From: Chaitanya Mishra Date: Sat, 27 Dec 2025 14:52:29 +0530 Subject: [PATCH 036/107] lib/kstrtox: fix kstrtobool() docstring to mention enabled/disabled Commit ae5b3500856f ("kstrtox: add support for enabled and disabled in kstrtobool()") added support for 'e'/'E' (enabled) and 'd'/'D' (disabled) inputs, but did not update the docstring accordingly. 
Update the docstring to include 'Ee' (for true) and 'Dd' (for false) in the list of accepted first characters. Link: https://lkml.kernel.org/r/20251227092229.57330-1-chaitanyamishra.ai@gmail.com Fixes: ae5b3500856f ("kstrtox: add support for enabled and disabled in kstrtobool()") Signed-off-by: Chaitanya Mishra Cc: Mario Limonciello Signed-off-by: Andrew Morton --- lib/kstrtox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kstrtox.c b/lib/kstrtox.c index bdde40cd69d7..97be2a39f537 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -340,8 +340,8 @@ EXPORT_SYMBOL(kstrtos8); * @s: input string * @res: result * - * This routine returns 0 iff the first character is one of 'YyTt1NnFf0', or - * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value + * This routine returns 0 iff the first character is one of 'EeYyTt1DdNnFf0', + * or [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value * pointed to by res is updated upon finding a match. */ noinline From 998be0a4dbcaa796a05c7b52327f3a09c29d3662 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 30 Dec 2025 11:14:02 -0500 Subject: [PATCH 037/107] liveupdate: separate memfd support into LIVEUPDATE_MEMFD Decouple memfd preservation support from the core Live Update Orchestrator configuration. Previously, enabling CONFIG_LIVEUPDATE forced a dependency on CONFIG_SHMEM and unconditionally compiled memfd_luo.o. However, Live Update may be used for purposes that do not require memfd-backed memory preservation. Introduce CONFIG_LIVEUPDATE_MEMFD to gate memfd_luo.o. This moves the SHMEM and MEMFD_CREATE dependencies to the specific feature that needs them, allowing the base LIVEUPDATE option to be selected independently of shared memory support. 
Link: https://lkml.kernel.org/r/20251230161402.1542099-1-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Signed-off-by: Andrew Morton --- kernel/liveupdate/Kconfig | 17 ++++++++++++++++- mm/Makefile | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/liveupdate/Kconfig b/kernel/liveupdate/Kconfig index d2aeaf13c3ac..1a8513f16ef7 100644 --- a/kernel/liveupdate/Kconfig +++ b/kernel/liveupdate/Kconfig @@ -54,7 +54,6 @@ config KEXEC_HANDOVER_ENABLE_DEFAULT config LIVEUPDATE bool "Live Update Orchestrator" depends on KEXEC_HANDOVER - depends on SHMEM help Enable the Live Update Orchestrator. Live Update is a mechanism, typically based on kexec, that allows the kernel to be updated @@ -73,4 +72,20 @@ config LIVEUPDATE If unsure, say N. +config LIVEUPDATE_MEMFD + bool "Live update support for memfd" + depends on LIVEUPDATE + depends on MEMFD_CREATE + depends on SHMEM + default LIVEUPDATE + help + Enable live update support for memfd regions. This allows preserving + memfd-backed memory across kernel live updates. + + This can be used to back VM memory with memfds, allowing the guest + memory to persist, or for other user workloads needing to preserve + pages. + + If unsure, say N. 
+ endmenu diff --git a/mm/Makefile b/mm/Makefile index 2d0570a16e5b..e38fcfbb805c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -100,7 +100,7 @@ obj-$(CONFIG_NUMA) += memory-tiers.o obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o obj-$(CONFIG_PAGE_COUNTER) += page_counter.o -obj-$(CONFIG_LIVEUPDATE) += memfd_luo.o +obj-$(CONFIG_LIVEUPDATE_MEMFD) += memfd_luo.o obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o ifdef CONFIG_SWAP From e896c44aecfb7b3470470b4e63495dfa2b359060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 30 Dec 2025 08:13:15 +0100 Subject: [PATCH 038/107] types: drop definition of __EXPORTED_HEADERS__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This definition disarms the warning in uapi/linux/types.h about including kernel headers from user space. However the warning is already disarmed due to the fact that kernel code is built with -D__KERNEL__. Drop the pointless definition. Link: https://lkml.kernel.org/r/20251230-exported-headers-types-h-v1-1-947fc606f3d8@linutronix.de Signed-off-by: Thomas Weißschuh Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- include/linux/types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/types.h b/include/linux/types.h index d4437e9c452c..0cbb684eec5c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -2,7 +2,6 @@ #ifndef _LINUX_TYPES_H #define _LINUX_TYPES_H -#define __EXPORTED_HEADERS__ #include #ifndef __ASSEMBLY__ From 10d1c75ed4382a8e79874379caa2ead8952734f9 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Tue, 30 Dec 2025 22:16:07 -0800 Subject: [PATCH 039/107] ima: verify the previous kernel's IMA buffer lies in addressable RAM Patch series "Address page fault in ima_restore_measurement_list()", v3. 
When the second-stage kernel is booted via kexec with a limiting command line such as "mem=" we observe a page fault. BUG: unable to handle page fault for address: ffff97793ff47000 RIP: ima_restore_measurement_list+0xdc/0x45a #PF: error_code(0x0000) not-present page This happens on x86_64 only, as this is already fixed in aarch64 in commit: cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer against memory bounds") This patch (of 3): When the second-stage kernel is booted with a limiting command line (e.g. "mem="), the IMA measurement buffer handed over from the previous kernel may fall outside the addressable RAM of the new kernel. Accessing such a buffer can fault during early restore. Introduce a small generic helper, ima_validate_range(), which verifies that a physical [start, end] range for the previous-kernel IMA buffer lies within addressable memory: - On x86, use pfn_range_is_mapped(). - On OF based architectures, use page_is_ram(). Link: https://lkml.kernel.org/r/20251231061609.907170-1-harshit.m.mogalapalli@oracle.com Link: https://lkml.kernel.org/r/20251231061609.907170-2-harshit.m.mogalapalli@oracle.com Signed-off-by: Harshit Mogalapalli Reviewed-by: Mimi Zohar Cc: Alexander Graf Cc: Ard Biesheuvel Cc: Borislav Betkov Cc: guoweikang Cc: Henry Willard Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jiri Bohac Cc: Joel Granados Cc: Jonathan McDowell Cc: Mike Rapoport Cc: Paul Webb Cc: Sohil Mehta Cc: Sourabh Jain Cc: Thomas Gleinxer Cc: Yifei Liu Cc: Baoquan He Cc: Signed-off-by: Andrew Morton --- include/linux/ima.h | 1 + security/integrity/ima/ima_kexec.c | 35 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/linux/ima.h b/include/linux/ima.h index 8e29cb4e6a01..abf8923f8fc5 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -69,6 +69,7 @@ static inline int ima_measure_critical_data(const char *event_label, #ifdef CONFIG_HAVE_IMA_KEXEC int __init ima_free_kexec_buffer(void); int __init ima_get_kexec_buffer(void **addr, size_t *size); +int ima_validate_range(phys_addr_t phys, size_t size); #endif #ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c index 5beb69edd12f..36a34c54de58 100644 --- a/security/integrity/ima/ima_kexec.c +++ b/security/integrity/ima/ima_kexec.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include "ima.h" @@ -294,3 +296,36 @@ void __init ima_load_kexec_buffer(void) pr_debug("Error restoring the measurement list: %d\n", rc); } } + +/* + * ima_validate_range - verify a physical buffer lies in addressable RAM + * @phys: physical start address of the buffer from previous kernel + * @size: size of the buffer + * + * On success return 0. On failure returns -EINVAL so callers can skip + * restoring. 
+ */ +int ima_validate_range(phys_addr_t phys, size_t size) +{ + unsigned long start_pfn, end_pfn; + phys_addr_t end_phys; + + if (check_add_overflow(phys, (phys_addr_t)size - 1, &end_phys)) + return -EINVAL; + + start_pfn = PHYS_PFN(phys); + end_pfn = PHYS_PFN(end_phys); + +#ifdef CONFIG_X86 + if (!pfn_range_is_mapped(start_pfn, end_pfn)) +#else + if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) +#endif + { + pr_warn("IMA: previous kernel measurement buffer %pa (size 0x%zx) lies outside available memory\n", + &phys, size); + return -EINVAL; + } + + return 0; +} From 4d02233235ed0450de9c10fcdcf3484e3c9401ce Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Tue, 30 Dec 2025 22:16:08 -0800 Subject: [PATCH 040/107] of/kexec: refactor ima_get_kexec_buffer() to use ima_validate_range() Refactor the OF/DT ima_get_kexec_buffer() to use a generic helper to validate the address range. No functional change intended. Link: https://lkml.kernel.org/r/20251231061609.907170-3-harshit.m.mogalapalli@oracle.com Signed-off-by: Harshit Mogalapalli Reviewed-by: Mimi Zohar Cc: Alexander Graf Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Betkov Cc: guoweikang Cc: Henry Willard Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jiri Bohac Cc: Joel Granados Cc: Jonathan McDowell Cc: Mike Rapoport Cc: Paul Webb Cc: Sohil Mehta Cc: Sourabh Jain Cc: Thomas Gleinxer Cc: Yifei Liu Cc: Signed-off-by: Andrew Morton --- drivers/of/kexec.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index 1ee2d31816ae..c4cf3552c018 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -128,7 +128,6 @@ int __init ima_get_kexec_buffer(void **addr, size_t *size) { int ret, len; unsigned long tmp_addr; - unsigned long start_pfn, end_pfn; size_t tmp_size; const void *prop; @@ -144,17 +143,9 @@ int __init ima_get_kexec_buffer(void **addr, size_t *size) if (!tmp_size) return -ENOENT; - /* - * Calculate the PFNs for the buffer and ensure - * they are with in addressable memory. - */ - start_pfn = PHYS_PFN(tmp_addr); - end_pfn = PHYS_PFN(tmp_addr + tmp_size - 1); - if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) { - pr_warn("IMA buffer at 0x%lx, size = 0x%zx beyond memory\n", - tmp_addr, tmp_size); - return -EINVAL; - } + ret = ima_validate_range(tmp_addr, tmp_size); + if (ret) + return ret; *addr = __va(tmp_addr); *size = tmp_size; From c5489d04337b47e93c0623e8145fcba3f5739efd Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Tue, 30 Dec 2025 22:16:09 -0800 Subject: [PATCH 041/107] x86/kexec: add a sanity check on previous kernel's ima kexec buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the second-stage kernel is booted via kexec with a limiting command line such as "mem=", the physical range that contains the carried over IMA measurement list may fall outside the truncated RAM leading to a kernel panic. 
BUG: unable to handle page fault for address: ffff97793ff47000 RIP: ima_restore_measurement_list+0xdc/0x45a #PF: error_code(0x0000) – not-present page Other architectures already validate the range with page_is_ram(), as done in commit cbf9c4b9617b ("of: check previous kernel's ima-kexec-buffer against memory bounds") do a similar check on x86. Without carrying the measurement list across kexec, the attestation would fail. Link: https://lkml.kernel.org/r/20251231061609.907170-4-harshit.m.mogalapalli@oracle.com Signed-off-by: Harshit Mogalapalli Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec") Reported-by: Paul Webb Reviewed-by: Mimi Zohar Cc: Alexander Graf Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Betkov Cc: guoweikang Cc: Henry Willard Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Bohac Cc: Joel Granados Cc: Jonathan McDowell Cc: Mike Rapoport Cc: Sohil Mehta Cc: Sourabh Jain Cc: Thomas Gleinxer Cc: Yifei Liu Cc: Signed-off-by: Andrew Morton --- arch/x86/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1b2edd07a3e1..383d4a4784f5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -439,9 +439,15 @@ int __init ima_free_kexec_buffer(void) int __init ima_get_kexec_buffer(void **addr, size_t *size) { + int ret; + if (!ima_kexec_buffer_size) return -ENOENT; + ret = ima_validate_range(ima_kexec_buffer_phys, ima_kexec_buffer_size); + if (ret) + return ret; + *addr = __va(ima_kexec_buffer_phys); *size = ima_kexec_buffer_size; From 77983f611fa61d749db4579d20908663e5a0895e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 30 Dec 2025 15:25:13 +0100 Subject: [PATCH 042/107] ocfs2: adjust function name reference There is no function dlm_mast_regions(). However, dlm_match_regions() is passed the buffer "local", which it uses internally, so it seems like dlm_match_regions() was intended. 
Link: https://lkml.kernel.org/r/20251230142513.95467-1-Julia.Lawall@inria.fr Signed-off-by: Julia Lawall Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/dlm/dlmdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 2347a50f079b..cf3ca2f597c2 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1105,7 +1105,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, qr->qr_domain); - /* buffer used in dlm_mast_regions() */ + /* buffer used in dlm_match_regions() */ local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); if (!local) return -ENOMEM; From a7e53bfb43667dd0eaf046c1725105e2cfe3be7c Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 5 Jan 2026 18:58:34 +0200 Subject: [PATCH 043/107] kho/abi: luo: make generated documentation more coherent Patch series "kho: ABI headers and Documentation updates". LUO started adding KHO ABI headers to include/linux/kho/abi, but the core parts of KHO and memblock are still using the old way for descriptions on their ABIs. Let's consolidate all things KHO in include/linux/kho/abi. And while on that, make some documentation updates to have more coherent KHO docs. This patch (of 6): LUO ABI description starts with "This header defines" which is fine in the header but reads weird in the generated html documentation. Update it to make the generated documentation coherent. 
Link: https://lkml.kernel.org/r/20260105165839.285270-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20260105165839.285270-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Jonathan Corbet Cc: Pasha Tatashin Cc: Jason Miu Signed-off-by: Andrew Morton --- include/linux/kho/abi/luo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h index bb099c92e469..beb86847b544 100644 --- a/include/linux/kho/abi/luo.h +++ b/include/linux/kho/abi/luo.h @@ -8,10 +8,10 @@ /** * DOC: Live Update Orchestrator ABI * - * This header defines the stable Application Binary Interface used by the - * Live Update Orchestrator to pass state from a pre-update kernel to a - * post-update kernel. The ABI is built upon the Kexec HandOver framework - * and uses a Flattened Device Tree to describe the preserved data. + * Live Update Orchestrator uses the stable Application Binary Interface + * defined below to pass state from a pre-update kernel to a post-update + * kernel. The ABI is built upon the Kexec HandOver framework and uses a + * Flattened Device Tree to describe the preserved data. * * This interface is a contract. Any modification to the FDT structure, node * properties, compatible strings, or the layout of the `__packed` serialization From 32cb2729c956162e5ca96fe5509b38eb9561e8c0 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 5 Jan 2026 18:58:35 +0200 Subject: [PATCH 044/107] kho/abi: memfd: make generated documentation more coherent memfd preservation ABI description starts with "This header defines" which is fine in the header but reads weird in the generated html documentation. Update it to make the generated documentation coherent. 
Link: https://lkml.kernel.org/r/20260105165839.285270-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Jason Miu Cc: Jonathan Corbet Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/kho/abi/memfd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h index da7d063474a1..c211c31334a3 100644 --- a/include/linux/kho/abi/memfd.h +++ b/include/linux/kho/abi/memfd.h @@ -17,8 +17,8 @@ /** * DOC: memfd Live Update ABI * - * This header defines the ABI for preserving the state of a memfd across a - * kexec reboot using the LUO. + * memfd uses the ABI defined below for preserving its state across a kexec + * reboot using the LUO. * * The state is serialized into a packed structure `struct memfd_luo_ser` * which is handed over to the next kernel via the KHO mechanism. From a6f4e56828029bc3b9a79910b38026fd2958915e Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 5 Jan 2026 18:58:36 +0200 Subject: [PATCH 045/107] kho: docs: combine concepts and FDT documentation Currently index.rst in KHO documentation looks empty and sad as it only contains links to "Kexec Handover Concepts" and "KHO FDT" chapters. Inline contents of these chapters into index.rst to provide a single coherent chapter describing KHO. While on it, drop parts of the KHO FDT description that will be superseded by addition of KHO ABI documentation. 
[rppt@kernel.org: fix Documentation/core-api/kho/index.rst] Link: https://lkml.kernel.org/r/aV4bnHlBXGpT_FMc@kernel.org Link: https://lkml.kernel.org/r/20260105165839.285270-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Jason Miu Cc: Jonathan Corbet Cc: Pratyush Yadav Signed-off-by: Andrew Morton --- Documentation/core-api/kho/concepts.rst | 74 ----------------------- Documentation/core-api/kho/fdt.rst | 80 ------------------------- Documentation/core-api/kho/index.rst | 77 ++++++++++++++++++++++-- Documentation/core-api/liveupdate.rst | 2 +- Documentation/mm/memfd_preservation.rst | 2 +- kernel/liveupdate/luo_core.c | 3 +- 6 files changed, 75 insertions(+), 163 deletions(-) delete mode 100644 Documentation/core-api/kho/concepts.rst delete mode 100644 Documentation/core-api/kho/fdt.rst diff --git a/Documentation/core-api/kho/concepts.rst b/Documentation/core-api/kho/concepts.rst deleted file mode 100644 index d626d1dbd678..000000000000 --- a/Documentation/core-api/kho/concepts.rst +++ /dev/null @@ -1,74 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later -.. _kho-concepts: - -======================= -Kexec Handover Concepts -======================= - -Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory -regions, which could contain serialized system states, across kexec. - -It introduces multiple concepts: - -KHO FDT -======= - -Every KHO kexec carries a KHO specific flattened device tree (FDT) blob -that describes preserved memory regions. These regions contain either -serialized subsystem states, or in-memory data that shall not be touched -across kexec. After KHO, subsystems can retrieve and restore preserved -memory regions from KHO FDT. 
- -KHO only uses the FDT container format and libfdt library, but does not -adhere to the same property semantics that normal device trees do: Properties -are passed in native endianness and standardized properties like ``regs`` and -``ranges`` do not exist, hence there are no ``#...-cells`` properties. - -KHO is still under development. The FDT schema is unstable and would change -in the future. - -Scratch Regions -=============== - -To boot into kexec, we need to have a physically contiguous memory range that -contains no handed over memory. Kexec then places the target kernel and initrd -into that region. The new kernel exclusively uses this region for memory -allocations before during boot up to the initialization of the page allocator. - -We guarantee that we always have such regions through the scratch regions: On -first boot KHO allocates several physically contiguous memory regions. Since -after kexec these regions will be used by early memory allocations, there is a -scratch region per NUMA node plus a scratch region to satisfy allocations -requests that do not require particular NUMA node assignment. -By default, size of the scratch region is calculated based on amount of memory -allocated during boot. The ``kho_scratch`` kernel command line option may be -used to explicitly define size of the scratch regions. -The scratch regions are declared as CMA when page allocator is initialized so -that their memory can be used during system lifetime. CMA gives us the -guarantee that no handover pages land in that region, because handover pages -must be at a static physical memory location and CMA enforces that only -movable pages can be located inside. - -After KHO kexec, we ignore the ``kho_scratch`` kernel command line option and -instead reuse the exact same region that was originally allocated. This allows -us to recursively execute any amount of KHO kexecs. 
Because we used this region -for boot memory allocations and as target memory for kexec blobs, some parts -of that memory region may be reserved. These reservations are irrelevant for -the next KHO, because kexec can overwrite even the original kernel. - -.. _kho-finalization-phase: - -KHO finalization phase -====================== - -To enable user space based kexec file loader, the kernel needs to be able to -provide the FDT that describes the current kernel's state before -performing the actual kexec. The process of generating that FDT is -called serialization. When the FDT is generated, some properties -of the system may become immutable because they are already written down -in the FDT. That state is called the KHO finalization phase. - -Public API -========== -.. kernel-doc:: kernel/liveupdate/kexec_handover.c - :export: diff --git a/Documentation/core-api/kho/fdt.rst b/Documentation/core-api/kho/fdt.rst deleted file mode 100644 index 62505285d60d..000000000000 --- a/Documentation/core-api/kho/fdt.rst +++ /dev/null @@ -1,80 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later - -======= -KHO FDT -======= - -KHO uses the flattened device tree (FDT) container format and libfdt -library to create and parse the data that is passed between the -kernels. The properties in KHO FDT are stored in native format. -It includes the physical address of an in-memory structure describing -all preserved memory regions, as well as physical addresses of KHO users' -own FDTs. Interpreting those sub FDTs is the responsibility of KHO users. - -KHO nodes and properties -======================== - -Property ``preserved-memory-map`` ---------------------------------- - -KHO saves a special property named ``preserved-memory-map`` under the root node. -This node contains the physical address of an in-memory structure for KHO to -preserve memory regions across kexec. 
- -Property ``compatible`` ------------------------ - -The ``compatible`` property determines compatibility between the kernel -that created the KHO FDT and the kernel that attempts to load it. -If the kernel that loads the KHO FDT is not compatible with it, the entire -KHO process will be bypassed. - -Property ``fdt`` ----------------- - -Generally, a KHO user serialize its state into its own FDT and instructs -KHO to preserve the underlying memory, such that after kexec, the new kernel -can recover its state from the preserved FDT. - -A KHO user thus can create a node in KHO root tree and save the physical address -of its own FDT in that node's property ``fdt`` . - -Examples -======== - -The following example demonstrates KHO FDT that preserves two memory -regions created with ``reserve_mem`` kernel command line parameter:: - - /dts-v1/; - - / { - compatible = "kho-v1"; - - preserved-memory-map = <0x40be16 0x1000000>; - - memblock { - fdt = <0x1517 0x1000000>; - }; - }; - -where the ``memblock`` node contains an FDT that is requested by the -subsystem memblock for preservation. The FDT contains the following -serialized data:: - - /dts-v1/; - - / { - compatible = "memblock-v1"; - - n1 { - compatible = "reserve-mem-v1"; - start = <0xc06b 0x4000000>; - size = <0x04 0x00>; - }; - - n2 { - compatible = "reserve-mem-v1"; - start = <0xc067 0x4000000>; - size = <0x04 0x00>; - }; - }; diff --git a/Documentation/core-api/kho/index.rst b/Documentation/core-api/kho/index.rst index 0c63b0c5c143..1733b3c3e976 100644 --- a/Documentation/core-api/kho/index.rst +++ b/Documentation/core-api/kho/index.rst @@ -1,13 +1,80 @@ .. SPDX-License-Identifier: GPL-2.0-or-later +.. _kho-concepts: + ======================== Kexec Handover Subsystem ======================== -.. toctree:: - :maxdepth: 1 +Overview +======== - concepts - fdt +Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory +regions, which could contain serialized system states, across kexec. -.. 
only:: subproject and html +KHO uses :ref:`flattened device tree (FDT) ` to pass information about +the preserved state from pre-exec kernel to post-kexec kernel and :ref:`scratch +memory regions ` to ensure integrity of the preserved memory. + +.. _kho_fdt: + +KHO FDT +======= +Every KHO kexec carries a KHO specific flattened device tree (FDT) blob that +describes the preserved state. The FDT includes properties describing preserved +memory regions and nodes that hold subsystem specific state. + +The preserved memory regions contain either serialized subsystem states, or +in-memory data that shall not be touched across kexec. After KHO, subsystems +can retrieve and restore the preserved state from KHO FDT. + +Subsystems participating in KHO can define their own format for state +serialization and preservation. + +.. _kho_scratch: + +Scratch Regions +=============== + +To boot into kexec, we need to have a physically contiguous memory range that +contains no handed over memory. Kexec then places the target kernel and initrd +into that region. The new kernel exclusively uses this region for memory +allocations before during boot up to the initialization of the page allocator. + +We guarantee that we always have such regions through the scratch regions: On +first boot KHO allocates several physically contiguous memory regions. Since +after kexec these regions will be used by early memory allocations, there is a +scratch region per NUMA node plus a scratch region to satisfy allocations +requests that do not require particular NUMA node assignment. +By default, size of the scratch region is calculated based on amount of memory +allocated during boot. The ``kho_scratch`` kernel command line option may be +used to explicitly define size of the scratch regions. +The scratch regions are declared as CMA when page allocator is initialized so +that their memory can be used during system lifetime. 
CMA gives us the +guarantee that no handover pages land in that region, because handover pages +must be at a static physical memory location and CMA enforces that only +movable pages can be located inside. + +After KHO kexec, we ignore the ``kho_scratch`` kernel command line option and +instead reuse the exact same region that was originally allocated. This allows +us to recursively execute any amount of KHO kexecs. Because we used this region +for boot memory allocations and as target memory for kexec blobs, some parts +of that memory region may be reserved. These reservations are irrelevant for +the next KHO, because kexec can overwrite even the original kernel. + +.. _kho-finalization-phase: + +KHO finalization phase +====================== + +To enable user space based kexec file loader, the kernel needs to be able to +provide the FDT that describes the current kernel's state before +performing the actual kexec. The process of generating that FDT is +called serialization. When the FDT is generated, some properties +of the system may become immutable because they are already written down +in the FDT. That state is called the KHO finalization phase. 
+ +See Also +======== + +- :doc:`/admin-guide/mm/kho` diff --git a/Documentation/core-api/liveupdate.rst b/Documentation/core-api/liveupdate.rst index 7960eb15a81f..e2aba13494cf 100644 --- a/Documentation/core-api/liveupdate.rst +++ b/Documentation/core-api/liveupdate.rst @@ -58,4 +58,4 @@ See Also ======== - :doc:`Live Update uAPI ` -- :doc:`/core-api/kho/concepts` +- :doc:`/core-api/kho/index` diff --git a/Documentation/mm/memfd_preservation.rst b/Documentation/mm/memfd_preservation.rst index 66e0fb6d5ef0..a8a5b476afd3 100644 --- a/Documentation/mm/memfd_preservation.rst +++ b/Documentation/mm/memfd_preservation.rst @@ -20,4 +20,4 @@ See Also ======== - :doc:`/core-api/liveupdate` -- :doc:`/core-api/kho/concepts` +- :doc:`/core-api/kho/index` diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c index 944663d99dd9..a26c093eb8eb 100644 --- a/kernel/liveupdate/luo_core.c +++ b/kernel/liveupdate/luo_core.c @@ -35,8 +35,7 @@ * iommu, interrupts, vfio, participating filesystems, and memory management. * * LUO uses Kexec Handover to transfer memory state from the current kernel to - * the next kernel. For more details see - * Documentation/core-api/kho/concepts.rst. + * the next kernel. For more details see Documentation/core-api/kho/index.rst. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt From 5e1ea1e27b6ff237122ac6cb30e0b8ea4618f75f Mon Sep 17 00:00:00 2001 From: Jason Miu Date: Mon, 5 Jan 2026 18:58:37 +0200 Subject: [PATCH 046/107] kho: introduce KHO FDT ABI header Introduce the `include/linux/kho/abi/kexec_handover.h` header file, which defines the stable ABI for the KHO mechanism. This header specifies how preserved data is passed between kernels using an FDT. The ABI contract includes the FDT structure, node properties, and the "kho-v1" compatible string. 
By centralizing these definitions, this header serves as the foundational agreement for inter-kernel communication of preserved states, ensuring forward compatibility and preventing misinterpretation of data across kexec transitions. Since the ABI definitions are now centralized in the header files, the YAML files that previously described the FDT interfaces are redundant. These redundant files have therefore been removed. Link: https://lkml.kernel.org/r/20260105165839.285270-5-rppt@kernel.org Signed-off-by: Jason Miu Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Jonathan Corbet Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- Documentation/core-api/kho/abi.rst | 16 ++++ Documentation/core-api/kho/bindings/kho.yaml | 43 ---------- .../core-api/kho/bindings/sub-fdt.yaml | 27 ------ Documentation/core-api/kho/index.rst | 9 ++ MAINTAINERS | 1 + include/linux/kho/abi/kexec_handover.h | 85 +++++++++++++++++++ kernel/liveupdate/kexec_handover.c | 19 ++--- 7 files changed, 120 insertions(+), 80 deletions(-) create mode 100644 Documentation/core-api/kho/abi.rst delete mode 100644 Documentation/core-api/kho/bindings/kho.yaml delete mode 100644 Documentation/core-api/kho/bindings/sub-fdt.yaml create mode 100644 include/linux/kho/abi/kexec_handover.h diff --git a/Documentation/core-api/kho/abi.rst b/Documentation/core-api/kho/abi.rst new file mode 100644 index 000000000000..a1ee0f481727 --- /dev/null +++ b/Documentation/core-api/kho/abi.rst @@ -0,0 +1,16 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +================== +Kexec Handover ABI +================== + +Core Kexec Handover ABI +======================== + +.. 
kernel-doc:: include/linux/kho/abi/kexec_handover.h + :doc: Kexec Handover ABI + +See Also +======== + +- :doc:`/admin-guide/mm/kho` diff --git a/Documentation/core-api/kho/bindings/kho.yaml b/Documentation/core-api/kho/bindings/kho.yaml deleted file mode 100644 index 11e8ab7b219d..000000000000 --- a/Documentation/core-api/kho/bindings/kho.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: Kexec HandOver (KHO) root tree - -maintainers: - - Mike Rapoport - - Changyuan Lyu - -description: | - System memory preserved by KHO across kexec. - -properties: - compatible: - enum: - - kho-v1 - - preserved-memory-map: - description: | - physical address (u64) of an in-memory structure describing all preserved - folios and memory ranges. - -patternProperties: - "$[0-9a-f_]+^": - $ref: sub-fdt.yaml# - description: physical address of a KHO user's own FDT. - -required: - - compatible - - preserved-memory-map - -additionalProperties: false - -examples: - - | - kho { - compatible = "kho-v1"; - preserved-memory-map = <0xf0be16 0x1000000>; - - memblock { - fdt = <0x80cc16 0x1000000>; - }; - }; diff --git a/Documentation/core-api/kho/bindings/sub-fdt.yaml b/Documentation/core-api/kho/bindings/sub-fdt.yaml deleted file mode 100644 index b9a3d2d24850..000000000000 --- a/Documentation/core-api/kho/bindings/sub-fdt.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: KHO users' FDT address - -maintainers: - - Mike Rapoport - - Changyuan Lyu - -description: | - Physical address of an FDT blob registered by a KHO user. - -properties: - fdt: - description: | - physical address (u64) of an FDT blob. 
- -required: - - fdt - -additionalProperties: false - -examples: - - | - memblock { - fdt = <0x80cc16 0x1000000>; - }; diff --git a/Documentation/core-api/kho/index.rst b/Documentation/core-api/kho/index.rst index 1733b3c3e976..dcc6a36cc134 100644 --- a/Documentation/core-api/kho/index.rst +++ b/Documentation/core-api/kho/index.rst @@ -31,6 +31,15 @@ can retrieve and restore the preserved state from KHO FDT. Subsystems participating in KHO can define their own format for state serialization and preservation. +KHO FDT and structures defined by the subsystems form an ABI between pre-kexec +and post-kexec kernels. This ABI is defined by header files in +``include/linux/kho/abi`` directory. + +.. toctree:: + :maxdepth: 1 + + abi.rst + .. _kho_scratch: Scratch Regions diff --git a/MAINTAINERS b/MAINTAINERS index 4dcbcb5c14f0..9d724a7ade71 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13968,6 +13968,7 @@ F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* F: include/linux/kexec_handover.h F: include/linux/kho/ +F: include/linux/kho/abi/ F: kernel/liveupdate/kexec_handover* F: lib/test_kho.c F: tools/testing/selftests/kho/ diff --git a/include/linux/kho/abi/kexec_handover.h b/include/linux/kho/abi/kexec_handover.h new file mode 100644 index 000000000000..af9fa8c134c7 --- /dev/null +++ b/include/linux/kho/abi/kexec_handover.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Copyright (C) 2023 Alexander Graf + * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport + * Copyright (C) 2025 Google LLC, Changyuan Lyu + * Copyright (C) 2025 Google LLC, Jason Miu + */ + +#ifndef _LINUX_KHO_ABI_KEXEC_HANDOVER_H +#define _LINUX_KHO_ABI_KEXEC_HANDOVER_H + +/** + * DOC: Kexec Handover ABI + * + * Kexec Handover uses the ABI defined below for passing preserved data from + * one kernel to the next. + * The ABI uses Flattened Device Tree (FDT) format. 
The first kernel creates an + * FDT which is then passed to the next kernel during a kexec handover. + * + * This interface is a contract. Any modification to the FDT structure, node + * properties, compatible string, or the layout of the data structures + * referenced here constitutes a breaking change. Such changes require + * incrementing the version number in KHO_FDT_COMPATIBLE to prevent a new kernel + * from misinterpreting data from an older kernel. Changes are allowed provided + * the compatibility version is incremented. However, backward/forward + * compatibility is only guaranteed for kernels supporting the same ABI version. + * + * FDT Structure Overview: + * The FDT serves as a central registry for physical + * addresses of preserved data structures and sub-FDTs. The first kernel + * populates this FDT with references to memory regions and other FDTs that + * need to persist across the kexec transition. The subsequent kernel then + * parses this FDT to locate and restore the preserved data.:: + * + * / { + * compatible = "kho-v1"; + * + * preserved-memory-map = <0x...>; + * + * { + * fdt = <0x...>; + * }; + * + * { + * fdt = <0x...>; + * }; + * ... ... + * { + * fdt = <0x...>; + * }; + * }; + * + * Root KHO Node (/): + * - compatible: "kho-v1" + * + * Indentifies the overall KHO ABI version. + * + * - preserved-memory-map: u64 + * + * Physical memory address pointing to the root of the + * preserved memory map data structure. + * + * Subnodes (): + * Subnodes can also be added to the root node to + * describe other preserved data blobs. The + * is provided by the subsystem that uses KHO for preserving its + * data. + * + * - fdt: u64 + * + * Physical address pointing to a subnode FDT blob that is also + * being preserved. + */ + +/* The compatible string for the KHO FDT root node. */ +#define KHO_FDT_COMPATIBLE "kho-v1" + +/* The FDT property for the preserved memory map. 
*/ +#define KHO_FDT_MEMORY_MAP_PROP_NAME "preserved-memory-map" + +/* The FDT property for sub-FDTs. */ +#define KHO_FDT_SUB_TREE_PROP_NAME "fdt" + +#endif /* _LINUX_KHO_ABI_KEXEC_HANDOVER_H */ diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index d4482b6e3cae..8f57d6e040af 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -33,10 +34,7 @@ #include "../kexec_internal.h" #include "kexec_handover_internal.h" -#define KHO_FDT_COMPATIBLE "kho-v1" -#define PROP_PRESERVED_MEMORY_MAP "preserved-memory-map" -#define PROP_SUB_FDT "fdt" - +/* The magic token for preserved pages */ #define KHO_PAGE_MAGIC 0x4b484f50U /* ASCII for 'KHOP' */ /* @@ -378,7 +376,7 @@ static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk) void *ptr; u64 phys; - ptr = fdt_getprop_w(kho_out.fdt, 0, PROP_PRESERVED_MEMORY_MAP, NULL); + ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL); /* Check and discard previous memory map */ phys = get_unaligned((u64 *)ptr); @@ -466,7 +464,7 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt) const void *mem_ptr; int len; - mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len); + mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len); if (!mem_ptr || len != sizeof(u64)) { pr_err("failed to get preserved memory bitmaps\n"); return 0; @@ -727,7 +725,8 @@ int kho_add_subtree(const char *name, void *fdt) goto out_pack; } - err = fdt_setprop(root_fdt, off, PROP_SUB_FDT, &phys, sizeof(phys)); + err = fdt_setprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, + &phys, sizeof(phys)); if (err < 0) goto out_pack; @@ -758,7 +757,7 @@ void kho_remove_subtree(void *fdt) const u64 *val; int len; - val = fdt_getprop(root_fdt, off, PROP_SUB_FDT, &len); + val = fdt_getprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, &len); if (!val || len != 
sizeof(phys_addr_t)) continue; @@ -1305,7 +1304,7 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys) if (offset < 0) return -ENOENT; - val = fdt_getprop(fdt, offset, PROP_SUB_FDT, &len); + val = fdt_getprop(fdt, offset, KHO_FDT_SUB_TREE_PROP_NAME, &len); if (!val || len != sizeof(*val)) return -EINVAL; @@ -1325,7 +1324,7 @@ static __init int kho_out_fdt_setup(void) err |= fdt_finish_reservemap(root); err |= fdt_begin_node(root, ""); err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE); - err |= fdt_property(root, PROP_PRESERVED_MEMORY_MAP, &empty_mem_map, + err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map, sizeof(empty_mem_map)); err |= fdt_end_node(root); err |= fdt_finish(root); From ac2d8102c4b88713a8fa371d5d802fcff131d6ac Mon Sep 17 00:00:00 2001 From: Jason Miu Date: Mon, 5 Jan 2026 18:58:38 +0200 Subject: [PATCH 047/107] kho: relocate vmalloc preservation structure to KHO ABI header The `struct kho_vmalloc` defines the in-memory layout for preserving vmalloc regions across kexec. This layout is a contract between kernels and part of the KHO ABI. To reflect this relationship, the related structs and helper macros are relocated to the ABI header, `include/linux/kho/abi/kexec_handover.h`. This move places the structure's definition under the protection of the KHO_FDT_COMPATIBLE version string. The structure and its components are now also documented within the ABI header to describe the contract and prevent ABI breaks. 
[rppt@kernel.org: update comment, per Pratyush] Link: https://lkml.kernel.org/r/aW_Mqp6HcqLwQImS@kernel.org Link: https://lkml.kernel.org/r/20260105165839.285270-6-rppt@kernel.org Signed-off-by: Jason Miu Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Jonathan Corbet Cc: Pasha Tatashin Cc: Pratyush Yadav Signed-off-by: Andrew Morton --- Documentation/core-api/kho/abi.rst | 6 ++ include/linux/kexec_handover.h | 27 +-------- include/linux/kho/abi/kexec_handover.h | 78 ++++++++++++++++++++++++++ include/linux/kho/abi/memfd.h | 2 +- kernel/liveupdate/kexec_handover.c | 15 ----- lib/test_kho.c | 1 + 6 files changed, 88 insertions(+), 41 deletions(-) diff --git a/Documentation/core-api/kho/abi.rst b/Documentation/core-api/kho/abi.rst index a1ee0f481727..1d9916adee23 100644 --- a/Documentation/core-api/kho/abi.rst +++ b/Documentation/core-api/kho/abi.rst @@ -10,6 +10,12 @@ Core Kexec Handover ABI .. kernel-doc:: include/linux/kho/abi/kexec_handover.h :doc: Kexec Handover ABI +vmalloc preservation ABI +======================== + +.. kernel-doc:: include/linux/kho/abi/kexec_handover.h + :doc: Kexec Handover ABI for vmalloc Preservation + See Also ======== diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 5f7b9de97e8d..a56ff3ffaf17 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -11,34 +11,11 @@ struct kho_scratch { phys_addr_t size; }; +struct kho_vmalloc; + struct folio; struct page; -#define DECLARE_KHOSER_PTR(name, type) \ - union { \ - phys_addr_t phys; \ - type ptr; \ - } name -#define KHOSER_STORE_PTR(dest, val) \ - ({ \ - typeof(val) v = val; \ - typecheck(typeof((dest).ptr), v); \ - (dest).phys = virt_to_phys(v); \ - }) -#define KHOSER_LOAD_PTR(src) \ - ({ \ - typeof(src) s = src; \ - (typeof((s).ptr))((s).phys ? 
phys_to_virt((s).phys) : NULL); \ - }) - -struct kho_vmalloc_chunk; -struct kho_vmalloc { - DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); - unsigned int total_pages; - unsigned short flags; - unsigned short order; -}; - #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); bool is_kho_boot(void); diff --git a/include/linux/kho/abi/kexec_handover.h b/include/linux/kho/abi/kexec_handover.h index af9fa8c134c7..2201a0d2c159 100644 --- a/include/linux/kho/abi/kexec_handover.h +++ b/include/linux/kho/abi/kexec_handover.h @@ -10,6 +10,8 @@ #ifndef _LINUX_KHO_ABI_KEXEC_HANDOVER_H #define _LINUX_KHO_ABI_KEXEC_HANDOVER_H +#include + /** * DOC: Kexec Handover ABI * @@ -82,4 +84,80 @@ /* The FDT property for sub-FDTs. */ #define KHO_FDT_SUB_TREE_PROP_NAME "fdt" +/** + * DOC: Kexec Handover ABI for vmalloc Preservation + * + * The Kexec Handover ABI for preserving vmalloc'ed memory is defined by + * a set of structures and helper macros. The layout of these structures is a + * stable contract between kernels and is versioned by the KHO_FDT_COMPATIBLE + * string. + * + * The preservation is managed through a main descriptor &struct kho_vmalloc, + * which points to a linked list of &struct kho_vmalloc_chunk structures. These + * chunks contain the physical addresses of the preserved pages, allowing the + * next kernel to reconstruct the vmalloc area with the same content and layout. + * Helper macros are also defined for storing and loading pointers within + * these structures. + */ + +/* Helper macro to define a union for a serializable pointer. */ +#define DECLARE_KHOSER_PTR(name, type) \ + union { \ + u64 phys; \ + type ptr; \ + } name + +/* Stores the physical address of a serializable pointer. */ +#define KHOSER_STORE_PTR(dest, val) \ + ({ \ + typeof(val) v = val; \ + typecheck(typeof((dest).ptr), v); \ + (dest).phys = virt_to_phys(v); \ + }) + +/* Loads the stored physical address back to a pointer. 
*/ +#define KHOSER_LOAD_PTR(src) \ + ({ \ + typeof(src) s = src; \ + (typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \ + }) + +/* + * This header is embedded at the beginning of each `kho_vmalloc_chunk` + * and contains a pointer to the next chunk in the linked list, + * stored as a physical address for handover. + */ +struct kho_vmalloc_hdr { + DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); +}; + +#define KHO_VMALLOC_SIZE \ + ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ + sizeof(u64)) + +/* + * Each chunk is a single page and is part of a linked list that describes + * a preserved vmalloc area. It contains the header with the link to the next + * chunk and a zero terminated array of physical addresses of the pages that + * make up the preserved vmalloc area. + */ +struct kho_vmalloc_chunk { + struct kho_vmalloc_hdr hdr; + u64 phys[KHO_VMALLOC_SIZE]; +}; + +static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); + +/* + * Describes a preserved vmalloc memory area, including the + * total number of pages, allocation flags, page order, and a pointer to the + * first chunk of physical page addresses. 
+ */ +struct kho_vmalloc { + DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); + unsigned int total_pages; + unsigned short flags; + unsigned short order; +}; + #endif /* _LINUX_KHO_ABI_KEXEC_HANDOVER_H */ diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h index c211c31334a3..68cb6303b846 100644 --- a/include/linux/kho/abi/memfd.h +++ b/include/linux/kho/abi/memfd.h @@ -12,7 +12,7 @@ #define _LINUX_KHO_ABI_MEMFD_H #include -#include +#include /** * DOC: memfd Live Update ABI diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 8f57d6e040af..66fcdda0ebdc 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -876,21 +876,6 @@ void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) } EXPORT_SYMBOL_GPL(kho_unpreserve_pages); -struct kho_vmalloc_hdr { - DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); -}; - -#define KHO_VMALLOC_SIZE \ - ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ - sizeof(phys_addr_t)) - -struct kho_vmalloc_chunk { - struct kho_vmalloc_hdr hdr; - phys_addr_t phys[KHO_VMALLOC_SIZE]; -}; - -static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); - /* vmalloc flags KHO supports */ #define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP) diff --git a/lib/test_kho.c b/lib/test_kho.c index 47de56280795..3431daca6968 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -19,6 +19,7 @@ #include #include #include +#include #include From dd1e79ef6ca188678ece81a77d0076ae7403116c Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 5 Jan 2026 18:58:39 +0200 Subject: [PATCH 048/107] kho/abi: add memblock ABI header Introduce KHO ABI header describing preservation ABI for memblock's reserve_mem regions and link the relevant documentation to KHO docs. 
[lukas.bulwahn@redhat.com: MAINTAINERS: adjust file entry in MEMBLOCK AND MEMORY MANAGEMENT INITIALIZATION] Link: https://lkml.kernel.org/r/20260107090438.22901-1-lukas.bulwahn@redhat.com [rppt@kernel.org: update reserved_mem node description, per Pratyush] Link: https://lkml.kernel.org/r/aW_M-HYZzx5SkbnZ@kernel.org Link: https://lkml.kernel.org/r/20260105165839.285270-7-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Jason Miu Cc: Jonathan Corbet Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- Documentation/core-api/kho/abi.rst | 6 ++ .../kho/bindings/memblock/memblock.yaml | 39 ---------- .../kho/bindings/memblock/reserve-mem.yaml | 40 ---------- MAINTAINERS | 2 +- include/linux/kho/abi/memblock.h | 73 +++++++++++++++++++ mm/memblock.c | 4 +- 6 files changed, 81 insertions(+), 83 deletions(-) delete mode 100644 Documentation/core-api/kho/bindings/memblock/memblock.yaml delete mode 100644 Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml create mode 100644 include/linux/kho/abi/memblock.h diff --git a/Documentation/core-api/kho/abi.rst b/Documentation/core-api/kho/abi.rst index 1d9916adee23..2e63be3486cf 100644 --- a/Documentation/core-api/kho/abi.rst +++ b/Documentation/core-api/kho/abi.rst @@ -16,6 +16,12 @@ vmalloc preservation ABI .. kernel-doc:: include/linux/kho/abi/kexec_handover.h :doc: Kexec Handover ABI for vmalloc Preservation +memblock preservation ABI +========================= + +.. 
kernel-doc:: include/linux/kho/abi/memblock.h + :doc: memblock kexec handover ABI + See Also ======== diff --git a/Documentation/core-api/kho/bindings/memblock/memblock.yaml b/Documentation/core-api/kho/bindings/memblock/memblock.yaml deleted file mode 100644 index d388c28eb91d..000000000000 --- a/Documentation/core-api/kho/bindings/memblock/memblock.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: Memblock reserved memory - -maintainers: - - Mike Rapoport - -description: | - Memblock can serialize its current memory reservations created with - reserve_mem command line option across kexec through KHO. - The post-KHO kernel can then consume these reservations and they are - guaranteed to have the same physical address. - -properties: - compatible: - enum: - - reserve-mem-v1 - -patternProperties: - "$[0-9a-f_]+^": - $ref: reserve-mem.yaml# - description: reserved memory regions - -required: - - compatible - -additionalProperties: false - -examples: - - | - memblock { - compatible = "memblock-v1"; - n1 { - compatible = "reserve-mem-v1"; - start = <0xc06b 0x4000000>; - size = <0x04 0x00>; - }; - }; diff --git a/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml b/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml deleted file mode 100644 index 10282d3d1bcd..000000000000 --- a/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: Memblock reserved memory regions - -maintainers: - - Mike Rapoport - -description: | - Memblock can serialize its current memory reservations created with - reserve_mem command line option across kexec through KHO. - This object describes each such region. - -properties: - compatible: - enum: - - reserve-mem-v1 - - start: - description: | - physical address (u64) of the reserved memory region. 
- - size: - description: | - size (u64) of the reserved memory region. - -required: - - compatible - - start - - size - -additionalProperties: false - -examples: - - | - n1 { - compatible = "reserve-mem-v1"; - start = <0xc06b 0x4000000>; - size = <0x04 0x00>; - }; diff --git a/MAINTAINERS b/MAINTAINERS index 9d724a7ade71..92b377cd131b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16396,7 +16396,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git for-next T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git fixes F: Documentation/core-api/boot-time-mm.rst -F: Documentation/core-api/kho/bindings/memblock/* +F: include/linux/kho/abi/memblock.h F: include/linux/memblock.h F: mm/bootmem_info.c F: mm/memblock.c diff --git a/include/linux/kho/abi/memblock.h b/include/linux/kho/abi/memblock.h new file mode 100644 index 000000000000..27b042f470e1 --- /dev/null +++ b/include/linux/kho/abi/memblock.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KHO_ABI_MEMBLOCK_H +#define _LINUX_KHO_ABI_MEMBLOCK_H + +/** + * DOC: memblock kexec handover ABI + * + * Memblock can serialize its current memory reservations created with + * reserve_mem command line option across kexec through KHO. + * The post-KHO kernel can then consume these reservations and they are + * guaranteed to have the same physical address. + * + * The state is serialized using Flattened Device Tree (FDT) format. Any + * modification to the FDT structure, node properties, or the compatible + * strings constitutes a breaking change. Such changes require incrementing the + * version number in the relevant `_COMPATIBLE` string to prevent a new kernel + * from misinterpreting data from an old kernel. + * + * Changes are allowed provided the compatibility version is incremented. + * However, backward/forward compatibility is only guaranteed for kernels + * supporting the same ABI version. 
+ * - compatible: "memblock-v1"
+ *
+ * Identifies the overall memblock ABI version.
*/ +#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1" + +#endif /* _LINUX_KHO_ABI_MEMBLOCK_H */ diff --git a/mm/memblock.c b/mm/memblock.c index 905d06b16348..6cff515d82f4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -21,6 +21,7 @@ #ifdef CONFIG_KEXEC_HANDOVER #include #include +#include #endif /* CONFIG_KEXEC_HANDOVER */ #include @@ -2442,9 +2443,6 @@ int reserve_mem_release_by_name(const char *name) } #ifdef CONFIG_KEXEC_HANDOVER -#define MEMBLOCK_KHO_FDT "memblock" -#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1" -#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1" static int __init reserved_mem_preserve(void) { From dbac35bee8fc844c2d8d6417af874a170a44d41f Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Tue, 6 Jan 2026 15:01:40 +0100 Subject: [PATCH 049/107] lib/Kconfig.debug: fix BOOTPARAM_HUNG_TASK_PANIC comment The comment for CONFIG_BOOTPARAM_HUNG_TASK_PANIC says: Say N if unsure. but since commit 9544f9e6947f ("hung_task: panic when there are more than N hung tasks at the same time"), N is not a valid value for the option, leading to a warning at build time: .config:11736:warning: symbol value 'n' invalid for BOOTPARAM_HUNG_TASK_PANIC as well as an error when given to menuconfig. Fix the comment to say '0' instead of 'N'. Link: https://lkml.kernel.org/r/20260106140140.136446-1-tglozar@redhat.com Fixes: 9544f9e6947f ("hung_task: panic when there are more than N hung tasks at the same time") Signed-off-by: Tomas Glozar Reported-by: Johnny Mnemonic Reviewed-by: Lance Yang Cc: Li RongQing Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3a31bbf53425..2122d5cec34d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1274,7 +1274,7 @@ config BOOTPARAM_HUNG_TASK_PANIC high-availability systems that have uptime guarantees and where a hung tasks must be resolved ASAP. - Say N if unsure. + Say 0 if unsure. 
config DETECT_HUNG_TASK_BLOCKER bool "Dump Hung Tasks Blocker" From c62e7e6444cd75dfea2609646f25c66f28b95082 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sun, 4 Jan 2026 19:05:04 +0530 Subject: [PATCH 050/107] ocfs2: add check for free bits before allocation in ocfs2_move_extent() Add a check to verify the group descriptor has enough free bits before attempting allocation in ocfs2_move_extent(). This prevents a kernel BUG_ON crash in ocfs2_block_group_set_bits() when the move_extents ioctl is called on a crafted or corrupted filesystem. The existing validation in ocfs2_validate_gd_self() only checks static metadata consistency (bg_free_bits_count <= bg_bits) when the descriptor is first read from disk. However, during move_extents operations, multiple allocations can exhaust the free bits count below the requested allocation size, triggering BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits). The debug trace shows the issue clearly: - Block group 32 validated with bg_free_bits_count=427 - Repeated allocations decreased count: 427 -> 171 -> 43 -> ... -> 1 - Final request for 2 bits with only 1 available triggers BUG_ON By adding an early check in ocfs2_move_extent() right after ocfs2_find_victim_alloc_group(), we return -ENOSPC gracefully instead of crashing the kernel. This also avoids unnecessary work in ocfs2_probe_alloc_group() and __ocfs2_move_extent() when the allocation will fail. 
Link: https://lkml.kernel.org/r/20260104133504.14810-1-kartikey406@gmail.com Signed-off-by: Deepanshu Kartikey Reported-by: syzbot+7960178e777909060224@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7960178e777909060224 Link: https://lore.kernel.org/all/20251231115801.293726-1-kartikey406@gmail.com/T/ [v1] Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/move_extents.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 99637e34d9da..e3cdf8788484 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -662,6 +662,12 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, goto out_commit; } + gd = (struct ocfs2_group_desc *)gd_bh->b_data; + if (le16_to_cpu(gd->bg_free_bits_count) < len) { + ret = -ENOSPC; + goto out_commit; + } + /* * probe the victim cluster group to find a proper * region to fit wanted movement, it even will perform @@ -682,7 +688,6 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, goto out_commit; } - gd = (struct ocfs2_group_desc *)gd_bh->b_data; ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, le16_to_cpu(gd->bg_chain)); if (ret) { From 2bbd9e1d14d6156180d21cc871a51a3bd1839c81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?mingzhu=2Ewang=28=E7=8E=8B=E6=98=8E=E7=8F=A0=29?= Date: Wed, 7 Jan 2026 08:15:32 +0000 Subject: [PATCH 051/107] kernel/fork: update obsolete use_mm references to kthread_use_mm The comment for get_task_mm() in kernel/fork.c incorrectly references the deprecated function `use_mm()`, which has been renamed to `kthread_use_mm()` in kernel/kthread.c. This patch updates the documentation to reflect the current function names, ensuring accuracy when developers refer to the kernel thread memory context API. No functional changes were introduced. 
Link: https://lkml.kernel.org/r/KUZPR04MB8965F954108B4DD7E8FFDB2B8F84A@KUZPR04MB8965.apcprd04.prod.outlook.com Signed-off-by: mingzhu.wang Cc: Ben Segall Cc: David Hildenbrand Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Jiazi Li Cc: Juri Lelli Cc: Kees Cook Cc: "Liam R. Howlett" Cc: Lorenzo Stoakes Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index b21eccc9e11c..f5ad5de49d68 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1356,7 +1356,7 @@ struct file *get_task_exe_file(struct task_struct *task) * @task: The task. * * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning - * this kernel workthread has transiently adopted a user mm with use_mm, + * this kernel workthread has transiently adopted a user mm with kthread_use_mm, * to do its AIO) is not set and if so returns a reference to it, after * bumping up the use count. User must release the mm via mmput() * after use. Typically used by /proc and ptrace. From 105ddfb2d2b3acec7a7d9695463df48733d91e6c Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 7 Jan 2026 08:28:46 +0000 Subject: [PATCH 052/107] rust: task: restrict Task::group_leader() to current MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Task::group_leader() method currently allows you to access the group_leader() of any task, for example one you hold a refcount to. But this is not safe in general since the group leader could change when a task exits. See for example commit a15f37a40145c ("kernel/sys.c: fix the racy usage of task_lock(tsk->group_leader) in sys_prlimit64() paths"). All existing users of Task::group_leader() call this method on current, which is guaranteed running, so there's not an actual issue in Rust code today. 
But to prevent code in the future from making this mistake, restrict Task::group_leader() so that it can only be called on current. There are some other cases where accessing task->group_leader is okay. For example it can be safe if you hold tasklist_lock or rcu_read_lock(). However, only supporting current->group_leader is sufficient for all in-tree Rust users of group_leader right now. Safe Rust functionality for accessing it under rcu or while holding tasklist_lock may be added in the future if required by any future Rust module. This patch is a bugfix in that it prevents users of this API from writing incorrect code. It doesn't change behavior of correct code. Link: https://lkml.kernel.org/r/20260107-task-group-leader-v2-1-8fbf816f2a2f@google.com Signed-off-by: Alice Ryhl Fixes: 313c4281bc9d ("rust: add basic `Task`") Reported-by: Oleg Nesterov Closes: https://lore.kernel.org/all/aTLnV-5jlgfk1aRK@redhat.com/ Reviewed-by: Boqun Feng Reviewed-by: Gary Guo Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Björn Roy Baron Cc: Christian Brauner Cc: Danilo Krummrich Cc: FUJITA Tomonori Cc: Miguel Ojeda Cc: Panagiotis Foliadis Cc: Shankari Anand Cc: Trevor Gross Signed-off-by: Andrew Morton --- rust/kernel/task.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 49fad6de0674..cc907fb531bc 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -204,18 +204,6 @@ impl Task { self.0.get() } - /// Returns the group leader of the given task. - pub fn group_leader(&self) -> &Task { - // SAFETY: The group leader of a task never changes after initialization, so reading this - // field is not a data race. 
- let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) }; - - // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, - // and given that a task has a reference to its group leader, we know it must be valid for - // the lifetime of the returned task reference. - unsafe { &*ptr.cast() } - } - /// Returns the PID of the given task. pub fn pid(&self) -> Pid { // SAFETY: The pid of a task never changes after initialization, so reading this field is @@ -345,6 +333,18 @@ impl CurrentTask { // `release_task()` call. Some(unsafe { PidNamespace::from_ptr(active_ns) }) } + + /// Returns the group leader of the current task. + pub fn group_leader(&self) -> &Task { + // SAFETY: The group leader of a task never changes while the task is running, and `self` + // is the current task, which is guaranteed running. + let ptr = unsafe { (*self.as_ptr()).group_leader }; + + // SAFETY: `current->group_leader` stays valid for at least the duration in which `current` + // is running, and the signature of this function ensures that the returned `&Task` can + // only be used while `current` is still valid, thus still running. + unsafe { &*ptr.cast() } + } } // SAFETY: The type invariants guarantee that `Task` is always refcounted. From bf45794244ca1fb1c135754f36ff765eea01f9e6 Mon Sep 17 00:00:00 2001 From: Kir Chou Date: Thu, 8 Jan 2026 21:07:53 +0900 Subject: [PATCH 053/107] lib/glob: convert selftest to KUnit This patch converts the existing glob selftest (lib/globtest.c) to use the KUnit framework (lib/tests/glob_kunit.c). The new test: - Migrates all 64 test cases from the original test to the KUnit suite. - Removes the custom 'verbose' module parameter as KUnit handles logging. - Updates Kconfig.debug and Makefile to support the new KUnit test. - Updates Kconfig and Makefile to remove the original selftest. - Updates GLOB_SELFTEST to GLOB_KUNIT_TEST for arch/m68k/configs. 
This commit is verified by `./tools/testing/kunit/kunit.py run' with the .kunit/.kunitconfig: CONFIG_KUNIT=y CONFIG_GLOB_KUNIT_TEST=y Link: https://lkml.kernel.org/r/20260108120753.27339-1-note351@hotmail.com Signed-off-by: Kir Chou Acked-by: Geert Uytterhoeven Reviewed-by: David Gow Reviewed-by: Kuan-Wei Chiu Cc: Signed-off-by: Andrew Morton --- arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - lib/Kconfig | 13 --- lib/Kconfig.debug | 13 +++ lib/Makefile | 1 - lib/globtest.c | 167 --------------------------- lib/tests/Makefile | 1 + lib/tests/glob_kunit.c | 125 ++++++++++++++++++++ 18 files changed, 139 insertions(+), 193 deletions(-) delete mode 100644 lib/globtest.c create mode 100644 lib/tests/glob_kunit.c diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 46598efbea54..3c87c1d181a6 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -600,7 +600,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 63bef7a6d858..03eaace46fe7 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -557,7 +557,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set 
CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 1342adfbd855..61228b9d2c2a 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -577,7 +577,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 484f21a2da37..83fcc12916c5 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -549,7 +549,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index ce97c816aa21..84d477e95fe8 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -559,7 +559,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index f5b57ea2d681..b1e911a138a0 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -576,7 +576,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 85efdb31c898..0a2c3ac6dc7f 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -663,7 +663,6 
@@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7102579b83d3..2087fe4af3d6 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -549,7 +549,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 18c0493ed0ff..4af83b643da1 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -550,7 +550,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 1b3a34ab1c74..56c303097050 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -566,7 +566,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 1a41a1c6bde1..de2a5b27d408 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -547,7 +547,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/arch/m68k/configs/sun3x_defconfig 
b/arch/m68k/configs/sun3x_defconfig index 8f182684e54b..297b8edcff6d 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -547,7 +547,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m diff --git a/lib/Kconfig b/lib/Kconfig index 2923924bea78..0f2fb9610647 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -430,19 +430,6 @@ config GLOB are compiling an out-of tree driver which tells you that it depends on this. -config GLOB_SELFTEST - tristate "glob self-test on init" - depends on GLOB - help - This option enables a simple self-test of the glob_match - function on startup. It is primarily useful for people - working on the code to ensure they haven't introduced any - regressions. - - It only adds a little bit of code and slows kernel boot (or - module load) by a small amount, so you're welcome to play with - it, but you probably don't need it. - # # Netlink attribute parsing support is select'ed if needed # diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2122d5cec34d..17d759a04021 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3364,6 +3364,19 @@ config PRIME_NUMBERS_KUNIT_TEST If unsure, say N +config GLOB_KUNIT_TEST + tristate "Glob matching test" if !KUNIT_ALL_TESTS + depends on GLOB + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the glob functions at runtime. + + This test suite verifies the correctness of glob_match() across various + scenarios, including edge cases. 
+ + If unsure, say N + endif # RUNTIME_TESTING_MENU config ARCH_USE_MEMTEST diff --git a/lib/Makefile b/lib/Makefile index 1f87a174a317..9839f40af5dc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -224,7 +224,6 @@ obj-$(CONFIG_CLOSURES) += closure.o obj-$(CONFIG_DQL) += dynamic_queue_limits.o obj-$(CONFIG_GLOB) += glob.o -obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o diff --git a/lib/globtest.c b/lib/globtest.c deleted file mode 100644 index d8e97d43b905..000000000000 --- a/lib/globtest.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Extracted fronm glob.c - */ - -#include -#include -#include -#include - -/* Boot with "glob.verbose=1" to show successful tests, too */ -static bool verbose = false; -module_param(verbose, bool, 0); - -struct glob_test { - char const *pat, *str; - bool expected; -}; - -static bool __pure __init test(char const *pat, char const *str, bool expected) -{ - bool match = glob_match(pat, str); - bool success = match == expected; - - /* Can't get string literals into a particular section, so... */ - static char const msg_error[] __initconst = - KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; - static char const msg_ok[] __initconst = - KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; - static char const mismatch[] __initconst = "mismatch"; - char const *message; - - if (!success) - message = msg_error; - else if (verbose) - message = msg_ok; - else - return success; - - printk(message, pat, str, mismatch + 3*match); - return success; -} - -/* - * The tests are all jammed together in one array to make it simpler - * to place that array in the .init.rodata section. The obvious - * "array of structures containing char *" has no way to force the - * pointed-to strings to be in a particular section. - * - * Anyway, a test consists of: - * 1. Expected glob_match result: '1' or '0'. - * 2. Pattern to match: null-terminated string - * 3. 
String to match against: null-terminated string - * - * The list of tests is terminated with a final '\0' instead of - * a glob_match result character. - */ -static char const glob_tests[] __initconst = - /* Some basic tests */ - "1" "a\0" "a\0" - "0" "a\0" "b\0" - "0" "a\0" "aa\0" - "0" "a\0" "\0" - "1" "\0" "\0" - "0" "\0" "a\0" - /* Simple character class tests */ - "1" "[a]\0" "a\0" - "0" "[a]\0" "b\0" - "0" "[!a]\0" "a\0" - "1" "[!a]\0" "b\0" - "1" "[ab]\0" "a\0" - "1" "[ab]\0" "b\0" - "0" "[ab]\0" "c\0" - "1" "[!ab]\0" "c\0" - "1" "[a-c]\0" "b\0" - "0" "[a-c]\0" "d\0" - /* Corner cases in character class parsing */ - "1" "[a-c-e-g]\0" "-\0" - "0" "[a-c-e-g]\0" "d\0" - "1" "[a-c-e-g]\0" "f\0" - "1" "[]a-ceg-ik[]\0" "a\0" - "1" "[]a-ceg-ik[]\0" "]\0" - "1" "[]a-ceg-ik[]\0" "[\0" - "1" "[]a-ceg-ik[]\0" "h\0" - "0" "[]a-ceg-ik[]\0" "f\0" - "0" "[!]a-ceg-ik[]\0" "h\0" - "0" "[!]a-ceg-ik[]\0" "]\0" - "1" "[!]a-ceg-ik[]\0" "f\0" - /* Simple wild cards */ - "1" "?\0" "a\0" - "0" "?\0" "aa\0" - "0" "??\0" "a\0" - "1" "?x?\0" "axb\0" - "0" "?x?\0" "abx\0" - "0" "?x?\0" "xab\0" - /* Asterisk wild cards (backtracking) */ - "0" "*??\0" "a\0" - "1" "*??\0" "ab\0" - "1" "*??\0" "abc\0" - "1" "*??\0" "abcd\0" - "0" "??*\0" "a\0" - "1" "??*\0" "ab\0" - "1" "??*\0" "abc\0" - "1" "??*\0" "abcd\0" - "0" "?*?\0" "a\0" - "1" "?*?\0" "ab\0" - "1" "?*?\0" "abc\0" - "1" "?*?\0" "abcd\0" - "1" "*b\0" "b\0" - "1" "*b\0" "ab\0" - "0" "*b\0" "ba\0" - "1" "*b\0" "bb\0" - "1" "*b\0" "abb\0" - "1" "*b\0" "bab\0" - "1" "*bc\0" "abbc\0" - "1" "*bc\0" "bc\0" - "1" "*bc\0" "bbc\0" - "1" "*bc\0" "bcbc\0" - /* Multiple asterisks (complex backtracking) */ - "1" "*ac*\0" "abacadaeafag\0" - "1" "*ac*ae*ag*\0" "abacadaeafag\0" - "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" - "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" - "1" "*abcd*\0" "abcabcabcabcdefg\0" - "1" "*ab*cd*\0" "abcabcabcabcdefg\0" - "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" - "0" "*abcd*\0" "abcabcabcabcefg\0" - "0" "*ab*cd*\0" 
"abcabcabcabcefg\0"; - -static int __init glob_init(void) -{ - unsigned successes = 0; - unsigned n = 0; - char const *p = glob_tests; - static char const message[] __initconst = - KERN_INFO "glob: %u self-tests passed, %u failed\n"; - - /* - * Tests are jammed together in a string. The first byte is '1' - * or '0' to indicate the expected outcome, or '\0' to indicate the - * end of the tests. Then come two null-terminated strings: the - * pattern and the string to match it against. - */ - while (*p) { - bool expected = *p++ & 1; - char const *pat = p; - - p += strlen(p) + 1; - successes += test(pat, p, expected); - p += strlen(p) + 1; - n++; - } - - n -= successes; - printk(message, successes, n); - - /* What's the errno for "kernel bug detected"? Guess... */ - return n ? -ECANCELED : 0; -} - -/* We need a dummy exit function to allow unload */ -static void __exit glob_fini(void) { } - -module_init(glob_init); -module_exit(glob_fini); - -MODULE_DESCRIPTION("glob(7) matching tests"); -MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/tests/Makefile b/lib/tests/Makefile index 088b80d16383..ab3e74d0da9e 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -19,6 +19,7 @@ CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o +obj-$(CONFIG_GLOB_KUNIT_TEST) += glob_kunit.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o obj-$(CONFIG_TEST_IOV_ITER) += kunit_iov_iter.o diff --git a/lib/tests/glob_kunit.c b/lib/tests/glob_kunit.c new file mode 100644 index 000000000000..362b1eda8e5b --- /dev/null +++ b/lib/tests/glob_kunit.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: MIT OR GPL-2.0 +/* + * Test cases for glob functions. + */ + +#include +#include +#include + +/** + * struct glob_test_case - Test case for glob matching. + * @pat: Pattern to match. 
+ * @str: String to match against. + * @expected: Expected glob_match result, true if matched. + */ +struct glob_test_case { + const char *pat; + const char *str; + bool expected; +}; + +static const struct glob_test_case glob_test_cases[] = { + /* Some basic tests */ + { .pat = "a", .str = "a", .expected = true }, + { .pat = "a", .str = "b", .expected = false }, + { .pat = "a", .str = "aa", .expected = false }, + { .pat = "a", .str = "", .expected = false }, + { .pat = "", .str = "", .expected = true }, + { .pat = "", .str = "a", .expected = false }, + /* Simple character class tests */ + { .pat = "[a]", .str = "a", .expected = true }, + { .pat = "[a]", .str = "b", .expected = false }, + { .pat = "[!a]", .str = "a", .expected = false }, + { .pat = "[!a]", .str = "b", .expected = true }, + { .pat = "[ab]", .str = "a", .expected = true }, + { .pat = "[ab]", .str = "b", .expected = true }, + { .pat = "[ab]", .str = "c", .expected = false }, + { .pat = "[!ab]", .str = "c", .expected = true }, + { .pat = "[a-c]", .str = "b", .expected = true }, + { .pat = "[a-c]", .str = "d", .expected = false }, + /* Corner cases in character class parsing */ + { .pat = "[a-c-e-g]", .str = "-", .expected = true }, + { .pat = "[a-c-e-g]", .str = "d", .expected = false }, + { .pat = "[a-c-e-g]", .str = "f", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "a", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "]", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "[", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "h", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "f", .expected = false }, + { .pat = "[!]a-ceg-ik[]", .str = "h", .expected = false }, + { .pat = "[!]a-ceg-ik[]", .str = "]", .expected = false }, + { .pat = "[!]a-ceg-ik[]", .str = "f", .expected = true }, + /* Simple wild cards */ + { .pat = "?", .str = "a", .expected = true }, + { .pat = "?", .str = "aa", .expected = false }, + { .pat = "??", .str = "a", .expected = false }, + { .pat = "?x?", 
.str = "axb", .expected = true }, + { .pat = "?x?", .str = "abx", .expected = false }, + { .pat = "?x?", .str = "xab", .expected = false }, + /* Asterisk wild cards (backtracking) */ + { .pat = "*??", .str = "a", .expected = false }, + { .pat = "*??", .str = "ab", .expected = true }, + { .pat = "*??", .str = "abc", .expected = true }, + { .pat = "*??", .str = "abcd", .expected = true }, + { .pat = "??*", .str = "a", .expected = false }, + { .pat = "??*", .str = "ab", .expected = true }, + { .pat = "??*", .str = "abc", .expected = true }, + { .pat = "??*", .str = "abcd", .expected = true }, + { .pat = "?*?", .str = "a", .expected = false }, + { .pat = "?*?", .str = "ab", .expected = true }, + { .pat = "?*?", .str = "abc", .expected = true }, + { .pat = "?*?", .str = "abcd", .expected = true }, + { .pat = "*b", .str = "b", .expected = true }, + { .pat = "*b", .str = "ab", .expected = true }, + { .pat = "*b", .str = "ba", .expected = false }, + { .pat = "*b", .str = "bb", .expected = true }, + { .pat = "*b", .str = "abb", .expected = true }, + { .pat = "*b", .str = "bab", .expected = true }, + { .pat = "*bc", .str = "abbc", .expected = true }, + { .pat = "*bc", .str = "bc", .expected = true }, + { .pat = "*bc", .str = "bbc", .expected = true }, + { .pat = "*bc", .str = "bcbc", .expected = true }, + /* Multiple asterisks (complex backtracking) */ + { .pat = "*ac*", .str = "abacadaeafag", .expected = true }, + { .pat = "*ac*ae*ag*", .str = "abacadaeafag", .expected = true }, + { .pat = "*a*b*[bc]*[ef]*g*", .str = "abacadaeafag", .expected = true }, + { .pat = "*a*b*[ef]*[cd]*g*", .str = "abacadaeafag", .expected = false }, + { .pat = "*abcd*", .str = "abcabcabcabcdefg", .expected = true }, + { .pat = "*ab*cd*", .str = "abcabcabcabcdefg", .expected = true }, + { .pat = "*abcd*abcdef*", .str = "abcabcdabcdeabcdefg", .expected = true }, + { .pat = "*abcd*", .str = "abcabcabcabcefg", .expected = false }, + { .pat = "*ab*cd*", .str = "abcabcabcabcefg", .expected = false }, 
+}; + +static void glob_case_to_desc(const struct glob_test_case *t, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "pat:\"%s\" str:\"%s\"", t->pat, t->str); +} + +KUNIT_ARRAY_PARAM(glob, glob_test_cases, glob_case_to_desc); + +static void glob_test_match(struct kunit *test) +{ + const struct glob_test_case *params = test->param_value; + + KUNIT_EXPECT_EQ_MSG(test, + glob_match(params->pat, params->str), + params->expected, + "Pattern: \"%s\", String: \"%s\", Expected: %d", + params->pat, params->str, params->expected); +} + +static struct kunit_case glob_kunit_test_cases[] = { + KUNIT_CASE_PARAM(glob_test_match, glob_gen_params), + {} +}; + +static struct kunit_suite glob_test_suite = { + .name = "glob", + .test_cases = glob_kunit_test_cases, +}; + +kunit_test_suite(glob_test_suite); +MODULE_DESCRIPTION("Test cases for glob functions"); +MODULE_LICENSE("Dual MIT/GPL"); From 77ce1b4cd08fcdd049001fdf5f59c014fb4b7711 Mon Sep 17 00:00:00 2001 From: Long Wei Date: Wed, 7 Jan 2026 10:24:27 +0800 Subject: [PATCH 054/107] kho: test: clean up residual memory upon test_kho module unload During the initialization phase, the test_kho module invokes the kho_preserve_folio function, which internally configures bitmaps within kho_mem_track and establishes chunk linked lists in KHO. Upon unloading the test_kho module, it is necessary to clean up these states. 
Link: https://lkml.kernel.org/r/20260107022427.4114424-1-longwei27@huawei.com Signed-off-by: Long Wei Acked-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: hewenliang Cc: Pasha Tatashin Cc: Pratyush Yadav Signed-off-by: Andrew Morton --- lib/test_kho.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/test_kho.c b/lib/test_kho.c index 3431daca6968..a20fafaf9846 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -340,11 +340,15 @@ module_init(kho_test_init); static void kho_test_cleanup(void) { + /* unpreserve and free the data stored in folios */ + kho_test_unpreserve_data(&kho_test_state); for (int i = 0; i < kho_test_state.nr_folios; i++) folio_put(kho_test_state.folios[i]); kvfree(kho_test_state.folios); - vfree(kho_test_state.folios_info); + + /* Unpreserve and release the FDT folio */ + kho_unpreserve_folio(kho_test_state.fdt); folio_put(kho_test_state.fdt); } From 25929dae28f528d7d74992edabd38bf3c374e485 Mon Sep 17 00:00:00 2001 From: Long Wei Date: Tue, 16 Dec 2025 19:44:00 +0800 Subject: [PATCH 055/107] kho: remove duplicate header file references kexec_handover_internal.h is included twice in kexec_handover.c. Remove the redundant first inclusion to eliminate the duplication. Link: https://lkml.kernel.org/r/20251216114400.2677311-1-longwei27@huawei.com Signed-off-by: Long Wei Reviewed-by: Pasha Tatashin Cc: Alexander Graf Cc: hewenliang Cc: Mike Rapoport Cc: Pratyush Yadav Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 66fcdda0ebdc..fbe109a0d858 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -25,7 +25,6 @@ #include -#include "kexec_handover_internal.h" /* * KHO is tightly coupled with mm init and needs access to some of mm * internal APIs. 
From 8cafcb881364af5ef3a8b9fed4db254054033d8a Mon Sep 17 00:00:00 2001 From: Zhiyu Zhang Date: Thu, 1 Jan 2026 19:11:48 +0800 Subject: [PATCH 056/107] fat: avoid parent link count underflow in rmdir Corrupted FAT images can leave a directory inode with an incorrect i_nlink (e.g. 2 even though subdirectories exist). rmdir then unconditionally calls drop_nlink(dir) and can drive i_nlink to 0, triggering the WARN_ON in drop_nlink(). Add a sanity check in vfat_rmdir() and msdos_rmdir(): only drop the parent link count when it is at least 3, otherwise report a filesystem error. Link: https://lkml.kernel.org/r/20260101111148.1437-1-zhiyuzhang999@gmail.com Fixes: 9a53c3a783c2 ("[PATCH] r/o bind mounts: unlink: monitor i_nlink") Signed-off-by: Zhiyu Zhang Reported-by: Zhiyu Zhang Closes: https://lore.kernel.org/linux-fsdevel/aVN06OKsKxZe6-Kv@casper.infradead.org/T/#t Tested-by: Zhiyu Zhang Acked-by: OGAWA Hirofumi Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Andrew Morton --- fs/fat/namei_msdos.c | 7 ++++++- fs/fat/namei_vfat.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 0b920ee40a7f..262ec1b790b5 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -325,7 +325,12 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - drop_nlink(dir); + if (dir->i_nlink >= 3) + drop_nlink(dir); + else { + fat_fs_error(sb, "parent dir link count too low (%u)", + dir->i_nlink); + } clear_nlink(inode); fat_truncate_time(inode, NULL, S_CTIME); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 4f3cc2b3089e..8bf5f7a9fd23 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -804,7 +804,12 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - drop_nlink(dir); + if (dir->i_nlink >= 3) 
+ drop_nlink(dir); + else { + fat_fs_error(sb, "parent dir link count too low (%u)", + dir->i_nlink); + } clear_nlink(inode); fat_truncate_time(inode, NULL, S_ATIME|S_MTIME); From e8eef69a99f185e75909adb24ab93d706e07bf27 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Mon, 12 Jan 2026 10:08:53 -0800 Subject: [PATCH 057/107] once: don't use a work queue to reset sleepable static key Pointless overhead to use a work queue to reset the static key for a DO_ONCE_SLEEPABLE() invocation. Note that the previous code path included a BUG_ON() if the static key was already disabled. Dropped that as part of this change because: 1) Use of BUG_ON() is highly discouraged. 2) There is a WARN_ON() in the static_branch_disable() code path that would provide adequate breadcrumbs to debug any issue. Link: https://lkml.kernel.org/r/aWU4tfTju1l3oZCu@agluck-desk3 Signed-off-by: Tony Luck Reported-by: Reinette Chatre Cc: Eric Dumazet Signed-off-by: Andrew Morton --- lib/once.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/once.c b/lib/once.c index 2c306f0e891e..8557eb489f34 100644 --- a/lib/once.c +++ b/lib/once.c @@ -93,6 +93,6 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, { *done = true; mutex_unlock(&once_mutex); - once_disable_jump(once_key, mod); + static_branch_disable(once_key); } EXPORT_SYMBOL(__do_once_sleepable_done); From 5e65b5ca7d4e1f5d18e03ada94f549086ceb6500 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 24 Dec 2025 12:38:10 -0500 Subject: [PATCH 058/107] tsacct: skip all kernel threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is a preparation step for HPCC, for the OOM killer improvements. I suspect that this patch is useful on its own, because it really makes no sense to sum up accounting statistics of use_mm within kernel threads which are only temporarily using those mm. 
When we hit acct_account_cputime within an irq handler over a kthread that happens to use a userspace mm, we end up summing up the mm's RSS into the tsk acct_rss_mem1, which eventually decays. I don't see a good rationale behind tracking the mm's rss in that way when a kthread uses a userspace mm temporarily through use_mm. It causes issues with init_mm and efi_mm which only partially initialize their mm_struct when introducing the new hierarchical percpu counters to replace RSS counters, which requires a pointer dereference when reading the approximate counter sum. The current percpu counters simply load a zeroed atomic counter, which happen to work. Skip all kernel threads in acct_account_cputime(), not just those that happen to have a NULL mm. This is a preparation step before introducing the hierarchical percpu counters. Link: https://lkml.kernel.org/r/20251224173810.648699-2-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Cc: Mark Brown Cc: Aboorva Devarajan Cc: Al Viro Cc: Baolin Wang Cc: Christian König Cc: Christian Brauner Cc: Christoph Lameter Cc: David Hildenbrand Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: "Liam R . Howlett" Cc: Lorenzo Stoakes Cc: Martin Liu Cc: Masami Hiramatsu Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Michal Hocko Cc: Mike Rapoport Cc: "Paul E.
McKenney" Cc: Roman Gushchin Cc: SeongJae Park Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sweet Tea Dorminy Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Wei Yang Cc: Yu Zhao Signed-off-by: Andrew Morton --- kernel/tsacct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 6ea2f6363b90..5c153106e642 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -125,7 +125,7 @@ static void __acct_update_integrals(struct task_struct *tsk, { u64 time, delta; - if (!likely(tsk->mm)) + if (unlikely(!tsk->mm || (tsk->flags & PF_KTHREAD))) return; time = stime + utime; From 4cc67b048459bebb7a60b693044ec83fb853eba1 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 11 Jan 2026 21:21:57 +0000 Subject: [PATCH 059/107] linux/log2.h: reduce instruction count for is_power_of_2() Follow an observation that (n ^ (n - 1)) will only ever retain the most significant bit set in the word operated on if that is the only bit set in the first place, and use it to determine whether a number is a whole power of 2, avoiding the need for an explicit check for nonzero. This reduces the sequence produced to 3 instructions only across Alpha, MIPS, and RISC-V targets, down from 4, 5, and 4 respectively, removing a branch in the two latter cases. And it's 5 instructions on POWER and x86-64 vs 8 and 9 respectively. There are no branches now emitted here for targets that have a suitable conditional set operation, although an inline expansion will often end with one, depending on what code a call to this function is used in. Credit goes to GCC authors for coming up with this optimisation used as the fallback for (__builtin_popcountl(n) == 1), equivalent to this code, for targets where the hardware population count operation is considered expensive. Link: https://lkml.kernel.org/r/alpine.DEB.2.21.2601111836250.30566@angie.orcam.me.uk Signed-off-by: Maciej W. Rozycki Cc: Jens Axboe Cc: John Garry Cc: "Martin K. 
Petersen" Cc: Su Hui Signed-off-by: Andrew Morton --- include/linux/log2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index 2eac3fc9303d..e17ceb32e0c9 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -44,7 +44,7 @@ int __ilog2_u64(u64 n) static __always_inline __attribute__((const)) bool is_power_of_2(unsigned long n) { - return (n != 0 && ((n & (n - 1)) == 0)); + return n - 1 < (n ^ (n - 1)); } /** From a906f3ae4423d35c9804c8ec3a0db96ce9b54d44 Mon Sep 17 00:00:00 2001 From: Lillian Berry Date: Sun, 11 Jan 2026 07:56:35 -0500 Subject: [PATCH 060/107] init/main.c: check if rdinit was explicitly set before printing warning The rdinit parameter is set by default, and attempted during boot even if not specified in the command line. Only print the warning about rdinit being inaccessible if the rdinit value was found in command line; it's just noise otherwise. [akpm@linux-foundation.org: move ramdisk_execute_command_set into __initdata] Link: https://lkml.kernel.org/r/20260111125635.53682-1-lillian@star-ark.net Signed-off-by: Lillian Berry Cc: Ahmad Fatoum Cc: Alexander Shishkin Cc: Al Viro Cc: Douglas Anderson Cc: Francesco Valla Cc: Guo Weikang Cc: Huacai Chen Cc: Huan Yang Cc: Ingo Molnar Cc: "Mike Rapoport (Microsoft)" Cc: Sascha Hauer Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- init/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index b84818ad9685..4773f3bad49c 100644 --- a/init/main.c +++ b/init/main.c @@ -162,6 +162,7 @@ static size_t initargs_offs; static char *execute_command; static char *ramdisk_execute_command = "/init"; +static bool __initdata ramdisk_execute_command_set; /* * Used to generate warnings if static_key manipulation functions are used @@ -623,6 +624,7 @@ static int __init rdinit_setup(char *str) unsigned int i; ramdisk_execute_command = str; + ramdisk_execute_command_set = true; /* See "auto" 
comment in init_setup */ for (i = 1; i < MAX_INIT_ARGS; i++) argv_init[i] = NULL; @@ -1699,8 +1701,9 @@ static noinline void __init kernel_init_freeable(void) int ramdisk_command_access; ramdisk_command_access = init_eaccess(ramdisk_execute_command); if (ramdisk_command_access != 0) { - pr_warn("check access for rdinit=%s failed: %i, ignoring\n", - ramdisk_execute_command, ramdisk_command_access); + if (ramdisk_execute_command_set) + pr_warn("check access for rdinit=%s failed: %i, ignoring\n", + ramdisk_execute_command, ramdisk_command_access); ramdisk_execute_command = NULL; prepare_namespace(); } From 499f86de4f8c34e19a57daf2b6f0cba848e91994 Mon Sep 17 00:00:00 2001 From: Sun Jian Date: Tue, 13 Jan 2026 18:15:32 +0800 Subject: [PATCH 061/107] init/main: read bootconfig header with get_unaligned_le32() get_boot_config_from_initrd() scans up to 3 bytes before initrd_end to handle GRUB 4-byte alignment. As a result, the bootconfig header immediately preceding the magic may be unaligned. Read the size and checksum fields with get_unaligned_le32() instead of casting to u32 * and using le32_to_cpu(), avoiding potential unaligned access and silencing sparse "cast to restricted __le32" warnings. Sparse warnings (gcc + C=1): init/main.c:292:16: warning: cast to restricted __le32 init/main.c:293:16: warning: cast to restricted __le32 No functional change intended. 
Link: https://lkml.kernel.org/r/20260113101532.1630770-1-sun.jian.kdev@gmail.com Signed-off-by: Sun Jian Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- init/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/init/main.c b/init/main.c index 4773f3bad49c..29741049aa79 100644 --- a/init/main.c +++ b/init/main.c @@ -104,6 +104,7 @@ #include #include #include +#include #include #include @@ -270,7 +271,7 @@ static void * __init get_boot_config_from_initrd(size_t *_size) { u32 size, csum; char *data; - u32 *hdr; + u8 *hdr; int i; if (!initrd_end) @@ -289,9 +290,9 @@ static void * __init get_boot_config_from_initrd(size_t *_size) return NULL; found: - hdr = (u32 *)(data - 8); - size = le32_to_cpu(hdr[0]); - csum = le32_to_cpu(hdr[1]); + hdr = (u8 *)(data - 8); + size = get_unaligned_le32(hdr); + csum = get_unaligned_le32(hdr + 4); data = ((void *)hdr) - size; if ((unsigned long)data < initrd_start) { From 3bb83c910971c47989aa439849265600fa67b42a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 13 Jan 2026 16:22:28 +1100 Subject: [PATCH 062/107] bpf: explicitly align bpf_res_spin_lock Patch series "Align atomic storage", v7. This series adds the __aligned attribute to atomic_t and atomic64_t definitions in include/linux and include/asm-generic (respectively) to get natural alignment of both types on csky, m68k, microblaze, nios2, openrisc and sh. This series also adds Kconfig options to enable a new run-time warning to help reveal misaligned atomic accesses on platforms which don't trap that. The performance impact is expected to vary across platforms and workloads. The measurements I made on m68k show that some workloads run faster and others slower. This patch (of 4): Align bpf_res_spin_lock to avoid a BUILD_BUG_ON() when the alignment changes, as it will do on m68k when, in a subsequent patch, the minimum alignment of the atomic_t member of struct rqspinlock gets increased from 2 to 4. 
Drop the BUILD_BUG_ON() as it becomes redundant. Link: https://lkml.kernel.org/r/cover.1768281748.git.fthain@linux-m68k.org Link: https://lkml.kernel.org/r/8a83876b07d1feacc024521e44059ae89abbb1ea.1768281748.git.fthain@linux-m68k.org Signed-off-by: Finn Thain Acked-by: Alexei Starovoitov Reviewed-by: Arnd Bergmann Cc: Geert Uytterhoeven Cc: Andrii Nakryiko Cc: Ard Biesheuvel Cc: Boqun Feng Cc: "Borislav Petkov (AMD)" Cc: Daniel Borkman Cc: Dinh Nguyen Cc: Eduard Zingerman Cc: Gary Guo Cc: Guo Ren Cc: Hao Luo Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: John Paul Adrian Glaubitz Cc: Jonas Bonn Cc: KP Singh Cc: Marc Rutland Cc: Martin KaFai Lau Cc: Peter Zijlstra Cc: Rich Felker Cc: Sasha Levin (Microsoft) Cc: Song Liu Cc: Stafford Horne Cc: Stanislav Fomichev Cc: Stefan Kristiansson Cc: Thomas Gleixner Cc: Will Deacon Cc: Yonghong Song Cc: Yoshinori Sato Cc: Dave Hansen Signed-off-by: Andrew Morton --- include/asm-generic/rqspinlock.h | 2 +- kernel/bpf/rqspinlock.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h index 0f2dcbbfee2f..dd36ac96bf66 100644 --- a/include/asm-generic/rqspinlock.h +++ b/include/asm-generic/rqspinlock.h @@ -28,7 +28,7 @@ struct rqspinlock { */ struct bpf_res_spin_lock { u32 val; -}; +} __aligned(__alignof__(struct rqspinlock)); struct qspinlock; #ifdef CONFIG_QUEUED_SPINLOCKS diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index f7d0c8d4644e..8d892fb099ac 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -694,7 +694,6 @@ __bpf_kfunc int bpf_res_spin_lock(struct bpf_res_spin_lock *lock) int ret; BUILD_BUG_ON(sizeof(rqspinlock_t) != sizeof(struct bpf_res_spin_lock)); - BUILD_BUG_ON(__alignof__(rqspinlock_t) != __alignof__(struct bpf_res_spin_lock)); preempt_disable(); ret = res_spin_lock((rqspinlock_t *)lock); From e428b013d9dff30f7a65509e33047ba975cce8ba Mon Sep 17 00:00:00 2001 From: Finn 
Thain Date: Tue, 13 Jan 2026 16:22:28 +1100 Subject: [PATCH 063/107] atomic: specify alignment for atomic_t and atomic64_t Some recent commits incorrectly assumed 4-byte alignment of locks. That assumption fails on Linux/m68k (and, interestingly, would have failed on Linux/cris also). The jump label implementation makes a similar alignment assumption. The expectation that atomic_t and atomic64_t variables will be naturally aligned seems reasonable, as indeed they are on 64-bit architectures. But atomic64_t isn't naturally aligned on csky, m68k, microblaze, nios2, openrisc and sh. Neither atomic_t nor atomic64_t are naturally aligned on m68k. This patch brings a little uniformity by specifying natural alignment for atomic types. One benefit is that atomic64_t variables do not get split across a page boundary. The cost is that some structs grow which leads to cache misses and wasted memory. See also, commit bbf2a330d92c ("x86: atomic64: The atomic64_t data type should be 8 bytes aligned on 32-bit too"). Link: https://lkml.kernel.org/r/a76bc24a4e7c1d8112d7d5fa8d14e4b694a0e90c.1768281748.git.fthain@linux-m68k.org Link: https://lore.kernel.org/lkml/CAFr9PX=MYUDGJS2kAvPMkkfvH+0-SwQB_kxE4ea0J_wZ_pk=7w@mail.gmail.com Link: https://lore.kernel.org/lkml/CAMuHMdW7Ab13DdGs2acMQcix5ObJK0O2dG_Fxzr8_g58Rc1_0g@mail.gmail.com/ Signed-off-by: Finn Thain Acked-by: Guo Ren Reviewed-by: Arnd Bergmann Cc: Guo Ren Cc: Geert Uytterhoeven Cc: Dinh Nguyen Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: Yoshinori Sato Cc: Rich Felker Cc: John Paul Adrian Glaubitz Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Ard Biesheuvel Cc: Boqun Feng Cc: "Borislav Petkov (AMD)" Cc: Daniel Borkman Cc: Dave Hansen Cc: Eduard Zingerman Cc: Gary Guo Cc: Hao Luo Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Marc Rutland Cc: Martin KaFai Lau Cc: Peter Zijlstra Cc: Sasha Levin (Microsoft) Cc: Song Liu Cc: Stanislav Fomichev Cc: Thomas Gleixner Cc: Will Deacon Cc: Yonghong Song Signed-off-by: Andrew Morton --- include/asm-generic/atomic64.h | 2 +- include/linux/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 100d24b02e52..f22ccfc0df98 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -10,7 +10,7 @@ #include typedef struct { - s64 counter; + s64 __aligned(sizeof(s64)) counter; } atomic64_t; #define ATOMIC64_INIT(i) { (i) } diff --git a/include/linux/types.h b/include/linux/types.h index 0cbb684eec5c..f69be881369f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -179,7 +179,7 @@ typedef phys_addr_t resource_size_t; typedef unsigned long irq_hw_number_t; typedef struct { - int counter; + int __aligned(sizeof(int)) counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } From 80047d84eed25e9c92cfb9169980a0dfec110246 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 13 Jan 2026 16:22:28 +1100 Subject: [PATCH 064/107] atomic: add alignment check to instrumented atomic operations Add a Kconfig option for debug builds which logs a warning when an instrumented atomic operation takes place that's misaligned. Some platforms don't trap for this. 
[fthain@linux-m68k.org: added __DISABLE_EXPORTS conditional and refactored as helper function] Link: https://lkml.kernel.org/r/51ebf844e006ca0de408f5d3a831e7b39d7fc31c.1768281748.git.fthain@linux-m68k.org Link: https://lore.kernel.org/lkml/20250901093600.GF4067720@noisy.programming.kicks-ass.net/ Link: https://lore.kernel.org/linux-next/df9fbd22-a648-ada4-fee0-68fe4325ff82@linux-m68k.org/ Signed-off-by: Finn Thain Signed-off-by: Peter Zijlstra (Intel) Suggested-by: Geert Uytterhoeven Cc: Sasha Levin Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Ard Biesheuvel Cc: "H. Peter Anvin" Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Arnd Bergmann Cc: Boqun Feng Cc: Daniel Borkman Cc: Dinh Nguyen Cc: Eduard Zingerman Cc: Gary Guo Cc: Guo Ren Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: John Paul Adrian Glaubitz Cc: Jonas Bonn Cc: KP Singh Cc: Marc Rutland Cc: Martin KaFai Lau Cc: Rich Felker Cc: Song Liu Cc: Stafford Horne Cc: Stanislav Fomichev Cc: Stefan Kristiansson Cc: Will Deacon Cc: Yonghong Song Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/instrumented.h | 11 +++++++++++ lib/Kconfig.debug | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h index 711a1f0d1a73..e34b6a557e0a 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -7,6 +7,7 @@ #ifndef _LINUX_INSTRUMENTED_H #define _LINUX_INSTRUMENTED_H +#include #include #include #include @@ -55,6 +56,13 @@ static __always_inline void instrument_read_write(const volatile void *v, size_t kcsan_check_read_write(v, size); } +static __always_inline void instrument_atomic_check_alignment(const volatile void *v, size_t size) +{ +#ifndef __DISABLE_EXPORTS + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ATOMIC) && ((unsigned long)v & (size - 1))); +#endif +} + /** * instrument_atomic_read - instrument atomic read access * @v: address of access @@ -67,6 +75,7 @@ static __always_inline void 
instrument_atomic_read(const volatile void *v, size_ { kasan_check_read(v, size); kcsan_check_atomic_read(v, size); + instrument_atomic_check_alignment(v, size); } /** @@ -81,6 +90,7 @@ static __always_inline void instrument_atomic_write(const volatile void *v, size { kasan_check_write(v, size); kcsan_check_atomic_write(v, size); + instrument_atomic_check_alignment(v, size); } /** @@ -95,6 +105,7 @@ static __always_inline void instrument_atomic_read_write(const volatile void *v, { kasan_check_write(v, size); kcsan_check_atomic_read_write(v, size); + instrument_atomic_check_alignment(v, size); } /** diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 17d759a04021..9eb685d1ec44 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1360,6 +1360,16 @@ config DEBUG_PREEMPT depending on workload as it triggers debugging routines for each this_cpu operation. It should only be used for debugging purposes. +config DEBUG_ATOMIC + bool "Debug atomic variables" + depends on DEBUG_KERNEL + help + If you say Y here then the kernel will add a runtime alignment check + to atomic accesses. Useful for architectures that do not have trap on + mis-aligned access. + + This option has potentially significant overhead. + menu "Lock Debugging (spinlocks, mutexes, etc...)" config LOCK_DEBUGGING_SUPPORT From 9a229ae249e0a24276901ad6807f31b32124f5c5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 13 Jan 2026 16:22:28 +1100 Subject: [PATCH 065/107] atomic: add option for weaker alignment check Add a new Kconfig symbol to make CONFIG_DEBUG_ATOMIC more useful on those architectures which do not align dynamic allocations to 8-byte boundaries. Without this, CONFIG_DEBUG_ATOMIC produces excessive WARN splats. 
Link: https://lkml.kernel.org/r/6d25a12934fe9199332f4d65d17c17de450139a8.1768281748.git.fthain@linux-m68k.org Signed-off-by: Finn Thain Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Boqun Feng Cc: "Borislav Petkov (AMD)" Cc: Daniel Borkman Cc: Dave Hansen Cc: Dinh Nguyen Cc: Eduard Zingerman Cc: Gary Guo Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Hao Luo Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: John Paul Adrian Glaubitz Cc: Jonas Bonn Cc: KP Singh Cc: Marc Rutland Cc: Martin KaFai Lau Cc: Peter Zijlstra Cc: Rich Felker Cc: Sasha Levin (Microsoft) Cc: Song Liu Cc: Stafford Horne Cc: Stanislav Fomichev Cc: Stefan Kristiansson Cc: Thomas Gleixner Cc: Will Deacon Cc: Yonghong Song Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/instrumented.h | 8 +++++++- lib/Kconfig.debug | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h index e34b6a557e0a..a1b4cf81adc2 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -59,7 +59,13 @@ static __always_inline void instrument_read_write(const volatile void *v, size_t static __always_inline void instrument_atomic_check_alignment(const volatile void *v, size_t size) { #ifndef __DISABLE_EXPORTS - WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ATOMIC) && ((unsigned long)v & (size - 1))); + if (IS_ENABLED(CONFIG_DEBUG_ATOMIC)) { + unsigned int mask = size - 1; + + if (IS_ENABLED(CONFIG_DEBUG_ATOMIC_LARGEST_ALIGN)) + mask &= sizeof(struct { long x; } __aligned_largest) - 1; + WARN_ON_ONCE((unsigned long)v & mask); + } #endif } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9eb685d1ec44..7eed3b197ca9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1370,6 +1370,14 @@ config DEBUG_ATOMIC This option has potentially significant overhead. 
+config DEBUG_ATOMIC_LARGEST_ALIGN + bool "Check alignment only up to __aligned_largest" + depends on DEBUG_ATOMIC + help + If you say Y here then the check for natural alignment of + atomic accesses will be constrained to the compiler's largest + alignment for scalar types. + menu "Lock Debugging (spinlocks, mutexes, etc...)" config LOCK_DEBUGGING_SUPPORT From 89802ca36c96b324829996ef05013f82ecc9b68a Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 13 Jan 2026 10:29:58 +0800 Subject: [PATCH 066/107] lib/group_cpus: make group CPU cluster aware As CPU core counts increase, the number of NVMe IRQs may be smaller than the total number of CPUs. This forces multiple CPUs to share the same IRQ. If the IRQ affinity and the CPU's cluster do not align, a performance penalty can be observed on some platforms. This patch improves IRQ affinity by grouping CPUs by cluster within each NUMA domain, ensuring better locality between CPUs and their assigned NVMe IRQs. Details: Intel Xeon E platform packs 4 CPU cores as 1 module (cluster) and share the L2 cache. Let's say, if there are 40 CPUs in 1 NUMA domain and 11 IRQs to dispatch. The existing algorithm will map the first 7 IRQs each with 4 CPUs and the remaining 4 IRQs each with 3 CPUs. The last 4 IRQs may have a cross cluster issue. For example, the 9th IRQ, which is pinned to CPU32, then for CPU31, it will have cross L2 memory access. CPU |28 29 30 31|32 33 34 35|36 ... -------- -------- -------- IRQ 8 9 10 If this patch is applied, then the first 2 IRQs are each mapped with 2 CPUs and the remaining 9 IRQs each mapped with 4 CPUs, which avoids the cross cluster memory access. CPU |00 01 02 03|04 05 06 07|08 09 10 11| ... ----- ----- ----------- ----------- IRQ 1 2 3 4 As a result, 15%+ performance difference is observed in FIO libaio/randread/bs=8k. Changes since V1: - Add more performance details in commit messages. - Fix endless loop when topology_cluster_cpumask returns an invalid mask.
History: v1: https://lore.kernel.org/all/20251024023038.872616-1-wangyang.guo@intel.com/ v1 [RESEND]: https://lore.kernel.org/all/20251111020608.1501543-1-wangyang.guo@intel.com/ Link: https://lkml.kernel.org/r/20260113022958.3379650-1-wangyang.guo@intel.com Signed-off-by: Wangyang Guo Reviewed-by: Tianyou Li Reviewed-by: Tim Chen Tested-by: Dan Liang Cc: Christoph Hellwig Cc: Jens Axboe Cc: Keith Busch Cc: Ming Lei Cc: Radu Rendec Cc: Sagi Grimberg Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- lib/group_cpus.c | 271 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 206 insertions(+), 65 deletions(-) diff --git a/lib/group_cpus.c b/lib/group_cpus.c index 6d08ac05f371..a93df70919df 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -114,48 +114,15 @@ static int ncpus_cmp_func(const void *l, const void *r) return ln->ncpus - rn->ncpus; } -/* - * Allocate group number for each node, so that for each node: - * - * 1) the allocated number is >= 1 - * - * 2) the allocated number is <= active CPU number of this node - * - * The actual allocated total groups may be less than @numgrps when - * active total CPU number is less than @numgrps. - * - * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' - * for each node. 
- */ -static void alloc_nodes_groups(unsigned int numgrps, - cpumask_var_t *node_to_cpumask, - const struct cpumask *cpu_mask, - const nodemask_t nodemsk, - struct cpumask *nmsk, - struct node_groups *node_groups) +static void alloc_groups_to_nodes(unsigned int numgrps, + unsigned int numcpus, + struct node_groups *node_groups, + unsigned int num_nodes) { - unsigned n, remaining_ncpus = 0; + unsigned int n, remaining_ncpus = numcpus; + unsigned int ngroups, ncpus; - for (n = 0; n < nr_node_ids; n++) { - node_groups[n].id = n; - node_groups[n].ncpus = UINT_MAX; - } - - for_each_node_mask(n, nodemsk) { - unsigned ncpus; - - cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); - ncpus = cpumask_weight(nmsk); - - if (!ncpus) - continue; - remaining_ncpus += ncpus; - node_groups[n].ncpus = ncpus; - } - - numgrps = min_t(unsigned, remaining_ncpus, numgrps); - - sort(node_groups, nr_node_ids, sizeof(node_groups[0]), + sort(node_groups, num_nodes, sizeof(node_groups[0]), ncpus_cmp_func, NULL); /* @@ -226,9 +193,8 @@ static void alloc_nodes_groups(unsigned int numgrps, * finally for each node X: grps(X) <= ncpu(X). * */ - for (n = 0; n < nr_node_ids; n++) { - unsigned ngroups, ncpus; + for (n = 0; n < num_nodes; n++) { if (node_groups[n].ncpus == UINT_MAX) continue; @@ -246,12 +212,201 @@ static void alloc_nodes_groups(unsigned int numgrps, } } +/* + * Allocate group number for each node, so that for each node: + * + * 1) the allocated number is >= 1 + * + * 2) the allocated number is <= active CPU number of this node + * + * The actual allocated total groups may be less than @numgrps when + * active total CPU number is less than @numgrps. + * + * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' + * for each node. 
+ */ +static void alloc_nodes_groups(unsigned int numgrps, + cpumask_var_t *node_to_cpumask, + const struct cpumask *cpu_mask, + const nodemask_t nodemsk, + struct cpumask *nmsk, + struct node_groups *node_groups) +{ + unsigned int n, numcpus = 0; + + for (n = 0; n < nr_node_ids; n++) { + node_groups[n].id = n; + node_groups[n].ncpus = UINT_MAX; + } + + for_each_node_mask(n, nodemsk) { + unsigned int ncpus; + + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + ncpus = cpumask_weight(nmsk); + + if (!ncpus) + continue; + numcpus += ncpus; + node_groups[n].ncpus = ncpus; + } + + numgrps = min_t(unsigned int, numcpus, numgrps); + alloc_groups_to_nodes(numgrps, numcpus, node_groups, nr_node_ids); +} + +static void assign_cpus_to_groups(unsigned int ncpus, + struct cpumask *nmsk, + struct node_groups *nv, + struct cpumask *masks, + unsigned int *curgrp, + unsigned int last_grp) +{ + unsigned int v, cpus_per_grp, extra_grps; + /* Account for rounding errors */ + extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); + + /* Spread allocated groups on CPUs of the current node */ + for (v = 0; v < nv->ngroups; v++, *curgrp += 1) { + cpus_per_grp = ncpus / nv->ngroups; + + /* Account for extra groups to compensate rounding errors */ + if (extra_grps) { + cpus_per_grp++; + --extra_grps; + } + + /* + * wrapping has to be considered given 'startgrp' + * may start anywhere + */ + if (*curgrp >= last_grp) + *curgrp = 0; + grp_spread_init_one(&masks[*curgrp], nmsk, cpus_per_grp); + } +} + +static int alloc_cluster_groups(unsigned int ncpus, + unsigned int ngroups, + struct cpumask *node_cpumask, + cpumask_var_t msk, + const struct cpumask ***clusters_ptr, + struct node_groups **cluster_groups_ptr) +{ + unsigned int ncluster = 0; + unsigned int cpu, nc, n; + const struct cpumask *cluster_mask; + const struct cpumask **clusters; + struct node_groups *cluster_groups; + + cpumask_copy(msk, node_cpumask); + + /* Probe how many clusters in this node. 
*/ + while (1) { + cpu = cpumask_first(msk); + if (cpu >= nr_cpu_ids) + break; + + cluster_mask = topology_cluster_cpumask(cpu); + if (!cpumask_weight(cluster_mask)) + goto no_cluster; + /* Clean out CPUs on the same cluster. */ + cpumask_andnot(msk, msk, cluster_mask); + ncluster++; + } + + /* If ngroups < ncluster, cross cluster is inevitable, skip. */ + if (ncluster == 0 || ncluster > ngroups) + goto no_cluster; + + /* Allocate memory based on cluster number. */ + clusters = kcalloc(ncluster, sizeof(struct cpumask *), GFP_KERNEL); + if (!clusters) + goto no_cluster; + cluster_groups = kcalloc(ncluster, sizeof(struct node_groups), GFP_KERNEL); + if (!cluster_groups) + goto fail_cluster_groups; + + /* Filling cluster info for later process. */ + cpumask_copy(msk, node_cpumask); + for (n = 0; n < ncluster; n++) { + cpu = cpumask_first(msk); + cluster_mask = topology_cluster_cpumask(cpu); + nc = cpumask_weight_and(cluster_mask, node_cpumask); + clusters[n] = cluster_mask; + cluster_groups[n].id = n; + cluster_groups[n].ncpus = nc; + cpumask_andnot(msk, msk, cluster_mask); + } + + alloc_groups_to_nodes(ngroups, ncpus, cluster_groups, ncluster); + + *clusters_ptr = clusters; + *cluster_groups_ptr = cluster_groups; + return ncluster; + + fail_cluster_groups: + kfree(clusters); + no_cluster: + return 0; +} + +/* + * Try group CPUs evenly for cluster locality within a NUMA node. + * + * Return: true if success, false otherwise. 
+ */ +static bool __try_group_cluster_cpus(unsigned int ncpus, + unsigned int ngroups, + struct cpumask *node_cpumask, + struct cpumask *masks, + unsigned int *curgrp, + unsigned int last_grp) +{ + struct node_groups *cluster_groups; + const struct cpumask **clusters; + unsigned int ncluster; + bool ret = false; + cpumask_var_t nmsk; + unsigned int i, nc; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + goto fail_nmsk_alloc; + + ncluster = alloc_cluster_groups(ncpus, ngroups, node_cpumask, nmsk, + &clusters, &cluster_groups); + + if (ncluster == 0) + goto fail_no_clusters; + + for (i = 0; i < ncluster; i++) { + struct node_groups *nv = &cluster_groups[i]; + + /* Get the cpus on this cluster. */ + cpumask_and(nmsk, node_cpumask, clusters[nv->id]); + nc = cpumask_weight(nmsk); + if (!nc) + continue; + WARN_ON_ONCE(nv->ngroups > nc); + + assign_cpus_to_groups(nc, nmsk, nv, masks, curgrp, last_grp); + } + + ret = true; + kfree(cluster_groups); + kfree(clusters); + fail_no_clusters: + free_cpumask_var(nmsk); + fail_nmsk_alloc: + return ret; +} + static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct cpumask *masks) { - unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0; + unsigned int i, n, nodes, done = 0; unsigned int last_grp = numgrps; unsigned int curgrp = startgrp; nodemask_t nodemsk = NODE_MASK_NONE; @@ -287,7 +442,7 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask, nodemsk, nmsk, node_groups); for (i = 0; i < nr_node_ids; i++) { - unsigned int ncpus, v; + unsigned int ncpus; struct node_groups *nv = &node_groups[i]; if (nv->ngroups == UINT_MAX) @@ -301,28 +456,14 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, WARN_ON_ONCE(nv->ngroups > ncpus); - /* Account for rounding errors */ - extra_grps = ncpus - nv->ngroups * (ncpus / 
nv->ngroups); - - /* Spread allocated groups on CPUs of the current node */ - for (v = 0; v < nv->ngroups; v++, curgrp++) { - cpus_per_grp = ncpus / nv->ngroups; - - /* Account for extra groups to compensate rounding errors */ - if (extra_grps) { - cpus_per_grp++; - --extra_grps; - } - - /* - * wrapping has to be considered given 'startgrp' - * may start anywhere - */ - if (curgrp >= last_grp) - curgrp = 0; - grp_spread_init_one(&masks[curgrp], nmsk, - cpus_per_grp); + if (__try_group_cluster_cpus(ncpus, nv->ngroups, nmsk, + masks, &curgrp, last_grp)) { + done += nv->ngroups; + continue; } + + assign_cpus_to_groups(ncpus, nmsk, nv, masks, &curgrp, + last_grp); done += nv->ngroups; } kfree(node_groups); From 08e8f1ef3df270daef4ffc9c4bb15669f72d5d2f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 14 Jan 2026 22:47:56 -0800 Subject: [PATCH 067/107] kernel-chktaint: add reporting for tainted modules Check all loaded modules and report any that have their 'taint' flags set. The tainted module output format is: * () Example output: Kernel is "tainted" for the following reasons: * externally-built ('out-of-tree') module was loaded (#12) * unsigned module was loaded (#13) Raw taint value as int/string: 12288/'G OE ' Tainted modules: * dump_test (OE) Link: https://lkml.kernel.org/r/20260115064756.531592-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Acked-by: Thorsten Leemhuis Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- tools/debugging/kernel-chktaint | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint index e7da0909d097..e1571c04afb5 100755 --- a/tools/debugging/kernel-chktaint +++ b/tools/debugging/kernel-chktaint @@ -211,9 +211,25 @@ else addout "J" echo " * fwctl's mutating debug interface was used (#19)" fi +echo "Raw taint value as int/string: $taint/'$out'" +# report on any tainted loadable modules +[ "$1" = "" ] && [ -r /sys/module/ ] && \ + 
cnt=`grep [A-Z] /sys/module/*/taint | wc -l` || cnt=0 + +if [ $cnt -ne 0 ]; then + echo + echo "Tainted modules:" + for dir in `ls /sys/module` ; do + if [ -r /sys/module/$dir/taint ]; then + modtnt=`cat /sys/module/$dir/taint` + [ "$modtnt" = "" ] || echo " * $dir ($modtnt)" + fi + done +fi + +echo echo "For a more detailed explanation of the various taint flags see" echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources" echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html" -echo "Raw taint value as int/string: $taint/'$out'" #EOF# From 6ca9de3600f482b74723dc13b5e345e4bc3fb3fa Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Fri, 16 Jan 2026 16:54:11 +0000 Subject: [PATCH 068/107] kho: print which scratch buffer failed to be reserved When scratch area fails to reserve, KHO prints a message indicating that. But it doesn't say which scratch failed to allocate. This can be useful information for debugging. Even more so when the failure is hard to reproduce. Along with the current message, also print which exact scratch area failed to be reserved. 
Link: https://lkml.kernel.org/r/20260116165416.1262531-1-pratyush@kernel.org Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: David Matlack Cc: Pasha Tatashin Cc: Pratyush Yadav Cc: Samiullah Khawaja Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index fbe109a0d858..b0be06c41d92 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -637,8 +637,10 @@ static void __init kho_reserve_scratch(void) kho_scratch_cnt = num_online_nodes() + 2; size = kho_scratch_cnt * sizeof(*kho_scratch); kho_scratch = memblock_alloc(size, PAGE_SIZE); - if (!kho_scratch) + if (!kho_scratch) { + pr_err("Failed to reserve scratch array\n"); goto err_disable_kho; + } /* * reserve scratch area in low memory for lowmem allocations in the @@ -647,8 +649,10 @@ static void __init kho_reserve_scratch(void) size = scratch_size_lowmem; addr = memblock_phys_alloc_range(size, CMA_MIN_ALIGNMENT_BYTES, 0, ARCH_LOW_ADDRESS_LIMIT); - if (!addr) + if (!addr) { + pr_err("Failed to reserve lowmem scratch buffer\n"); goto err_free_scratch_desc; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; @@ -657,8 +661,10 @@ static void __init kho_reserve_scratch(void) /* reserve large contiguous area for allocations without nid */ size = scratch_size_global; addr = memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES); - if (!addr) + if (!addr) { + pr_err("Failed to reserve global scratch buffer\n"); goto err_free_scratch_areas; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; @@ -669,8 +675,10 @@ static void __init kho_reserve_scratch(void) addr = memblock_alloc_range_nid(size, CMA_MIN_ALIGNMENT_BYTES, 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid, true); - if (!addr) + if (!addr) { + pr_err("Failed to reserve nid %d scratch buffer\n", nid); goto 
err_free_scratch_areas; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; From 931d5c36c7369b65adb9e3d197a8d3a8a913db8c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 16 Jan 2026 09:42:52 -0800 Subject: [PATCH 069/107] checkpatch: add an invalid patch separator test Some versions of tools that apply patches incorrectly allow lines that start with 3 dashes and have additional content on the same line. Checkpatch will now emit an ERROR on these lines and optionally convert those lines from dashes to equals with --fix. Link: https://lkml.kernel.org/r/6ec1ed08328340db42655287afd5fa4067316b11.camel@perches.com Signed-off-by: Joe Perches Suggested-by: Ian Rogers Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Kuan-Wei Chiu Cc: Lukas Bulwahn Cc: Namhyung kim Cc: Stehen Rothwell Signed-off-by: Andrew Morton --- Documentation/dev-tools/checkpatch.rst | 5 +++++ scripts/checkpatch.pl | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst index deb3f67a633c..baf0b42ebba9 100644 --- a/Documentation/dev-tools/checkpatch.rst +++ b/Documentation/dev-tools/checkpatch.rst @@ -601,6 +601,11 @@ Commit message See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes + **BAD_COMMIT_SEPARATOR** + The commit separator is a single line with 3 dashes. + The regex match is '^---$' + Lines that start with 3 dashes and have more content on the same line + may confuse tools that apply patches. Comparison style ---------------- diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c0250244cf7a..3932f07e6ada 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3031,6 +3031,16 @@ sub process { } } +# Check for invalid patch separator + if ($in_commit_log && + $line =~ /^---.+/) { + if (ERROR("BAD_COMMIT_SEPARATOR", + "Invalid commit separator - some tools may have problems applying this\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/-/=/g; + } + } + # Check for patch separator if ($line =~ /^---$/) { $has_patch_separator = 1; From 840fe43d371fc59ef2da6b6bb88a4d480eed9a38 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Fri, 16 Jan 2026 11:22:14 +0000 Subject: [PATCH 070/107] kho: use unsigned long for nr_pages Patch series "kho: clean up page initialization logic", v2. This series simplifies the page initialization logic in kho_restore_page(). It was originally only a single patch [0], but on Pasha's suggestion, I added another patch to use unsigned long for nr_pages. Technically speaking, the patches aren't related and can be applied independently, but bundling them together since patch 2 relies on 1 and it is easier to manage them this way. This patch (of 2): With 4k pages, a 32-bit nr_pages can span up to 16 TiB. While it is a lot, there exist systems with terabytes of RAM. gup is also moving to using long for nr_pages. Use unsigned long and make KHO future-proof. 
Link: https://lkml.kernel.org/r/20260116112217.915803-1-pratyush@kernel.org Link: https://lkml.kernel.org/r/20260116112217.915803-2-pratyush@kernel.org Signed-off-by: Pratyush Yadav Suggested-by: Pasha Tatashin Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pasha Tatashin Cc: Alexander Graf Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 6 +++--- kernel/liveupdate/kexec_handover.c | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index a56ff3ffaf17..ac4129d1d741 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -22,15 +22,15 @@ bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); void kho_unpreserve_folio(struct folio *folio); -int kho_preserve_pages(struct page *page, unsigned int nr_pages); -void kho_unpreserve_pages(struct page *page, unsigned int nr_pages); +int kho_preserve_pages(struct page *page, unsigned long nr_pages); +void kho_unpreserve_pages(struct page *page, unsigned long nr_pages); int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); void kho_unpreserve_vmalloc(struct kho_vmalloc *preservation); void *kho_alloc_preserve(size_t size); void kho_unpreserve_free(void *mem); void kho_restore_free(void *mem); struct folio *kho_restore_folio(phys_addr_t phys); -struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); +struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages); void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); int kho_add_subtree(const char *name, void *fdt); void kho_remove_subtree(void *fdt); diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index e44fd7ceff2e..56cc1aad5c5c 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -219,7 +219,8 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long 
pfn, static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) { struct page *page = pfn_to_online_page(PHYS_PFN(phys)); - unsigned int nr_pages, ref_cnt; + unsigned long nr_pages; + unsigned int ref_cnt; union kho_page_info info; if (!page) @@ -246,7 +247,7 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) * count of 1 */ ref_cnt = is_folio ? 0 : 1; - for (unsigned int i = 1; i < nr_pages; i++) + for (unsigned long i = 1; i < nr_pages; i++) set_page_count(page + i, ref_cnt); if (is_folio && info.order) @@ -288,7 +289,7 @@ EXPORT_SYMBOL_GPL(kho_restore_folio); * * Return: 0 on success, error code on failure */ -struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages) +struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages) { const unsigned long start_pfn = PHYS_PFN(phys); const unsigned long end_pfn = start_pfn + nr_pages; @@ -837,7 +838,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_folio); * * Return: 0 on success, error code on failure */ -int kho_preserve_pages(struct page *page, unsigned int nr_pages) +int kho_preserve_pages(struct page *page, unsigned long nr_pages) { struct kho_mem_track *track = &kho_out.track; const unsigned long start_pfn = page_to_pfn(page); @@ -881,7 +882,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_pages); * kho_preserve_pages() call. Unpreserving arbitrary sub-ranges of larger * preserved blocks is not supported. */ -void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) +void kho_unpreserve_pages(struct page *page, unsigned long nr_pages) { struct kho_mem_track *track = &kho_out.track; const unsigned long start_pfn = page_to_pfn(page); From 8f1081892d6218d23bf8afb4246217c41f5a9b21 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Fri, 16 Jan 2026 11:22:15 +0000 Subject: [PATCH 071/107] kho: simplify page initialization in kho_restore_page() When restoring a page (from kho_restore_pages()) or folio (from kho_restore_folio()), KHO must initialize the struct page. 
The initialization differs slightly depending on if a folio is requested or a set of 0-order pages is requested. Conceptually, it is quite simple to understand. When restoring 0-order pages, each page gets a refcount of 1 and that's it. When restoring a folio, head page gets a refcount of 1 and tail pages get 0. kho_restore_page() tries to combine the two separate initialization flow into one piece of code. While it works fine, it is more complicated to read than it needs to be. Make the code simpler by splitting the two initialization paths into two separate functions. This improves readability by clearly showing how each type must be initialized. Link: https://lkml.kernel.org/r/20260116112217.915803-3-pratyush@kernel.org Signed-off-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pasha Tatashin Cc: Alexander Graf Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 40 +++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 56cc1aad5c5c..fbfa5a04faed 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -216,11 +216,32 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, return 0; } +/* For physically contiguous 0-order pages. */ +static void kho_init_pages(struct page *page, unsigned long nr_pages) +{ + for (unsigned long i = 0; i < nr_pages; i++) + set_page_count(page + i, 1); +} + +static void kho_init_folio(struct page *page, unsigned int order) +{ + unsigned long nr_pages = (1 << order); + + /* Head page gets refcount of 1. */ + set_page_count(page, 1); + + /* For higher order folios, tail pages get a page count of zero. 
*/ + for (unsigned long i = 1; i < nr_pages; i++) + set_page_count(page + i, 0); + + if (order > 0) + prep_compound_page(page, order); +} + static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) { struct page *page = pfn_to_online_page(PHYS_PFN(phys)); unsigned long nr_pages; - unsigned int ref_cnt; union kho_page_info info; if (!page) @@ -238,20 +259,11 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) /* Clear private to make sure later restores on this page error out. */ page->private = 0; - /* Head page gets refcount of 1. */ - set_page_count(page, 1); - /* - * For higher order folios, tail pages get a page count of zero. - * For physically contiguous order-0 pages every pages gets a page - * count of 1 - */ - ref_cnt = is_folio ? 0 : 1; - for (unsigned long i = 1; i < nr_pages; i++) - set_page_count(page + i, ref_cnt); - - if (is_folio && info.order) - prep_compound_page(page, info.order); + if (is_folio) + kho_init_folio(page, info.order); + else + kho_init_pages(page, nr_pages); /* Always mark headpage's codetag as empty to avoid accounting mismatch */ clear_page_tag_ref(page); From e8d899d301346a5591c9d1af06c3c9b3501cf84b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 16 Jan 2026 16:26:27 -0700 Subject: [PATCH 072/107] compiler-clang.h: require LLVM 19.1.0 or higher for __typeof_unqual__ When building the kernel using a version of LLVM between llvmorg-19-init (the first commit of the LLVM 19 development cycle) and the change in LLVM that actually added __typeof_unqual__ for all C modes [1], which might happen during a bisect of LLVM, there is a build failure: In file included from arch/x86/kernel/asm-offsets.c:9: In file included from include/linux/crypto.h:15: In file included from include/linux/completion.h:12: In file included from include/linux/swait.h:7: In file included from include/linux/spinlock.h:56: In file included from include/linux/preempt.h:79: arch/x86/include/asm/preempt.h:61:2: error: call 
to undeclared function '__typeof_unqual__'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 61 | raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); | ^ arch/x86/include/asm/percpu.h:478:36: note: expanded from macro 'raw_cpu_and_4' 478 | #define raw_cpu_and_4(pcp, val) percpu_binary_op(4, , "and", (pcp), val) | ^ arch/x86/include/asm/percpu.h:210:3: note: expanded from macro 'percpu_binary_op' 210 | TYPEOF_UNQUAL(_var) pto_tmp__; \ | ^ include/linux/compiler.h:248:29: note: expanded from macro 'TYPEOF_UNQUAL' 248 | # define TYPEOF_UNQUAL(exp) __typeof_unqual__(exp) | ^ The current logic of CC_HAS_TYPEOF_UNQUAL just checks for a major version of 19 but half of the 19 development cycle did not have support for __typeof_unqual__. Harden the logic of CC_HAS_TYPEOF_UNQUAL to avoid this error by only using __typeof_unqual__ with a released version of LLVM 19, which is greater than or equal to 19.1.0 with LLVM's versioning scheme that matches GCC's [2]. Link: https://github.com/llvm/llvm-project/commit/cc308f60d41744b5920ec2e2e5b25e1273c8704b [1] Link: https://github.com/llvm/llvm-project/commit/4532617ae420056bf32f6403dde07fb99d276a49 [2] Link: https://lkml.kernel.org/r/20260116-require-llvm-19-1-for-typeof_unqual-v1-1-3b9a4a4b212b@kernel.org Fixes: ac053946f5c4 ("compiler.h: introduce TYPEOF_UNQUAL() macro") Signed-off-by: Nathan Chancellor Cc: Bill Wendling Cc: Justin Stitt Cc: Uros Bizjak Cc: Signed-off-by: Andrew Morton --- include/linux/compiler-clang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 7edf1a07b535..e1123dd28486 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -153,4 +153,4 @@ * Bindgen uses LLVM even if our C compiler is GCC, so we cannot * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. 
*/ -#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ >= 19) +#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ > 19 || (__clang_major__ == 19 && __clang_minor__ > 0)) From f2e0abdc88ce68cdba0a66ccc05a3e96b688a2c7 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 15 Jan 2026 23:25:04 -0500 Subject: [PATCH 073/107] kernel.h: drop STACK_MAGIC macro Patch series "Unload linux/kernel.h", v5. kernel.h hosts declarations that can be placed better. This series decouples kernel.h with some explicit and implicit dependencies; also, moves tracing functionality to a new independent header. This patch (of 6): The macro was introduced in 1994, v1.0.4, for stacks protection. Since that, people found better ways to protect stacks, and now the macro is only used by i915 selftests. Move it to a local header and drop from the kernel.h. Link: https://lkml.kernel.org/r/20260116042510.241009-1-ynorov@nvidia.com Link: https://lkml.kernel.org/r/20260116042510.241009-2-ynorov@nvidia.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Acked-by: Randy Dunlap Acked-by: Jani Nikula Reviewed-by: Christophe Leroy (CS GROUP) Reviewed-by: Aaron Tomlin Reviewed-by: Andi Shyti Reviewed-by: Joel Fernandes Cc: Greg Kroah-Hartman Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- drivers/gpu/drm/i915/gt/selftest_ring_submission.c | 1 + drivers/gpu/drm/i915/i915_selftest.h | 2 ++ include/linux/kernel.h | 2 -- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 87ceb0f374b6..600333ae6c8c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -3,6 +3,7 @@ * Copyright © 2020 Intel Corporation */ +#include "i915_selftest.h" #include "intel_engine_pm.h" #include "selftests/igt_flush_test.h" diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 
bdf3e22c0a34..72922028f4ba 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -26,6 +26,8 @@ #include +#define STACK_MAGIC 0xdeadbeef + struct pci_dev; struct drm_i915_private; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 35b8f2a5aca5..cefe733a0c10 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -39,8 +39,6 @@ #include -#define STACK_MAGIC 0xdeadbeef - struct completion; struct user; From 25b66674b1036c1eb3069bf62329a9c60850d782 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 15 Jan 2026 23:25:05 -0500 Subject: [PATCH 074/107] moduleparam: include required headers explicitly The following patch drops moduleparam.h dependency on kernel.h. In preparation to it, list all the required headers explicitly. Link: https://lkml.kernel.org/r/20260116042510.241009-3-ynorov@nvidia.com Signed-off-by: Yury Norov Suggested-by: Petr Pavlu Reviewed-by: Petr Pavlu Reviewed-by: Andy Shevchenko Reviewed-by: Joel Fernandes Cc: Aaron Tomlin Cc: Andi Shyti Cc: Christophe Leroy (CS GROUP) Cc: Greg Kroah-Hartman Cc: Jani Nikula Cc: Randy Dunlap Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/moduleparam.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 915f32f7d888..03a977168c52 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -2,9 +2,14 @@ #ifndef _LINUX_MODULE_PARAMS_H #define _LINUX_MODULE_PARAMS_H /* (C) Copyright 2001, 2002 Rusty Russell IBM Corporation */ + +#include +#include +#include #include #include #include +#include /* * The maximum module name length, including the NUL byte. From 90ddd39b881df74b14918cee031154f6ddb7af33 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 15 Jan 2026 23:25:06 -0500 Subject: [PATCH 075/107] kernel.h: move VERIFY_OCTAL_PERMISSIONS() to sysfs.h The macro is related to sysfs, but is defined in kernel.h. 
Move it to the proper header, and unload the generic kernel.h. Now that the macro is removed from kernel.h, linux/moduleparam.h is decoupled, and kernel.h inclusion can be removed. Link: https://lkml.kernel.org/r/20260116042510.241009-4-ynorov@nvidia.com Signed-off-by: Yury Norov Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Andy Shevchenko Reviewed-by: Petr Pavlu Acked-by: Greg Kroah-Hartman Reviewed-by: Joel Fernandes Cc: Aaron Tomlin Cc: Andi Shyti Cc: Christophe Leroy (CS GROUP) Cc: Jani Nikula Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- Documentation/filesystems/sysfs.rst | 2 +- include/linux/kernel.h | 12 ------------ include/linux/moduleparam.h | 2 +- include/linux/sysfs.h | 13 +++++++++++++ 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst index 2703c04af7d0..ffcef4d6bc8d 100644 --- a/Documentation/filesystems/sysfs.rst +++ b/Documentation/filesystems/sysfs.rst @@ -120,7 +120,7 @@ is equivalent to doing:: .store = store_foo, }; -Note as stated in include/linux/kernel.h "OTHER_WRITABLE? Generally +Note as stated in include/linux/sysfs.h "OTHER_WRITABLE? Generally considered a bad idea." so trying to set a sysfs file writable for everyone will fail reverting to RO mode for "Others". diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cefe733a0c10..09850b26061c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -388,16 +388,4 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } # define REBUILD_DUE_TO_DYNAMIC_FTRACE #endif -/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? 
*/ -#define VERIFY_OCTAL_PERMISSIONS(perms) \ - (BUILD_BUG_ON_ZERO((perms) < 0) + \ - BUILD_BUG_ON_ZERO((perms) > 0777) + \ - /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ - BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ - BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ - /* USER_WRITABLE >= GROUP_WRITABLE */ \ - BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ - /* OTHER_WRITABLE? Generally considered a bad idea. */ \ - BUILD_BUG_ON_ZERO((perms) & 2) + \ - (perms)) #endif diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 03a977168c52..281a006dc284 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include /* diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c33a96b7391a..99b775f3ff46 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -808,4 +808,17 @@ static inline void sysfs_put(struct kernfs_node *kn) kernfs_put(kn); } +/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ + /* USER_WRITABLE >= GROUP_WRITABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ + /* OTHER_WRITABLE? Generally considered a bad idea. 
*/ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ + (perms)) + #endif /* _SYSFS_H_ */ From 269586d68994ca307ded058255e243692e3bf753 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 15 Jan 2026 23:25:07 -0500 Subject: [PATCH 076/107] kernel.h: include linux/instruction_pointer.h explicitly In preparation for decoupling linux/instruction_pointer.h and linux/kernel.h, include instruction_pointer.h explicitly where needed. Link: https://lkml.kernel.org/r/20260116042510.241009-5-ynorov@nvidia.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Reviewed-by: Joel Fernandes Cc: Aaron Tomlin Cc: Andi Shyti Cc: Christophe Leroy (CS GROUP) Cc: Greg Kroah-Hartman Cc: Jani Nikula Cc: Petr Pavlu Cc: Randy Dunlap Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- arch/s390/include/asm/processor.h | 1 + include/linux/ww_mutex.h | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 3affba95845b..cc187afa07b3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 45ff6f7a872b..9b30fa2ec508 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -17,6 +17,7 @@ #ifndef __LINUX_WW_MUTEX_H #define __LINUX_WW_MUTEX_H +#include #include #include From 86e685ff364394b477cd1c476029480a2a1960c5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2026 23:25:08 -0500 Subject: [PATCH 077/107] tracing: remove size parameter in __trace_puts() The __trace_puts() function takes a string pointer and the size of the string itself. All users currently simply pass in the strlen() of the string it is also passing in. There's no reason to pass in the size. Instead have the __trace_puts() function do the strlen() within the function itself. 
This fixes a header recursion issue where using strlen() in the macro calling __trace_puts() requires adding #include in order to use strlen(). Removing the use of strlen() from the header fixes the recursion issue. Link: https://lore.kernel.org/all/aUN8Hm377C5A0ILX@yury/ Link: https://lkml.kernel.org/r/20260116042510.241009-6-ynorov@nvidia.com Signed-off-by: Steven Rostedt (Google) Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Reviewed-by: Joel Fernandes Cc: Aaron Tomlin Cc: Andi Shyti Cc: Christophe Leroy (CS GROUP) Cc: Greg Kroah-Hartman Cc: Jani Nikula Cc: Petr Pavlu Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/kernel.h | 4 ++-- kernel/trace/trace.c | 7 +++---- kernel/trace/trace.h | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 09850b26061c..5838c419ed37 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -328,10 +328,10 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); if (__builtin_constant_p(str)) \ __trace_bputs(_THIS_IP_, trace_printk_fmt); \ else \ - __trace_puts(_THIS_IP_, str, strlen(str)); \ + __trace_puts(_THIS_IP_, str); \ }) extern int __trace_bputs(unsigned long ip, const char *str); -extern int __trace_puts(unsigned long ip, const char *str, int size); +extern int __trace_puts(unsigned long ip, const char *str); extern void trace_dump_stack(int skip); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index baec63134ab6..e18005807395 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1178,11 +1178,10 @@ EXPORT_SYMBOL_GPL(__trace_array_puts); * __trace_puts - write a constant string into the trace buffer. * @ip: The address of the caller * @str: The constant string to write - * @size: The size of the string. 
*/ -int __trace_puts(unsigned long ip, const char *str, int size) +int __trace_puts(unsigned long ip, const char *str) { - return __trace_array_puts(printk_trace, ip, str, size); + return __trace_array_puts(printk_trace, ip, str, strlen(str)); } EXPORT_SYMBOL_GPL(__trace_puts); @@ -1201,7 +1200,7 @@ int __trace_bputs(unsigned long ip, const char *str) int size = sizeof(struct bputs_entry); if (!printk_binsafe(tr)) - return __trace_puts(ip, str, strlen(str)); + return __trace_puts(ip, str); if (!(tr->trace_flags & TRACE_ITER(PRINTK))) return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b6d42fe06115..de4e6713b84e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2116,7 +2116,7 @@ extern void tracing_log_err(struct trace_array *tr, * about performance). The internal_trace_puts() is for such * a purpose. */ -#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) +#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str) #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ From bec261fec6d41318e414c4064f2b67c6db628acd Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 15 Jan 2026 23:25:09 -0500 Subject: [PATCH 078/107] tracing: move tracing declarations from kernel.h to a dedicated header Tracing is a half of the kernel.h in terms of LOCs, although it's a self-consistent part. It is intended for quick debugging purposes and isn't used by the normal tracing utilities. Move it to a separate header. If someone needs to just throw a trace_printk() in their driver, they will not have to pull all the heavy tracing machinery. This is a pure move. 
Link: https://lkml.kernel.org/r/20260116042510.241009-7-ynorov@nvidia.com Signed-off-by: Yury Norov Acked-by: Steven Rostedt Reviewed-by: Andy Shevchenko Reviewed-by: Joel Fernandes Cc: Aaron Tomlin Cc: Andi Shyti Cc: Christophe Leroy (CS GROUP) Cc: Greg Kroah-Hartman Cc: Jani Nikula Cc: Petr Pavlu Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/kernel.h | 196 +-------------------------------- include/linux/trace_printk.h | 204 +++++++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 195 deletions(-) create mode 100644 include/linux/trace_printk.h diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5838c419ed37..e5570a16cbb1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include @@ -189,200 +189,6 @@ enum system_states { }; extern enum system_states system_state; -/* - * General tracing related utility functions - trace_printk(), - * tracing_on/tracing_off and tracing_start()/tracing_stop - * - * Use tracing_on/tracing_off when you want to quickly turn on or off - * tracing. It simply enables or disables the recording of the trace events. - * This also corresponds to the user space /sys/kernel/tracing/tracing_on - * file, which gives a means for the kernel and userspace to interact. - * Place a tracing_off() in the kernel where you want tracing to end. - * From user space, examine the trace, and then echo 1 > tracing_on - * to continue tracing. - * - * tracing_stop/tracing_start has slightly more overhead. It is used - * by things like suspend to ram where disabling the recording of the - * trace is not enough, but tracing must actually stop because things - * like calling smp_processor_id() may crash the system. - * - * Most likely, you want to use tracing_on/tracing_off. 
- */ - -enum ftrace_dump_mode { - DUMP_NONE, - DUMP_ALL, - DUMP_ORIG, - DUMP_PARAM, -}; - -#ifdef CONFIG_TRACING -void tracing_on(void); -void tracing_off(void); -int tracing_is_on(void); -void tracing_snapshot(void); -void tracing_snapshot_alloc(void); - -extern void tracing_start(void); -extern void tracing_stop(void); - -static inline __printf(1, 2) -void ____trace_printk_check_format(const char *fmt, ...) -{ -} -#define __trace_printk_check_format(fmt, args...) \ -do { \ - if (0) \ - ____trace_printk_check_format(fmt, ##args); \ -} while (0) - -/** - * trace_printk - printf formatting in the ftrace buffer - * @fmt: the printf format for printing - * - * Note: __trace_printk is an internal function for trace_printk() and - * the @ip is passed in via the trace_printk() macro. - * - * This function allows a kernel developer to debug fast path sections - * that printk is not appropriate for. By scattering in various - * printk like tracing in the code, a developer can quickly see - * where problems are occurring. - * - * This is intended as a debugging tool for the developer only. - * Please refrain from leaving trace_printks scattered around in - * your code. (Extra memory is used for special buffers that are - * allocated when trace_printk() is used.) - * - * A little optimization trick is done here. If there's only one - * argument, there's no need to scan the string for printf formats. - * The trace_puts() will suffice. But how can we take advantage of - * using trace_puts() when trace_printk() has only one argument? - * By stringifying the args and checking the size we can tell - * whether or not there are args. __stringify((__VA_ARGS__)) will - * turn into "()\0" with a size of 3 when there are no args, anything - * else will be bigger. All we need to do is define a string to this, - * and then take its size and compare to 3. If it's bigger, use - * do_trace_printk() otherwise, optimize it to trace_puts(). Then just - * let gcc optimize the rest. 
- */ - -#define trace_printk(fmt, ...) \ -do { \ - char _______STR[] = __stringify((__VA_ARGS__)); \ - if (sizeof(_______STR) > 3) \ - do_trace_printk(fmt, ##__VA_ARGS__); \ - else \ - trace_puts(fmt); \ -} while (0) - -#define do_trace_printk(fmt, args...) \ -do { \ - static const char *trace_printk_fmt __used \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_printk_check_format(fmt, ##args); \ - \ - if (__builtin_constant_p(fmt)) \ - __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ - else \ - __trace_printk(_THIS_IP_, fmt, ##args); \ -} while (0) - -extern __printf(2, 3) -int __trace_bprintk(unsigned long ip, const char *fmt, ...); - -extern __printf(2, 3) -int __trace_printk(unsigned long ip, const char *fmt, ...); - -/** - * trace_puts - write a string into the ftrace buffer - * @str: the string to record - * - * Note: __trace_bputs is an internal function for trace_puts and - * the @ip is passed in via the trace_puts macro. - * - * This is similar to trace_printk() but is made for those really fast - * paths that a developer wants the least amount of "Heisenbug" effects, - * where the processing of the print format is still too much. - * - * This function allows a kernel developer to debug fast path sections - * that printk is not appropriate for. By scattering in various - * printk like tracing in the code, a developer can quickly see - * where problems are occurring. - * - * This is intended as a debugging tool for the developer only. - * Please refrain from leaving trace_puts scattered around in - * your code. (Extra memory is used for special buffers that are - * allocated when trace_puts() is used.) - * - * Returns: 0 if nothing was written, positive # if string was. - * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) - */ - -#define trace_puts(str) ({ \ - static const char *trace_printk_fmt __used \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(str) ? 
str : NULL; \ - \ - if (__builtin_constant_p(str)) \ - __trace_bputs(_THIS_IP_, trace_printk_fmt); \ - else \ - __trace_puts(_THIS_IP_, str); \ -}) -extern int __trace_bputs(unsigned long ip, const char *str); -extern int __trace_puts(unsigned long ip, const char *str); - -extern void trace_dump_stack(int skip); - -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement. - */ -#define ftrace_vprintk(fmt, vargs) \ -do { \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt __used \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ - } else \ - __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ -} while (0) - -extern __printf(2, 0) int -__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); - -extern __printf(2, 0) int -__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); - -extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); -#else -static inline void tracing_start(void) { } -static inline void tracing_stop(void) { } -static inline void trace_dump_stack(int skip) { } - -static inline void tracing_on(void) { } -static inline void tracing_off(void) { } -static inline int tracing_is_on(void) { return 0; } -static inline void tracing_snapshot(void) { } -static inline void tracing_snapshot_alloc(void) { } - -static inline __printf(1, 2) -int trace_printk(const char *fmt, ...) 
-{ - return 0; -} -static __printf(1, 0) inline int -ftrace_vprintk(const char *fmt, va_list ap) -{ - return 0; -} -static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } -#endif /* CONFIG_TRACING */ - /* Rebuild everything on CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_DYNAMIC_FTRACE # define REBUILD_DUE_TO_DYNAMIC_FTRACE diff --git a/include/linux/trace_printk.h b/include/linux/trace_printk.h new file mode 100644 index 000000000000..bb5874097f24 --- /dev/null +++ b/include/linux/trace_printk.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TRACE_PRINTK_H +#define _LINUX_TRACE_PRINTK_H + +#include +#include +#include +#include + +/* + * General tracing related utility functions - trace_printk(), + * tracing_on/tracing_off and tracing_start()/tracing_stop + * + * Use tracing_on/tracing_off when you want to quickly turn on or off + * tracing. It simply enables or disables the recording of the trace events. + * This also corresponds to the user space /sys/kernel/tracing/tracing_on + * file, which gives a means for the kernel and userspace to interact. + * Place a tracing_off() in the kernel where you want tracing to end. + * From user space, examine the trace, and then echo 1 > tracing_on + * to continue tracing. + * + * tracing_stop/tracing_start has slightly more overhead. It is used + * by things like suspend to ram where disabling the recording of the + * trace is not enough, but tracing must actually stop because things + * like calling smp_processor_id() may crash the system. + * + * Most likely, you want to use tracing_on/tracing_off. 
+ */ + +enum ftrace_dump_mode { + DUMP_NONE, + DUMP_ALL, + DUMP_ORIG, + DUMP_PARAM, +}; + +#ifdef CONFIG_TRACING +void tracing_on(void); +void tracing_off(void); +int tracing_is_on(void); +void tracing_snapshot(void); +void tracing_snapshot_alloc(void); + +extern void tracing_start(void); +extern void tracing_stop(void); + +static inline __printf(1, 2) +void ____trace_printk_check_format(const char *fmt, ...) +{ +} +#define __trace_printk_check_format(fmt, args...) \ +do { \ + if (0) \ + ____trace_printk_check_format(fmt, ##args); \ +} while (0) + +/** + * trace_printk - printf formatting in the ftrace buffer + * @fmt: the printf format for printing + * + * Note: __trace_printk is an internal function for trace_printk() and + * the @ip is passed in via the trace_printk() macro. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_printks scattered around in + * your code. (Extra memory is used for special buffers that are + * allocated when trace_printk() is used.) + * + * A little optimization trick is done here. If there's only one + * argument, there's no need to scan the string for printf formats. + * The trace_puts() will suffice. But how can we take advantage of + * using trace_puts() when trace_printk() has only one argument? + * By stringifying the args and checking the size we can tell + * whether or not there are args. __stringify((__VA_ARGS__)) will + * turn into "()\0" with a size of 3 when there are no args, anything + * else will be bigger. All we need to do is define a string to this, + * and then take its size and compare to 3. If it's bigger, use + * do_trace_printk() otherwise, optimize it to trace_puts(). Then just + * let gcc optimize the rest. 
+ */ + +#define trace_printk(fmt, ...) \ +do { \ + char _______STR[] = __stringify((__VA_ARGS__)); \ + if (sizeof(_______STR) > 3) \ + do_trace_printk(fmt, ##__VA_ARGS__); \ + else \ + trace_puts(fmt); \ +} while (0) + +#define do_trace_printk(fmt, args...) \ +do { \ + static const char *trace_printk_fmt __used \ + __section("__trace_printk_fmt") = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_printk_check_format(fmt, ##args); \ + \ + if (__builtin_constant_p(fmt)) \ + __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ + else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ +} while (0) + +extern __printf(2, 3) +int __trace_bprintk(unsigned long ip, const char *fmt, ...); + +extern __printf(2, 3) +int __trace_printk(unsigned long ip, const char *fmt, ...); + +/** + * trace_puts - write a string into the ftrace buffer + * @str: the string to record + * + * Note: __trace_bputs is an internal function for trace_puts and + * the @ip is passed in via the trace_puts macro. + * + * This is similar to trace_printk() but is made for those really fast + * paths that a developer wants the least amount of "Heisenbug" effects, + * where the processing of the print format is still too much. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_puts scattered around in + * your code. (Extra memory is used for special buffers that are + * allocated when trace_puts() is used.) + * + * Returns: 0 if nothing was written, positive # if string was. + * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) + */ + +#define trace_puts(str) ({ \ + static const char *trace_printk_fmt __used \ + __section("__trace_printk_fmt") = \ + __builtin_constant_p(str) ? 
str : NULL; \ + \ + if (__builtin_constant_p(str)) \ + __trace_bputs(_THIS_IP_, trace_printk_fmt); \ + else \ + __trace_puts(_THIS_IP_, str); \ +}) +extern int __trace_bputs(unsigned long ip, const char *str); +extern int __trace_puts(unsigned long ip, const char *str); + +extern void trace_dump_stack(int skip); + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement. + */ +#define ftrace_vprintk(fmt, vargs) \ +do { \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt __used \ + __section("__trace_printk_fmt") = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + } else \ + __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ +} while (0) + +extern __printf(2, 0) int +__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); + +extern __printf(2, 0) int +__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); + +extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); +#else +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } +static inline void trace_dump_stack(int skip) { } + +static inline void tracing_on(void) { } +static inline void tracing_off(void) { } +static inline int tracing_is_on(void) { return 0; } +static inline void tracing_snapshot(void) { } +static inline void tracing_snapshot_alloc(void) { } + +static inline __printf(1, 2) +int trace_printk(const char *fmt, ...) 
+{ + return 0; +} +static __printf(1, 0) inline int +ftrace_vprintk(const char *fmt, va_list ap) +{ + return 0; +} +static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } +#endif /* CONFIG_TRACING */ + +#endif From cc20650a096370469919be0eb3b041fc5aa47b39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Jan 2026 08:34:47 +0000 Subject: [PATCH 079/107] scripts/bloat-o-meter: ignore __noinstr_text_start __noinstr_text_start is adding noise to the script, ignore it. For instance using __always_inline on __skb_incr_checksum_unnecessary and CC=clang build. Before this patch, __noinstr_text_start can show up and confuse us. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/2 grow/shrink: 3/0 up/down: 212/-206 (6) Function old new delta tcp6_gro_complete 208 283 +75 tcp4_gro_complete 376 449 +73 __noinstr_text_start 3536 3600 +64 __pfx___skb_incr_checksum_unnecessary 32 - -32 __skb_incr_checksum_unnecessary 174 - -174 Total: Before=25509464, After=25509470, chg +0.00% After this patch we have a more precise result. 
$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/2 grow/shrink: 2/0 up/down: 148/-206 (-58) Function old new delta tcp6_gro_complete 208 283 +75 tcp4_gro_complete 376 449 +73 __pfx___skb_incr_checksum_unnecessary 32 - -32 __skb_incr_checksum_unnecessary 174 - -174 Total: Before=25505928, After=25505870, chg -0.00% Link: https://lkml.kernel.org/r/20260117083448.3877418-1-edumazet@google.com Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton --- scripts/bloat-o-meter | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 888ce286a351..db5dd18dc2d5 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -42,6 +42,7 @@ def getsizes(file, format): if name.startswith("__se_sys"): continue if name.startswith("__se_compat_sys"): continue if name.startswith("__addressable_"): continue + if name.startswith("__noinstr_text_start"): continue if name == "linux_banner": continue if name == "vermagic": continue # statics and some other optimizations adds random .NUMBER From 503efe850c7463a1e59df133b84461ef53c0361f Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Mon, 19 Jan 2026 10:02:41 +0800 Subject: [PATCH 080/107] delayacct: add timestamp of delay max Problem ======= Commit 658eb5ab916d ("delayacct: add delay max to record delay peak") introduced the delay max for getdelays, which records abnormal latency peaks and helps us understand the magnitude of such delays. However, the peak latency value alone is insufficient for effective root cause analysis. Without the precise timestamp of when the peak occurred, we still lack the critical context needed to correlate it with other system events. Solution ======== To address this, we need to additionally record a precise timestamp when the maximum latency occurs. By correlating this timestamp with system logs and monitoring metrics, we can identify processes with abnormal resource usage at the same moment, which can help us to pinpoint root causes. 
Use Case ======== bash-4.4# ./getdelays -d -t 227 print delayacct stats ON TGID 227 CPU count real total virtual total delay total delay average delay max delay min delay max timestamp 46 188000000 192348334 4098012 0.089ms 0.429260ms 0.051205ms 2026-01-15T15:06:58 IO count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A SWAP count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A RECLAIM count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A THRAS HING count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A COMPACT count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A WPCOPY count delay total delay average delay max delay min delay max timestamp 182 19413338 0.107ms 0.547353ms 0.022462ms 2026-01-15T15:05:24 IRQ count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A Link: https://lkml.kernel.org/r/20260119100241520gWubW8-5QfhSf9gjqcc_E@zte.com.cn Signed-off-by: Wang Yaxin Cc: Fan Yu Cc: Jonathan Corbet Cc: xu xin Cc: Yang Yang Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 32 ++-- include/linux/delayacct.h | 8 + include/linux/sched.h | 5 + include/uapi/linux/taskstats.h | 22 ++- kernel/delayacct.c | 31 +++- kernel/sched/stats.h | 8 +- tools/accounting/getdelays.c | 172 +++++++++++++++--- 7 files changed, 223 insertions(+), 55 deletions(-) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 86d7902a657f..e209c46241b0 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -107,22 +107,22 @@ Get sum and peak of delays, since system boot, for all pids with tgid 242:: TGID 242 - CPU count real 
total virtual total delay total delay average delay max delay min - 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms - IO count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - SWAP count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - RECLAIM count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - THRASHING count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - COMPACT count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - WPCOPY count delay total delay average delay max delay min - 156 11215873 0.072ms 0.207403ms 0.033913ms - IRQ count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms + CPU count real total virtual total delay total delay average delay max delay min delay max timestamp + 46 188000000 192348334 4098012 0.089ms 0.429260ms 0.051205ms 2026-01-15T15:06:58 + IO count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + SWAP count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + RECLAIM count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + THRASHING count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + COMPACT count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + WPCOPY count delay total delay average delay max delay min delay max timestamp + 182 19413338 0.107ms 0.547353ms 0.022462ms 2026-01-15T15:05:24 + IRQ count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A Get IO accounting for pid 1, it works only with -p:: diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 
800dcc360db2..ecb06f16d22c 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -69,6 +69,14 @@ struct task_delay_info { u32 compact_count; /* total count of memory compact */ u32 wpcopy_count; /* total count of write-protect copy */ u32 irq_count; /* total count of IRQ/SOFTIRQ */ + + struct timespec64 blkio_delay_max_ts; + struct timespec64 swapin_delay_max_ts; + struct timespec64 freepages_delay_max_ts; + struct timespec64 thrashing_delay_max_ts; + struct timespec64 compact_delay_max_ts; + struct timespec64 wpcopy_delay_max_ts; + struct timespec64 irq_delay_max_ts; }; #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index da0133524d08..1d22b6229b95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -49,6 +49,7 @@ #include #include #include +#include #ifndef COMPILE_OFFSETS #include #endif @@ -86,6 +87,7 @@ struct signal_struct; struct task_delay_info; struct task_group; struct task_struct; +struct timespec64; struct user_event_mm; #include @@ -435,6 +437,9 @@ struct sched_info { /* When were we last queued to run? 
*/ unsigned long long last_queued; + /* Timestamp of max time spent waiting on a runqueue: */ + struct timespec64 max_run_delay_ts; + #endif /* CONFIG_SCHED_INFO */ }; diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 5929030d4e8b..1b31e8e14d2f 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -18,6 +18,16 @@ #define _LINUX_TASKSTATS_H #include +#ifdef __KERNEL__ +#include +#else +#ifndef _LINUX_TIME64_H +struct timespec64 { + __s64 tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#endif +#endif /* Format for per-task data returned to userland when * - a task exits @@ -34,7 +44,7 @@ */ -#define TASKSTATS_VERSION 16 +#define TASKSTATS_VERSION 17 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -230,6 +240,16 @@ struct taskstats { __u64 irq_delay_max; __u64 irq_delay_min; + + /*v17: delay max timestamp record*/ + struct timespec64 cpu_delay_max_ts; + struct timespec64 blkio_delay_max_ts; + struct timespec64 swapin_delay_max_ts; + struct timespec64 freepages_delay_max_ts; + struct timespec64 thrashing_delay_max_ts; + struct timespec64 compact_delay_max_ts; + struct timespec64 wpcopy_delay_max_ts; + struct timespec64 irq_delay_max_ts; }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 30e7912ebb0d..d58ffc63bcba 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -18,6 +18,7 @@ do { \ d->type##_delay_max = tsk->delays->type##_delay_max; \ d->type##_delay_min = tsk->delays->type##_delay_min; \ + d->type##_delay_max_ts = tsk->delays->type##_delay_max_ts; \ tmp = d->type##_delay_total + tsk->delays->type##_delay; \ d->type##_delay_total = (tmp < d->type##_delay_total) ? 
0 : tmp; \ d->type##_count += tsk->delays->type##_count; \ @@ -104,7 +105,8 @@ void __delayacct_tsk_init(struct task_struct *tsk) * Finish delay accounting for a statistic using its timestamps (@start), * accumulator (@total) and @count */ -static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min) +static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, + u64 *max, u64 *min, struct timespec64 *ts) { s64 ns = local_clock() - *start; unsigned long flags; @@ -113,8 +115,10 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou raw_spin_lock_irqsave(lock, flags); *total += ns; (*count)++; - if (ns > *max) + if (ns > *max) { *max = ns; + ktime_get_real_ts64(ts); + } if (*min == 0 || ns < *min) *min = ns; raw_spin_unlock_irqrestore(lock, flags); @@ -137,7 +141,8 @@ void __delayacct_blkio_end(struct task_struct *p) &p->delays->blkio_delay, &p->delays->blkio_count, &p->delays->blkio_delay_max, - &p->delays->blkio_delay_min); + &p->delays->blkio_delay_min, + &p->delays->blkio_delay_max_ts); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -170,6 +175,7 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->cpu_delay_max = tsk->sched_info.max_run_delay; d->cpu_delay_min = tsk->sched_info.min_run_delay; + d->cpu_delay_max_ts = tsk->sched_info.max_run_delay_ts; tmp = (s64)d->cpu_delay_total + t2; d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 
0 : tmp; tmp = (s64)d->cpu_run_virtual_total + t3; @@ -217,7 +223,8 @@ void __delayacct_freepages_end(void) ¤t->delays->freepages_delay, ¤t->delays->freepages_count, ¤t->delays->freepages_delay_max, - ¤t->delays->freepages_delay_min); + ¤t->delays->freepages_delay_min, + ¤t->delays->freepages_delay_max_ts); } void __delayacct_thrashing_start(bool *in_thrashing) @@ -241,7 +248,8 @@ void __delayacct_thrashing_end(bool *in_thrashing) ¤t->delays->thrashing_delay, ¤t->delays->thrashing_count, ¤t->delays->thrashing_delay_max, - ¤t->delays->thrashing_delay_min); + ¤t->delays->thrashing_delay_min, + ¤t->delays->thrashing_delay_max_ts); } void __delayacct_swapin_start(void) @@ -256,7 +264,8 @@ void __delayacct_swapin_end(void) ¤t->delays->swapin_delay, ¤t->delays->swapin_count, ¤t->delays->swapin_delay_max, - ¤t->delays->swapin_delay_min); + ¤t->delays->swapin_delay_min, + ¤t->delays->swapin_delay_max_ts); } void __delayacct_compact_start(void) @@ -271,7 +280,8 @@ void __delayacct_compact_end(void) ¤t->delays->compact_delay, ¤t->delays->compact_count, ¤t->delays->compact_delay_max, - ¤t->delays->compact_delay_min); + ¤t->delays->compact_delay_min, + ¤t->delays->compact_delay_max_ts); } void __delayacct_wpcopy_start(void) @@ -286,7 +296,8 @@ void __delayacct_wpcopy_end(void) ¤t->delays->wpcopy_delay, ¤t->delays->wpcopy_count, ¤t->delays->wpcopy_delay_max, - ¤t->delays->wpcopy_delay_min); + ¤t->delays->wpcopy_delay_min, + ¤t->delays->wpcopy_delay_max_ts); } void __delayacct_irq(struct task_struct *task, u32 delta) @@ -296,8 +307,10 @@ void __delayacct_irq(struct task_struct *task, u32 delta) raw_spin_lock_irqsave(&task->delays->lock, flags); task->delays->irq_delay += delta; task->delays->irq_count++; - if (delta > task->delays->irq_delay_max) + if (delta > task->delays->irq_delay_max) { task->delays->irq_delay_max = delta; + ktime_get_real_ts64(&task->delays->irq_delay_max_ts); + } if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min)) 
task->delays->irq_delay_min = delta; raw_spin_unlock_irqrestore(&task->delays->lock, flags); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index c903f1a42891..a612cf253c87 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -253,8 +253,10 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) delta = rq_clock(rq) - t->sched_info.last_queued; t->sched_info.last_queued = 0; t->sched_info.run_delay += delta; - if (delta > t->sched_info.max_run_delay) + if (delta > t->sched_info.max_run_delay) { t->sched_info.max_run_delay = delta; + ktime_get_real_ts64(&t->sched_info.max_run_delay_ts); + } if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) t->sched_info.min_run_delay = delta; rq_sched_info_dequeue(rq, delta); @@ -278,8 +280,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++; - if (delta > t->sched_info.max_run_delay) + if (delta > t->sched_info.max_run_delay) { t->sched_info.max_run_delay = delta; + ktime_get_real_ts64(&t->sched_info.max_run_delay_ts); + } if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) t->sched_info.min_run_delay = delta; diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 21cb3c3d1331..64796c0223be 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -194,6 +195,37 @@ static int get_family_id(int sd) #define average_ms(t, c) (t / 1000000ULL / (c ? 
c : 1)) #define delay_ms(t) (t / 1000000ULL) +/* + * Format timespec64 to human readable string (YYYY-MM-DD HH:MM:SS) + * Returns formatted string or "N/A" if timestamp is zero + */ +static const char *format_timespec64(struct timespec64 *ts) +{ + static char buffer[32]; + struct tm tm_info; + time_t time_sec; + + /* Check if timestamp is zero (not set) */ + if (ts->tv_sec == 0 && ts->tv_nsec == 0) + return "N/A"; + + time_sec = (time_t)ts->tv_sec; + + /* Use thread-safe localtime_r */ + if (localtime_r(&time_sec, &tm_info) == NULL) + return "N/A"; + + snprintf(buffer, sizeof(buffer), "%04d-%02d-%02dT%02d:%02d:%02d", + tm_info.tm_year + 1900, + tm_info.tm_mon + 1, + tm_info.tm_mday, + tm_info.tm_hour, + tm_info.tm_min, + tm_info.tm_sec); + + return buffer; +} + /* * Version compatibility note: * Field availability depends on taskstats version (t->version), @@ -205,13 +237,28 @@ static int get_family_id(int sd) * version >= 13 - supports WPCOPY statistics * version >= 14 - supports IRQ statistics * version >= 16 - supports *_max and *_min delay statistics + * version >= 17 - supports delay max timestamp statistics * * Always verify version before accessing version-dependent fields * to maintain backward compatibility. 
*/ #define PRINT_CPU_DELAY(version, t) \ do { \ - if (version >= 16) { \ + if (version >= 17) { \ + printf("%-10s%15s%15s%15s%15s%15s%15s%15s%25s\n", \ + "CPU", "count", "real total", "virtual total", \ + "delay total", "delay average", "delay max", \ + "delay min", "delay max timestamp"); \ + printf(" %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms%23s\n", \ + (unsigned long long)(t)->cpu_count, \ + (unsigned long long)(t)->cpu_run_real_total, \ + (unsigned long long)(t)->cpu_run_virtual_total, \ + (unsigned long long)(t)->cpu_delay_total, \ + average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \ + delay_ms((double)(t)->cpu_delay_max), \ + delay_ms((double)(t)->cpu_delay_min), \ + format_timespec64(&(t)->cpu_delay_max_ts)); \ + } else if (version >= 16) { \ printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \ "CPU", "count", "real total", "virtual total", \ "delay total", "delay average", "delay max", "delay min"); \ @@ -257,44 +304,115 @@ static int get_family_id(int sd) } \ } while (0) +#define PRINT_FILED_DELAY_WITH_TS(name, version, t, count, total, max, min, max_ts) \ + do { \ + if (version >= 17) { \ + printf("%-10s%15s%15s%15s%15s%15s%25s\n", \ + name, "count", "delay total", "delay average", \ + "delay max", "delay min", "delay max timestamp"); \ + printf(" %15llu%15llu%15.3fms%13.6fms%13.6fms%23s\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count), \ + delay_ms((double)(t)->max), \ + delay_ms((double)(t)->min), \ + format_timespec64(&(t)->max_ts)); \ + } else if (version >= 16) { \ + printf("%-10s%15s%15s%15s%15s%15s\n", \ + name, "count", "delay total", "delay average", \ + "delay max", "delay min"); \ + printf(" %15llu%15llu%15.3fms%13.6fms%13.6fms\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count), \ + delay_ms((double)(t)->max), \ + delay_ms((double)(t)->min)); \ + } else { \ + printf("%-10s%15s%15s%15s\n", \ 
+ name, "count", "delay total", "delay average"); \ + printf(" %15llu%15llu%15.3fms\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count)); \ + } \ + } while (0) + static void print_delayacct(struct taskstats *t) { printf("\n\n"); PRINT_CPU_DELAY(t->version, t); - PRINT_FILED_DELAY("IO", t->version, t, - blkio_count, blkio_delay_total, - blkio_delay_max, blkio_delay_min); + /* Use new macro with timestamp support for version >= 17 */ + if (t->version >= 17) { + PRINT_FILED_DELAY_WITH_TS("IO", t->version, t, + blkio_count, blkio_delay_total, + blkio_delay_max, blkio_delay_min, blkio_delay_max_ts); - PRINT_FILED_DELAY("SWAP", t->version, t, - swapin_count, swapin_delay_total, - swapin_delay_max, swapin_delay_min); + PRINT_FILED_DELAY_WITH_TS("SWAP", t->version, t, + swapin_count, swapin_delay_total, + swapin_delay_max, swapin_delay_min, swapin_delay_max_ts); - PRINT_FILED_DELAY("RECLAIM", t->version, t, - freepages_count, freepages_delay_total, - freepages_delay_max, freepages_delay_min); + PRINT_FILED_DELAY_WITH_TS("RECLAIM", t->version, t, + freepages_count, freepages_delay_total, + freepages_delay_max, freepages_delay_min, freepages_delay_max_ts); - PRINT_FILED_DELAY("THRASHING", t->version, t, - thrashing_count, thrashing_delay_total, - thrashing_delay_max, thrashing_delay_min); + PRINT_FILED_DELAY_WITH_TS("THRASHING", t->version, t, + thrashing_count, thrashing_delay_total, + thrashing_delay_max, thrashing_delay_min, thrashing_delay_max_ts); - if (t->version >= 11) { - PRINT_FILED_DELAY("COMPACT", t->version, t, - compact_count, compact_delay_total, - compact_delay_max, compact_delay_min); - } + if (t->version >= 11) { + PRINT_FILED_DELAY_WITH_TS("COMPACT", t->version, t, + compact_count, compact_delay_total, + compact_delay_max, compact_delay_min, compact_delay_max_ts); + } - if (t->version >= 13) { - PRINT_FILED_DELAY("WPCOPY", t->version, t, - wpcopy_count, wpcopy_delay_total, - 
wpcopy_delay_max, wpcopy_delay_min); - } + if (t->version >= 13) { + PRINT_FILED_DELAY_WITH_TS("WPCOPY", t->version, t, + wpcopy_count, wpcopy_delay_total, + wpcopy_delay_max, wpcopy_delay_min, wpcopy_delay_max_ts); + } - if (t->version >= 14) { - PRINT_FILED_DELAY("IRQ", t->version, t, - irq_count, irq_delay_total, - irq_delay_max, irq_delay_min); + if (t->version >= 14) { + PRINT_FILED_DELAY_WITH_TS("IRQ", t->version, t, + irq_count, irq_delay_total, + irq_delay_max, irq_delay_min, irq_delay_max_ts); + } + } else { + /* Use original macro for older versions */ + PRINT_FILED_DELAY("IO", t->version, t, + blkio_count, blkio_delay_total, + blkio_delay_max, blkio_delay_min); + + PRINT_FILED_DELAY("SWAP", t->version, t, + swapin_count, swapin_delay_total, + swapin_delay_max, swapin_delay_min); + + PRINT_FILED_DELAY("RECLAIM", t->version, t, + freepages_count, freepages_delay_total, + freepages_delay_max, freepages_delay_min); + + PRINT_FILED_DELAY("THRASHING", t->version, t, + thrashing_count, thrashing_delay_total, + thrashing_delay_max, thrashing_delay_min); + + if (t->version >= 11) { + PRINT_FILED_DELAY("COMPACT", t->version, t, + compact_count, compact_delay_total, + compact_delay_max, compact_delay_min); + } + + if (t->version >= 13) { + PRINT_FILED_DELAY("WPCOPY", t->version, t, + wpcopy_count, wpcopy_delay_total, + wpcopy_delay_max, wpcopy_delay_min); + } + + if (t->version >= 14) { + PRINT_FILED_DELAY("IRQ", t->version, t, + irq_count, irq_delay_total, + irq_delay_max, irq_delay_min); + } } } From 666183dcdd9ad3b8156a1df7f204f728f720380f Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Wed, 21 Jan 2026 09:35:08 +0800 Subject: [PATCH 081/107] rapidio: replace rio_free_net() with kfree() in rio_scan_alloc_net() When idtab allocation fails, net is not registered with rio_add_net() yet, so kfree(net) is sufficient to release the memory. Set mport->net to NULL to avoid dangling pointer. 
Link: https://lkml.kernel.org/r/20260121013508.195836-1-lihaoxiang@isrc.iscas.ac.cn Fixes: e6b585ca6e81 ("rapidio: move net allocation into core code") Signed-off-by: Haoxiang Li Reviewed-by: Andrew Morton Cc: Alexandre Bounine Cc: Matt Porter Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/rio-scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index c12941f71e2c..dcd6619a4b02 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -854,7 +854,8 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport, if (idtab == NULL) { pr_err("RIO: failed to allocate destID table\n"); - rio_free_net(net); + kfree(net); + mport->net = NULL; net = NULL; } else { net->enum_data = idtab; From 5138c936c2c82c9be8883921854bc6f7e1177d8c Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Wed, 10 Dec 2025 09:57:24 +0800 Subject: [PATCH 082/107] ocfs2: fix reflink preserve cleanup issue commit c06c303832ec ("ocfs2: fix xattr array entry __counted_by error") doesn't handle all cases and the cleanup job for preserved xattr entries still has bug: - the 'last' pointer should be shifted by one unit after cleanup an array entry. - current code logic doesn't cleanup the first entry when xh_count is 1. Note, commit c06c303832ec is also a bug fix for 0fe9b66c65f3. 
Link: https://lkml.kernel.org/r/20251210015725.8409-2-heming.zhao@suse.com Fixes: 0fe9b66c65f3 ("ocfs2: Add preserve to reflink.") Signed-off-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/xattr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 5fd85f517868..e434a62dd69f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6395,6 +6395,10 @@ static int ocfs2_reflink_xattr_header(handle_t *handle, (void *)last - (void *)xe); memset(last, 0, sizeof(struct ocfs2_xattr_entry)); + last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1; + } else { + memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); + last = NULL; } /* From 480e1d5c64bb14441f79f2eb9421d5e26f91ea3d Mon Sep 17 00:00:00 2001 From: Li Chen Date: Tue, 20 Jan 2026 20:40:04 +0800 Subject: [PATCH 083/107] kexec: derive purgatory entry from symbol kexec_load_purgatory() derives image->start by locating e_entry inside an SHF_EXECINSTR section. If the purgatory object contains multiple executable sections with overlapping sh_addr, the entrypoint check can match more than once and trigger a WARN. Derive the entry section from the purgatory_start symbol when present and compute image->start from its final placement. Keep the existing e_entry fallback for purgatories that do not expose the symbol. 
WARNING: kernel/kexec_file.c:1009 at kexec_load_purgatory+0x395/0x3c0, CPU#10: kexec/1784 Call Trace: bzImage64_load+0x133/0xa00 __do_sys_kexec_file_load+0x2b3/0x5c0 do_syscall_64+0x81/0x610 entry_SYSCALL_64_after_hwframe+0x76/0x7e [me@linux.beauty: move helper to avoid forward declaration, per Baoquan] Link: https://lkml.kernel.org/r/20260128043511.316860-1-me@linux.beauty Link: https://lkml.kernel.org/r/20260120124005.148381-1-me@linux.beauty Fixes: 8652d44f466a ("kexec: support purgatories with .text.hot sections") Signed-off-by: Li Chen Acked-by: Baoquan He Cc: Alexander Graf Cc: Eric Biggers Cc: Li Chen Cc: Philipp Rudo Cc: Ricardo Ribalda Delgado Cc: Ross Zwisler Cc: Sourabh Jain Cc: Steven Rostedt Cc: Signed-off-by: Andrew Morton --- kernel/kexec_file.c | 131 +++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 57 deletions(-) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index eb62a9794242..2bfbb2d144e6 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -882,6 +882,60 @@ out_free_sha_regions: } #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY +/* + * kexec_purgatory_find_symbol - find a symbol in the purgatory + * @pi: Purgatory to search in. + * @name: Name of the symbol. + * + * Return: pointer to symbol in read-only symtab on success, NULL on error. 
+ */ +static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, + const char *name) +{ + const Elf_Shdr *sechdrs; + const Elf_Ehdr *ehdr; + const Elf_Sym *syms; + const char *strtab; + int i, k; + + if (!pi->ehdr) + return NULL; + + ehdr = pi->ehdr; + sechdrs = (void *)ehdr + ehdr->e_shoff; + + for (i = 0; i < ehdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + + if (sechdrs[i].sh_link >= ehdr->e_shnum) + /* Invalid strtab section number */ + continue; + strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset; + syms = (void *)ehdr + sechdrs[i].sh_offset; + + /* Go through symbols for a match */ + for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { + if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) + continue; + + if (strcmp(strtab + syms[k].st_name, name) != 0) + continue; + + if (syms[k].st_shndx == SHN_UNDEF || + syms[k].st_shndx >= ehdr->e_shnum) { + pr_debug("Symbol: %s has bad section index %d.\n", + name, syms[k].st_shndx); + return NULL; + } + + /* Found the symbol we are looking for */ + return &syms[k]; + } + } + + return NULL; +} /* * kexec_purgatory_setup_kbuf - prepare buffer to load purgatory. * @pi: Purgatory to be loaded. 
@@ -960,6 +1014,10 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, unsigned long offset; size_t sechdrs_size; Elf_Shdr *sechdrs; + const Elf_Sym *entry_sym; + u16 entry_shndx = 0; + unsigned long entry_off = 0; + bool start_fixed = false; int i; /* @@ -977,6 +1035,12 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, bss_addr = kbuf->mem + kbuf->bufsz; kbuf->image->start = pi->ehdr->e_entry; + entry_sym = kexec_purgatory_find_symbol(pi, "purgatory_start"); + if (entry_sym) { + entry_shndx = entry_sym->st_shndx; + entry_off = entry_sym->st_value; + } + for (i = 0; i < pi->ehdr->e_shnum; i++) { unsigned long align; void *src, *dst; @@ -994,6 +1058,13 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, offset = ALIGN(offset, align); + if (!start_fixed && entry_sym && i == entry_shndx && + (sechdrs[i].sh_flags & SHF_EXECINSTR) && + entry_off < sechdrs[i].sh_size) { + kbuf->image->start = kbuf->mem + offset + entry_off; + start_fixed = true; + } + /* * Check if the segment contains the entry point, if so, * calculate the value of image->start based on it. @@ -1004,13 +1075,14 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, * is not set to the initial value, and warn the user so they * have a chance to fix their purgatory's linker script. */ - if (sechdrs[i].sh_flags & SHF_EXECINSTR && + if (!start_fixed && sechdrs[i].sh_flags & SHF_EXECINSTR && pi->ehdr->e_entry >= sechdrs[i].sh_addr && pi->ehdr->e_entry < (sechdrs[i].sh_addr + sechdrs[i].sh_size) && - !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) { + kbuf->image->start == pi->ehdr->e_entry) { kbuf->image->start -= sechdrs[i].sh_addr; kbuf->image->start += kbuf->mem + offset; + start_fixed = true; } src = (void *)pi->ehdr + sechdrs[i].sh_offset; @@ -1128,61 +1200,6 @@ out_free_kbuf: return ret; } -/* - * kexec_purgatory_find_symbol - find a symbol in the purgatory - * @pi: Purgatory to search in. - * @name: Name of the symbol. 
- * - * Return: pointer to symbol in read-only symtab on success, NULL on error. - */ -static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, - const char *name) -{ - const Elf_Shdr *sechdrs; - const Elf_Ehdr *ehdr; - const Elf_Sym *syms; - const char *strtab; - int i, k; - - if (!pi->ehdr) - return NULL; - - ehdr = pi->ehdr; - sechdrs = (void *)ehdr + ehdr->e_shoff; - - for (i = 0; i < ehdr->e_shnum; i++) { - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; - - if (sechdrs[i].sh_link >= ehdr->e_shnum) - /* Invalid strtab section number */ - continue; - strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset; - syms = (void *)ehdr + sechdrs[i].sh_offset; - - /* Go through symbols for a match */ - for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { - if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) - continue; - - if (strcmp(strtab + syms[k].st_name, name) != 0) - continue; - - if (syms[k].st_shndx == SHN_UNDEF || - syms[k].st_shndx >= ehdr->e_shnum) { - pr_debug("Symbol: %s has bad section index %d.\n", - name, syms[k].st_shndx); - return NULL; - } - - /* Found the symbol we are looking for */ - return &syms[k]; - } - } - - return NULL; -} - void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) { struct purgatory_info *pi = &image->purgatory_info; From 8924336531e21b187d724b5fdf5277269c9ec22c Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 22 Jan 2026 15:13:03 +0100 Subject: [PATCH 084/107] ipc: don't audit capability check in ipc_permissions() The IPC sysctls implement the ctl_table_root::permissions hook and they override the file access mode based on the CAP_CHECKPOINT_RESTORE capability, which is being checked regardless of whether any access is actually denied or not, so if an LSM denies the capability, an audit record may be logged even when access is in fact granted. 
It wouldn't be viable to restructure the sysctl permission logic to only check the capability when the access would be actually denied if it's not granted. Thus, do the same as in net_ctl_permissions() (net/sysctl_net.c) - switch from ns_capable() to ns_capable_noaudit(), so that the check never emits an audit record. Link: https://lkml.kernel.org/r/20260122141303.241133-1-omosnace@redhat.com Fixes: 0889f44e2810 ("ipc: Check permissions for checkpoint_restart sysctls at open time") Signed-off-by: Ondrej Mosnacek Acked-by: Alexey Gladkov Acked-by: Serge Hallyn Cc: Eric Biederman Cc: Paul Moore Signed-off-by: Andrew Morton --- include/linux/capability.h | 6 ++++++ ipc/ipc_sysctl.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/capability.h b/include/linux/capability.h index 1fb08922552c..37db92b3d6f8 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -203,6 +203,12 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) ns_capable(ns, CAP_SYS_ADMIN); } +static inline bool checkpoint_restore_ns_capable_noaudit(struct user_namespace *ns) +{ + return ns_capable_noaudit(ns, CAP_CHECKPOINT_RESTORE) || + ns_capable_noaudit(ns, CAP_SYS_ADMIN); +} + /* audit system wants to get cap info from files as well */ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, const struct dentry *dentry, diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 15b17e86e198..9b087ebeb643 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -214,7 +214,7 @@ static int ipc_permissions(struct ctl_table_header *head, const struct ctl_table if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || (table->data == &ns->ids[IPC_MSG_IDS].next_id) || (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && - checkpoint_restore_ns_capable(ns->user_ns)) + checkpoint_restore_ns_capable_noaudit(ns->user_ns)) mode = 0666; else #endif From 0895a000e4fff9e950a7894210db45973e485c35 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: 
Thu, 22 Jan 2026 15:07:45 +0100 Subject: [PATCH 085/107] ucount: check for CAP_SYS_RESOURCE using ns_capable_noaudit() The user.* sysctls implement the ctl_table_root::permissions hook and they override the file access mode based on the CAP_SYS_RESOURCE capability (at most rwx if capable, at most r-- if not). The capability is being checked unconditionally, so if an LSM denies the capability, an audit record may be logged even when access is in fact granted. Given the logic in the set_permissions() function in kernel/ucount.c and the unfortunate way the permission checking is implemented, it doesn't seem viable to avoid false positive denials by deferring the capability check. Thus, do the same as in net_ctl_permissions() (net/sysctl_net.c) - switch from ns_capable() to ns_capable_noaudit(), so that the check never logs an audit record. Link: https://lkml.kernel.org/r/20260122140745.239428-1-omosnace@redhat.com Fixes: dbec28460a89 ("userns: Add per user namespace sysctls.") Signed-off-by: Ondrej Mosnacek Reviewed-by: Paul Moore Acked-by: Serge Hallyn Cc: Eric Biederman Cc: Alexey Gladkov Signed-off-by: Andrew Morton --- kernel/ucount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 586af49fc03e..fc4a8f2d3096 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -47,7 +47,7 @@ static int set_permissions(struct ctl_table_header *head, int mode; /* Allow users with CAP_SYS_RESOURCE unrestrained access */ - if (ns_capable(user_ns, CAP_SYS_RESOURCE)) + if (ns_capable_noaudit(user_ns, CAP_SYS_RESOURCE)) mode = (table->mode & S_IRWXU) >> 6; else /* Allow all others at most read-only access */ From b50634c5e84a7a57c20b03e367a43f1b63b7ea23 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 22 Jan 2026 14:17:57 +0200 Subject: [PATCH 086/107] kho: cleanup error handling in kho_populate() * use dedicated labels for error handling instead of checking if a pointer is not null to decide if it should be 
unmapped * drop assignment of values to err that are only used to print a numeric error code, there are pr_warn()s for each failure already so printing a numeric error code in the next line does not add anything useful Link: https://lkml.kernel.org/r/20260122121757.575987-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Mike Rapoport Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 39 +++++++++++++----------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index fbfa5a04faed..e0a50b012ba3 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -1455,46 +1455,40 @@ void __init kho_memory_init(void) void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, u64 scratch_len) { + unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); struct kho_scratch *scratch = NULL; phys_addr_t mem_map_phys; void *fdt = NULL; - int err = 0; - unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); + int err; /* Validate the input FDT */ fdt = early_memremap(fdt_phys, fdt_len); if (!fdt) { pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys); - err = -EFAULT; - goto out; + goto err_report; } err = fdt_check_header(fdt); if (err) { pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n", fdt_phys, err); - err = -EINVAL; - goto out; + goto err_unmap_fdt; } err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE); if (err) { pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n", fdt_phys, KHO_FDT_COMPATIBLE, err); - err = -EINVAL; - goto out; + goto err_unmap_fdt; } mem_map_phys = kho_get_mem_map_phys(fdt); - if (!mem_map_phys) { - err = -ENOENT; - goto out; - } + if (!mem_map_phys) + goto err_unmap_fdt; scratch = early_memremap(scratch_phys, scratch_len); if (!scratch) { 
pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n", scratch_phys, scratch_len); - err = -EFAULT; - goto out; + goto err_unmap_fdt; } /* @@ -1511,7 +1505,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, if (WARN_ON(err)) { pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe", &area->addr, &size, ERR_PTR(err)); - goto out; + goto err_unmap_scratch; } pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size); } @@ -1533,13 +1527,14 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, kho_scratch_cnt = scratch_cnt; pr_info("found kexec handover data.\n"); -out: - if (fdt) - early_memunmap(fdt, fdt_len); - if (scratch) - early_memunmap(scratch, scratch_len); - if (err) - pr_warn("disabling KHO revival: %d\n", err); + return; + +err_unmap_scratch: + early_memunmap(scratch, scratch_len); +err_unmap_fdt: + early_memunmap(fdt, fdt_len); +err_report: + pr_warn("disabling KHO revival\n"); } /* Helper functions for kexec_file_load */ From 96a54b8ffc8c4567c32fe0b6996669f1132b026d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 26 Jan 2026 12:20:46 +0100 Subject: [PATCH 087/107] crash_dump: fix dm_crypt keys locking and ref leak crash_load_dm_crypt_keys() reads dm-crypt volume keys from the user keyring. It uses user_key_payload_locked() without holding key->sem, which makes lockdep complain when kexec_file_load() assembles the crash image: ============================= WARNING: suspicious RCU usage ----------------------------- ./include/keys/user-type.h:53 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 no locks held by kexec/4875. stack backtrace: Call Trace: dump_stack_lvl+0x5d/0x80 lockdep_rcu_suspicious.cold+0x4e/0x96 crash_load_dm_crypt_keys+0x314/0x390 bzImage64_load+0x116/0x9a0 ? 
__lock_acquire+0x464/0x1ba0 __do_sys_kexec_file_load+0x26a/0x4f0 do_syscall_64+0xbd/0x430 entry_SYSCALL_64_after_hwframe+0x77/0x7f In addition, the key returned by request_key() is never key_put()'d, leaking a key reference on each load attempt. Take key->sem while copying the payload and drop the key reference afterwards. Link: https://lkml.kernel.org/r/patch.git-2d4d76083a5c.your-ad-here.call-01769426386-ext-2560@work.hours Fixes: 479e58549b0f ("crash_dump: store dm crypt keys in kdump reserved memory") Signed-off-by: Vasily Gorbik Cc: Baoquan He Cc: Coiby Xu Cc: Dave Young Cc: Vivek Goyal Cc: Signed-off-by: Andrew Morton --- kernel/crash_dump_dm_crypt.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 0d23dc1de67c..37129243054d 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -143,6 +143,7 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key) { const struct user_key_payload *ukp; struct key *key; + int ret = 0; kexec_dprintk("Requesting logon key %s", dm_key->key_desc); key = request_key(&key_type_logon, dm_key->key_desc, NULL); @@ -152,20 +153,28 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key) return PTR_ERR(key); } + down_read(&key->sem); ukp = user_key_payload_locked(key); - if (!ukp) - return -EKEYREVOKED; + if (!ukp) { + ret = -EKEYREVOKED; + goto out; + } if (ukp->datalen > KEY_SIZE_MAX) { pr_err("Key size %u exceeds maximum (%u)\n", ukp->datalen, KEY_SIZE_MAX); - return -EINVAL; + ret = -EINVAL; + goto out; } memcpy(dm_key->data, ukp->data, ukp->datalen); dm_key->key_size = ukp->datalen; kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size, dm_key->key_desc, dm_key->data); - return 0; + +out: + up_read(&key->sem); + key_put(key); + return ret; } struct config_key { From 427b2535f51342de3156babc6bdc3f3b7dd2c707 Mon Sep 17 00:00:00 2001 From: Evangelos Petrongonas Date: 
Tue, 20 Jan 2026 17:59:11 +0000 Subject: [PATCH 088/107] kho: skip memoryless NUMA nodes when reserving scratch areas kho_reserve_scratch() iterates over all online NUMA nodes to allocate per-node scratch memory. On systems with memoryless NUMA nodes (nodes that have CPUs but no memory), memblock_alloc_range_nid() fails because there is no memory available on that node. This causes KHO initialization to fail and kho_enable to be set to false. Some ARM64 systems have NUMA topologies where certain nodes contain only CPUs without any associated memory. These configurations are valid and should not prevent KHO from functioning. Fix this by only counting nodes that have memory (N_MEMORY state) and skip memoryless nodes in the per-node scratch allocation loop. Link: https://lkml.kernel.org/r/20260120175913.34368-1-epetron@amazon.de Fixes: 3dc92c311498 ("kexec: add Kexec HandOver (KHO) generation helpers"). Signed-off-by: Evangelos Petrongonas Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pasha Tatashin Cc: Alexander Graf Cc: Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index e0a50b012ba3..8a2b2a7e50fc 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -655,7 +655,7 @@ static void __init kho_reserve_scratch(void) scratch_size_update(); /* FIXME: deal with node hot-plug/remove */ - kho_scratch_cnt = num_online_nodes() + 2; + kho_scratch_cnt = nodes_weight(node_states[N_MEMORY]) + 2; size = kho_scratch_cnt * sizeof(*kho_scratch); kho_scratch = memblock_alloc(size, PAGE_SIZE); if (!kho_scratch) { @@ -691,7 +691,11 @@ static void __init kho_reserve_scratch(void) kho_scratch[i].size = size; i++; - for_each_online_node(nid) { + /* + * Loop over nodes that have both memory and are online. 
Skip + * memoryless nodes, as we can not allocate scratch areas there. + */ + for_each_node_state(nid, N_MEMORY) { size = scratch_size_node(nid); addr = memblock_alloc_range_nid(size, CMA_MIN_ALIGNMENT_BYTES, 0, MEMBLOCK_ALLOC_ACCESSIBLE, From 33caa19f4b318378bf54692b30724f442c981dad Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Jan 2026 17:07:11 +0100 Subject: [PATCH 089/107] android/binder: don't abuse current->group_leader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "don't abuse task_struct.group_leader", v2. This series removes the usage of ->group_leader when it is "obviously unnecessary". I am going to move ->group_leader from task_struct to signal_struct or at least add the new task_group_leader() helper. So I will send more tree-wide changes on top of this series. This patch (of 7): Cleanup and preparation to simplify the next changes. - Use current->tgid instead of current->group_leader->pid - Use the value returned by get_task_struct() to initialize proc->tsk Link: https://lkml.kernel.org/r/aXY_h8i78n6yD9JY@redhat.com Link: https://lkml.kernel.org/r/aXY_ryGDwdygl1Tv@redhat.com Signed-off-by: Oleg Nesterov Reviewed-by: Alice Ryhl Cc: Boris Brezillon Cc: Christan König Cc: David S. 
Miller Cc: Eric Dumazet Cc: Felix Kuehling Cc: Jakub Kicinski Cc: Leon Romanovsky Cc: Paolo Abeni Cc: Simon Horman Cc: Steven Price Signed-off-by: Andrew Morton --- drivers/android/binder.c | 7 +++---- drivers/android/binder_alloc.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 535fc881c8da..dea701daabb0 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6046,7 +6046,7 @@ static int binder_open(struct inode *nodp, struct file *filp) bool existing_pid = false; binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, - current->group_leader->pid, current->pid); + current->tgid, current->pid); proc = kzalloc(sizeof(*proc), GFP_KERNEL); if (proc == NULL) @@ -6055,8 +6055,8 @@ static int binder_open(struct inode *nodp, struct file *filp) dbitmap_init(&proc->dmap); spin_lock_init(&proc->inner_lock); spin_lock_init(&proc->outer_lock); - get_task_struct(current->group_leader); - proc->tsk = current->group_leader; + proc->tsk = get_task_struct(current->group_leader); + proc->pid = current->tgid; proc->cred = get_cred(filp->f_cred); INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->freeze_wait); @@ -6075,7 +6075,6 @@ static int binder_open(struct inode *nodp, struct file *filp) binder_alloc_init(&proc->alloc); binder_stats_created(BINDER_STAT_PROC); - proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); INIT_LIST_HEAD(&proc->delivered_freeze); INIT_LIST_HEAD(&proc->waiting_threads); diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 979c96b74cad..145ed5f14cdb 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -1233,7 +1233,7 @@ static struct shrinker *binder_shrinker; VISIBLE_IF_KUNIT void __binder_alloc_init(struct binder_alloc *alloc, struct list_lru *freelist) { - alloc->pid = current->group_leader->pid; + alloc->pid = current->tgid; alloc->mm = current->mm; 
mmgrab(alloc->mm); mutex_init(&alloc->mutex); From a170919d1b670f531f31192bef4dff08be636a7c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Jan 2026 17:07:20 +0100 Subject: [PATCH 090/107] android/binder: use same_thread_group(proc->tsk, current) in binder_mmap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With or without this change the checked condition can be falsely true if proc->tsk execs, but this is fine: binder_alloc_mmap_handler() checks vma->vm_mm == alloc->mm. Link: https://lkml.kernel.org/r/aXY_uPYyUg4rwNOg@redhat.com Signed-off-by: Oleg Nesterov Reviewed-by: Alice Ryhl Cc: Boris Brezillon Cc: Christan König Cc: David S. Miller Cc: Eric Dumazet Cc: Felix Kuehling Cc: Jakub Kicinski Cc: Leon Romanovsky Cc: Paolo Abeni Cc: Simon Horman Cc: Steven Price Signed-off-by: Andrew Morton --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index dea701daabb0..b3b73303f84d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6015,7 +6015,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) { struct binder_proc *proc = filp->private_data; - if (proc->tsk != current->group_leader) + if (!same_thread_group(proc->tsk, current)) return -EINVAL; binder_debug(BINDER_DEBUG_OPEN_CLOSE, From 7d08e0916a59e006c262dcd2f7168d0336c80265 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Jan 2026 17:07:28 +0100 Subject: [PATCH 091/107] drm/amdgpu: don't abuse current->group_leader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup and preparation to simplify the next changes. 
- Use current->tgid instead of current->group_leader->pid - Use get_task_pid(current, PIDTYPE_TGID) instead of get_task_pid(current->group_leader, PIDTYPE_PID) Link: https://lkml.kernel.org/r/aXY_wKewzV5lCa5I@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Felix Kuehling Cc: Alice Ryhl Cc: Boris Brezillon Cc: Christan König Cc: David S. Miller Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Leon Romanovsky Cc: Paolo Abeni Cc: Simon Horman Cc: Steven Price Signed-off-by: Andrew Morton --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b1c24c8fa686..df22b54ba346 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1421,7 +1421,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, goto create_evict_fence_fail; } - info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); + info->pid = get_task_pid(current, PIDTYPE_TGID); INIT_DELAYED_WORK(&info->restore_userptr_work, amdgpu_amdkfd_restore_userptr_worker); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a67285118c37..a0f8ba382b9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2554,7 +2554,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) if (current->group_leader->mm != current->mm) return; - vm->task_info->tgid = current->group_leader->pid; + vm->task_info->tgid = current->tgid; get_task_comm(vm->task_info->process_name, current->group_leader); } From a87da7a9fa7b5c39b01d7fa30415c9211e775f2e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Jan 2026 17:07:36 +0100 Subject: [PATCH 092/107] drm/amd: kill the outdated "Only the pthreads threading model is supported" checks MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nowadays task->group_leader->mm != task->mm is only possible if a) task is not a group leader and b) task->group_leader->mm == NULL because task->group_leader has already exited using sys_exit(). I don't think that drm/amd tries to detect/nack this case. Link: https://lkml.kernel.org/r/aXY_yLVHd63UlWtm@redhat.com Signed-off-by: Oleg Nesterov Reviewed-by: Christan König Acked-by: Felix Kuehling Cc: Alice Ryhl Cc: Boris Brezillon Cc: David S. Miller Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Leon Romanovsky Cc: Paolo Abeni Cc: Simon Horman Cc: Steven Price Signed-off-by: Andrew Morton --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ---------- 2 files changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a0f8ba382b9e..e44f158a11f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2551,9 +2551,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) vm->task_info->task.pid = current->pid; get_task_comm(vm->task_info->task.comm, current); - if (current->group_leader->mm != current->mm) - return; - vm->task_info->tgid = current->tgid; get_task_comm(vm->task_info->process_name, current->group_leader); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a085faac9fe1..f8ef18a3aa71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -833,12 +833,6 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) if (!(thread->mm && mmget_not_zero(thread->mm))) return ERR_PTR(-EINVAL); - /* Only the pthreads threading model is supported. 
*/ - if (thread->group_leader->mm != thread->mm) { - mmput(thread->mm); - return ERR_PTR(-EINVAL); - } - /* If the process just called exec(3), it is possible that the * cleanup of the kfd_process (following the release of the mm * of the old process image) is still in the cleanup work queue. @@ -918,10 +912,6 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) if (!thread->mm) return ERR_PTR(-EINVAL); - /* Only the pthreads threading model is supported. */ - if (thread->group_leader->mm != thread->mm) - return ERR_PTR(-EINVAL); - process = find_process(thread, false); if (!process) return ERR_PTR(-EINVAL); From 05f8f36d0b836006a1f7a7d233789c8c80ea89df Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Jan 2026 17:07:44 +0100 Subject: [PATCH 093/107] drm/pan*: don't abuse current->group_leader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup and preparation to simplify the next changes. Use current->tgid instead of current->group_leader->pid. Link: https://lkml.kernel.org/r/aXY_0MrQBZWKbbmA@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Boris Brezillon Acked-by: Steven Price Cc: Alice Ryhl Cc: Christan König Cc: David S. 
Miller Cc: Eric Dumazet Cc: Felix Kuehling Cc: Jakub Kicinski Cc: Leon Romanovsky Cc: Paolo Abeni Cc: Simon Horman Signed-off-by: Andrew Morton --- drivers/gpu/drm/panfrost/panfrost_gem.c | 2 +- drivers/gpu/drm/panthor/panthor_gem.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 8041b65c6609..1ff1f2c8b726 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -17,7 +17,7 @@ static void panfrost_gem_debugfs_bo_add(struct panfrost_device *pfdev, struct panfrost_gem_object *bo) { - bo->debugfs.creator.tgid = current->group_leader->pid; + bo->debugfs.creator.tgid = current->tgid; get_task_comm(bo->debugfs.creator.process_name, current->group_leader); mutex_lock(&pfdev->debugfs.gems_lock); diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index fbde78db270a..29cc57efc4b9 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -27,7 +27,7 @@ static void panthor_gem_debugfs_bo_add(struct panthor_gem_object *bo) struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base); - bo->debugfs.creator.tgid = current->group_leader->pid; + bo->debugfs.creator.tgid = current->tgid; get_task_comm(bo->debugfs.creator.process_name, current->group_leader); mutex_lock(&ptdev->gems.lock); From 6fd390e2bccfd82e6e2932acb21299938f8981bb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Jan 2026 17:07:52 +0100 Subject: [PATCH 094/107] RDMA/umem: don't abuse current->group_leader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup and preparation to simplify the next changes. Use current->tgid instead of current->group_leader->pid. 
Link: https://lkml.kernel.org/r/aXY_2JIhCeGAYC0r@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Leon Romanovsky Cc: Alice Ryhl Cc: Boris Brezillon Cc: Christan König Cc: David S. Miller Cc: Eric Dumazet Cc: Felix Kuehling Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Steven Price Signed-off-by: Andrew Morton --- drivers/infiniband/core/umem_odp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 572a91a62a7b..32267258a19c 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -149,7 +149,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, umem->owning_mm = current->mm; umem_odp->page_shift = PAGE_SHIFT; - umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + umem_odp->tgid = get_task_pid(current, PIDTYPE_TGID); ib_init_umem_implicit_odp(umem_odp); return umem_odp; } @@ -258,7 +258,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, umem_odp->page_shift = HPAGE_SHIFT; #endif - umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + umem_odp->tgid = get_task_pid(current, PIDTYPE_TGID); ret = ib_init_umem_odp(umem_odp, ops); if (ret) goto err_put_pid; From f3951e93d4fe9cc85128dc38915877ff6ef633db Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Jan 2026 17:08:00 +0100 Subject: [PATCH 095/107] netclassid: use thread_group_leader(p) in update_classid_task() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup and preparation to simplify planned future changes. Link: https://lkml.kernel.org/r/aXY_4NSP094-Cf-2@redhat.com Signed-off-by: Oleg Nesterov Cc: Alice Ryhl Cc: Boris Brezillon Cc: Christan König Cc: David S. 
Miller Cc: Eric Dumazet Cc: Felix Kuehling Cc: Jakub Kicinski Cc: Leon Romanovsky Cc: Paolo Abeni Cc: Simon Horman Cc: Steven Price Signed-off-by: Andrew Morton --- net/core/netclassid_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index dff66d8fb325..db9a5354f9de 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -93,7 +93,7 @@ static void update_classid_task(struct task_struct *p, u32 classid) /* Only update the leader task, when many threads in this task, * so it can avoid the useless traversal. */ - if (p != p->group_leader) + if (!thread_group_leader(p)) return; do { From 2e171ab29f916455a49274a2042bac4a4b35570e Mon Sep 17 00:00:00 2001 From: Pnina Feder Date: Thu, 22 Jan 2026 12:24:57 +0200 Subject: [PATCH 096/107] panic: add panic_force_cpu= parameter to redirect panic to a specific CPU Some platforms require panic handling to execute on a specific CPU for crash dump to work reliably. This can be due to firmware limitations, interrupt routing constraints, or platform-specific requirements where only a single CPU is able to safely enter the crash kernel. Add the panic_force_cpu= kernel command-line parameter to redirect panic execution to a designated CPU. When the parameter is provided, the CPU that initially triggers panic forwards the panic context to the target CPU via IPI, which then proceeds with the normal panic and kexec flow. The IPI delivery is implemented as a weak function (panic_smp_redirect_cpu) so architectures with NMI support can override it for more reliable delivery. If the specified CPU is invalid, offline, or a panic is already in progress on another CPU, the redirection is skipped and panic continues on the current CPU. 
[pnina.feder@mobileye.com: fix unused variable warning] Link: https://lkml.kernel.org/r/20260126122618.2967950-1-pnina.feder@mobileye.com Link: https://lkml.kernel.org/r/20260122102457.1154599-1-pnina.feder@mobileye.com Signed-off-by: Pnina Feder Reviewed-by: Petr Mladek Cc: Baoquan He Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Mel Gorman Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 15 ++ include/linux/panic.h | 8 + include/linux/smp.h | 1 + kernel/panic.c | 164 +++++++++++++++++- 4 files changed, 186 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 73d846211144..97161861781c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4788,6 +4788,21 @@ Kernel parameters panic_on_warn=1 panic() instead of WARN(). Useful to cause kdump on a WARN(). + panic_force_cpu= + [KNL,SMP] Force panic handling to execute on a specific CPU. + Format: + Some platforms require panic handling to occur on a + specific CPU for the crash kernel to function correctly. + This can be due to firmware limitations, interrupt routing + constraints, or platform-specific requirements where only + a particular CPU can safely enter the crash kernel. + When set, panic() will redirect execution to the specified + CPU before proceeding with the normal panic and kexec flow. + If the target CPU is offline or unavailable, panic proceeds + on the current CPU. + This option should only be used for systems with the above + constraints as it might cause the panic operation to be less reliable. + panic_print= Bitmask for printing system info when panic happens. 
User can chose combination of the following bits: bit 0: print all tasks info diff --git a/include/linux/panic.h b/include/linux/panic.h index a00bc0937698..f1dd417e54b2 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -41,6 +41,14 @@ void abort(void); * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). */ extern atomic_t panic_cpu; + +/* + * panic_redirect_cpu is used when panic is redirected to a specific CPU via + * the panic_force_cpu= boot parameter. It holds the CPU number that originally + * triggered the panic before redirection. A value of PANIC_CPU_INVALID means + * no redirection has occurred. + */ +extern atomic_t panic_redirect_cpu; #define PANIC_CPU_INVALID -1 bool panic_try_start(void); diff --git a/include/linux/smp.h b/include/linux/smp.h index 91d0ecf3b8d3..1ebd88026119 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -62,6 +62,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd); void __noreturn panic_smp_self_stop(void); void __noreturn nmi_panic_self_stop(struct pt_regs *regs); void crash_smp_send_stop(void); +int panic_smp_redirect_cpu(int target_cpu, void *msg); /* * Call a function on all processors diff --git a/kernel/panic.c b/kernel/panic.c index 0c20fcaae98a..c78600212b6c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -42,6 +42,7 @@ #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 +#define PANIC_MSG_BUFSZ 1024 #ifdef CONFIG_SMP /* @@ -74,6 +75,8 @@ EXPORT_SYMBOL_GPL(panic_timeout); unsigned long panic_print; +static int panic_force_cpu = -1; + ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); @@ -300,6 +303,150 @@ void __weak crash_smp_send_stop(void) } atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); +atomic_t panic_redirect_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); + +#if defined(CONFIG_SMP) && defined(CONFIG_CRASH_DUMP) +static char *panic_force_buf; + +static int __init panic_force_cpu_setup(char *str) +{ + int cpu; + + if 
(!str) + return -EINVAL; + + if (kstrtoint(str, 0, &cpu) || cpu < 0 || cpu >= nr_cpu_ids) { + pr_warn("panic_force_cpu: invalid value '%s'\n", str); + return -EINVAL; + } + + panic_force_cpu = cpu; + return 0; +} +early_param("panic_force_cpu", panic_force_cpu_setup); + +static int __init panic_force_cpu_late_init(void) +{ + if (panic_force_cpu < 0) + return 0; + + panic_force_buf = kmalloc(PANIC_MSG_BUFSZ, GFP_KERNEL); + + return 0; +} +late_initcall(panic_force_cpu_late_init); + +static void do_panic_on_target_cpu(void *info) +{ + panic("%s", (char *)info); +} + +/** + * panic_smp_redirect_cpu - Redirect panic to target CPU + * @target_cpu: CPU that should handle the panic + * @msg: formatted panic message + * + * Default implementation uses IPI. Architectures with NMI support + * can override this for more reliable delivery. + * + * Return: 0 on success, negative errno on failure + */ +int __weak panic_smp_redirect_cpu(int target_cpu, void *msg) +{ + static call_single_data_t panic_csd; + + panic_csd.func = do_panic_on_target_cpu; + panic_csd.info = msg; + + return smp_call_function_single_async(target_cpu, &panic_csd); +} + +/** + * panic_try_force_cpu - Redirect panic to a specific CPU for crash kernel + * @fmt: panic message format string + * @args: arguments for format string + * + * Some platforms require panic handling to occur on a specific CPU + * for the crash kernel to function correctly. This function redirects + * panic handling to the CPU specified via the panic_force_cpu= boot parameter. + * + * Returns false if panic should proceed on current CPU. + * Returns true if panic was redirected. 
+ */ +__printf(1, 0) +static bool panic_try_force_cpu(const char *fmt, va_list args) +{ + int this_cpu = raw_smp_processor_id(); + int old_cpu = PANIC_CPU_INVALID; + const char *msg; + + /* Feature not enabled via boot parameter */ + if (panic_force_cpu < 0) + return false; + + /* Already on target CPU - proceed normally */ + if (this_cpu == panic_force_cpu) + return false; + + /* Target CPU is offline, can't redirect */ + if (!cpu_online(panic_force_cpu)) { + pr_warn("panic: target CPU %d is offline, continuing on CPU %d\n", + panic_force_cpu, this_cpu); + return false; + } + + /* Another panic already in progress */ + if (panic_in_progress()) + return false; + + /* + * Only one CPU can do the redirect. Use atomic cmpxchg to ensure + * we don't race with another CPU also trying to redirect. + */ + if (!atomic_try_cmpxchg(&panic_redirect_cpu, &old_cpu, this_cpu)) + return false; + + /* + * Use dynamically allocated buffer if available, otherwise + * fall back to static message for early boot panics or allocation failure. 
+ */ + if (panic_force_buf) { + vsnprintf(panic_force_buf, PANIC_MSG_BUFSZ, fmt, args); + msg = panic_force_buf; + } else { + msg = "Redirected panic (buffer unavailable)"; + } + + console_verbose(); + bust_spinlocks(1); + + pr_emerg("panic: Redirecting from CPU %d to CPU %d for crash kernel.\n", + this_cpu, panic_force_cpu); + + /* Dump original CPU before redirecting */ + if (!test_taint(TAINT_DIE) && + oops_in_progress <= 1 && + IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) { + dump_stack(); + } + + if (panic_smp_redirect_cpu(panic_force_cpu, (void *)msg) != 0) { + atomic_set(&panic_redirect_cpu, PANIC_CPU_INVALID); + pr_warn("panic: failed to redirect to CPU %d, continuing on CPU %d\n", + panic_force_cpu, this_cpu); + return false; + } + + /* IPI/NMI sent, this CPU should stop */ + return true; +} +#else +__printf(1, 0) +static inline bool panic_try_force_cpu(const char *fmt, va_list args) +{ + return false; +} +#endif /* CONFIG_SMP && CONFIG_CRASH_DUMP */ bool panic_try_start(void) { @@ -428,7 +575,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec) */ void vpanic(const char *fmt, va_list args) { - static char buf[1024]; + static char buf[PANIC_MSG_BUFSZ]; long i, i_next = 0, len; int state = 0; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; @@ -452,6 +599,15 @@ void vpanic(const char *fmt, va_list args) local_irq_disable(); preempt_disable_notrace(); + /* Redirect panic to target CPU if configured via panic_force_cpu=. */ + if (panic_try_force_cpu(fmt, args)) { + /* + * Mark ourselves offline so panic_other_cpus_shutdown() won't wait + * for us on architectures that check num_online_cpus(). + */ + set_cpu_online(smp_processor_id(), false); + panic_smp_self_stop(); + } /* * It's possible to come here directly from a panic-assertion and * not have preempt disabled. 
Some functions called from here want @@ -484,7 +640,11 @@ void vpanic(const char *fmt, va_list args) /* * Avoid nested stack-dumping if a panic occurs during oops processing */ - if (test_taint(TAINT_DIE) || oops_in_progress > 1) { + if (atomic_read(&panic_redirect_cpu) != PANIC_CPU_INVALID && + panic_force_cpu == raw_smp_processor_id()) { + pr_emerg("panic: Redirected from CPU %d, skipping stack dump.\n", + atomic_read(&panic_redirect_cpu)); + } else if (test_taint(TAINT_DIE) || oops_in_progress > 1) { panic_this_cpu_backtrace_printed = true; } else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) { dump_stack(); From 90079798f1d748e97c74e23736491543577b8aee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Feb 2026 10:59:00 +0100 Subject: [PATCH 097/107] delayacct: fix uapi timespec64 definition The custom definition of 'struct timespec64' is incompatible with both the kernel's internal definition and the glibc type, at least on big-endian targets that have the tv_nsec field in a different place, and the definition clashes with any userspace that also defines a timespec64 structure. Running the header check with -Wpadding enabled produces this output that warns about the incorrect padding: usr/include/linux/taskstats.h:25:1: error: padding struct size to alignment boundary with 4 bytes [-Werror=padded] Remove the hack and instead use the regular __kernel_timespec type that is meant to be used in uapi definitions. 
Link: https://lkml.kernel.org/r/20260202095906.1344100-1-arnd@kernel.org Fixes: 29b63f6eff0e ("delayacct: add timestamp of delay max") Signed-off-by: Arnd Bergmann Cc: Fan Yu Cc: Jonathan Corbet Cc: xu xin Cc: Yang Yang Cc: Balbir Singh Cc: Jiang Kun Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 27 +++++++++------------------ kernel/delayacct.c | 6 ++++-- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 1b31e8e14d2f..3ae25f3ce067 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -18,16 +18,7 @@ #define _LINUX_TASKSTATS_H #include -#ifdef __KERNEL__ -#include -#else -#ifndef _LINUX_TIME64_H -struct timespec64 { - __s64 tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ -}; -#endif -#endif +#include /* Format for per-task data returned to userland when * - a task exits @@ -242,14 +233,14 @@ struct taskstats { __u64 irq_delay_min; /*v17: delay max timestamp record*/ - struct timespec64 cpu_delay_max_ts; - struct timespec64 blkio_delay_max_ts; - struct timespec64 swapin_delay_max_ts; - struct timespec64 freepages_delay_max_ts; - struct timespec64 thrashing_delay_max_ts; - struct timespec64 compact_delay_max_ts; - struct timespec64 wpcopy_delay_max_ts; - struct timespec64 irq_delay_max_ts; + struct __kernel_timespec cpu_delay_max_ts; + struct __kernel_timespec blkio_delay_max_ts; + struct __kernel_timespec swapin_delay_max_ts; + struct __kernel_timespec freepages_delay_max_ts; + struct __kernel_timespec thrashing_delay_max_ts; + struct __kernel_timespec compact_delay_max_ts; + struct __kernel_timespec wpcopy_delay_max_ts; + struct __kernel_timespec irq_delay_max_ts; }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index d58ffc63bcba..2e55c493c98b 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -18,7 +18,8 @@ do { \ d->type##_delay_max = tsk->delays->type##_delay_max; \ d->type##_delay_min = 
tsk->delays->type##_delay_min; \ - d->type##_delay_max_ts = tsk->delays->type##_delay_max_ts; \ + d->type##_delay_max_ts.tv_sec = tsk->delays->type##_delay_max_ts.tv_sec; \ + d->type##_delay_max_ts.tv_nsec = tsk->delays->type##_delay_max_ts.tv_nsec; \ tmp = d->type##_delay_total + tsk->delays->type##_delay; \ d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \ d->type##_count += tsk->delays->type##_count; \ @@ -175,7 +176,8 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->cpu_delay_max = tsk->sched_info.max_run_delay; d->cpu_delay_min = tsk->sched_info.min_run_delay; - d->cpu_delay_max_ts = tsk->sched_info.max_run_delay_ts; + d->cpu_delay_max_ts.tv_sec = tsk->sched_info.max_run_delay_ts.tv_sec; + d->cpu_delay_max_ts.tv_nsec = tsk->sched_info.max_run_delay_ts.tv_nsec; tmp = (s64)d->cpu_delay_total + t2; d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; tmp = (s64)d->cpu_run_virtual_total + t3; From 989b3c5af63ecb1cbaf1598fe3f79865538bc1ea Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 18 Dec 2025 10:57:48 -0500 Subject: [PATCH 098/107] list: add primitives for private list manipulations Patch series "list private v2 & luo flb", v9. This series introduces two connected infrastructure improvements: a new API for handling private linked lists, and the "File-Lifecycle-Bound" (FLB) mechanism for the Live Update Orchestrator. 1. Private List Primitives (patches 1-3) Recently, Linux introduced the ability to mark structure members as __private and access them via ACCESS_PRIVATE(). This enforces better encapsulation by ensuring internal details are only accessible by the owning subsystem. However, struct list_head is frequently used as an internal linkage mechanism within these private sections. The standard macros in do not support ACCESS_PRIVATE() natively. Consequently, subsystems using private lists are forced to implement ad-hoc workarounds or local iterator macros. 
This series adds , providing a set of primitives identical to those in but designed for private list heads. It also includes a KUnit test suite to verify that the macros correctly handle pointer offsets and qualifiers. 2. This series adds FLB (patches 4-5) support to Live Update that also internally uses private lists. FLB allows global kernel state (such as IOMMU domains or HugeTLB state) to be preserved once, shared across multiple file descriptors, and restored when needed. This is necessary for subsystems where multiple preserved file descriptors depend on a single, shared underlying resource. Preserving this state for each individual file would be redundant and incorrect. FLB uses reference counting tied to the lifecycle of preserved files. The state is preserved when the first file depending on it is preserved, and restored or cleaned up only when the last file is handled. This patch (of 5): Linux recently added an ability to add private members to structs (i.e. __private) and access them via ACCESS_PRIVATE(). This ensures that those members are only accessible by the subsystem which owns the struct type, and not to the object owner. However, struct list_head often needs to be placed into the private section to be manipulated privately by the subsystem. Add macros to support private list manipulations in . 
[akpm@linux-foundation.org: fix kerneldoc] Link: https://lkml.kernel.org/r/20251218155752.3045808-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20251218155752.3045808-2-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Cc: Alexander Graf Cc: David Gow Cc: David Matlack Cc: David Rientjes Cc: Jonathan Corbet Cc: Kees Cook Cc: Mike Rapoport Cc: Petr Mladek Cc: Pratyush Yadav Cc: Samiullah Khawaja Cc: Tamir Duberstein Signed-off-by: Andrew Morton --- Documentation/core-api/list.rst | 9 ++ include/linux/list_private.h | 256 ++++++++++++++++++++++++++++++++ 2 files changed, 265 insertions(+) create mode 100644 include/linux/list_private.h diff --git a/Documentation/core-api/list.rst b/Documentation/core-api/list.rst index 86873ce9adbf..241464ca0549 100644 --- a/Documentation/core-api/list.rst +++ b/Documentation/core-api/list.rst @@ -774,3 +774,12 @@ Full List API .. kernel-doc:: include/linux/list.h :internal: + +Private List API +================ + +.. kernel-doc:: include/linux/list_private.h + :doc: Private List Primitives + +.. kernel-doc:: include/linux/list_private.h + :internal: diff --git a/include/linux/list_private.h b/include/linux/list_private.h new file mode 100644 index 000000000000..19b01d16beda --- /dev/null +++ b/include/linux/list_private.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ +#ifndef _LINUX_LIST_PRIVATE_H +#define _LINUX_LIST_PRIVATE_H + +/** + * DOC: Private List Primitives + * + * Provides a set of list primitives identical in function to those in + * ````, but designed for cases where the embedded + * ``&struct list_head`` is a private member. + */ + +#include +#include + +#define __list_private_offset(type, member) \ + ((size_t)(&ACCESS_PRIVATE(((type *)0), member))) + +/** + * list_private_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. 
+ * @member: the identifier passed to ACCESS_PRIVATE. + */ +#define list_private_entry(ptr, type, member) ({ \ + const struct list_head *__mptr = (ptr); \ + (type *)((char *)__mptr - __list_private_offset(type, member)); \ +}) + +/** + * list_private_first_entry - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the identifier passed to ACCESS_PRIVATE. + */ +#define list_private_first_entry(ptr, type, member) \ + list_private_entry((ptr)->next, type, member) + +/** + * list_private_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the identifier passed to ACCESS_PRIVATE. + */ +#define list_private_last_entry(ptr, type, member) \ + list_private_entry((ptr)->prev, type, member) + +/** + * list_private_next_entry - get the next element in list + * @pos: the type * to cursor + * @member: the name of the list_head within the struct. + */ +#define list_private_next_entry(pos, member) \ + list_private_entry(ACCESS_PRIVATE(pos, member).next, typeof(*(pos)), member) + +/** + * list_private_next_entry_circular - get the next element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. + * + * Wraparound if pos is the last element (return the first element). + * Note, that list is expected to be not empty. + */ +#define list_private_next_entry_circular(pos, head, member) \ + (list_is_last(&ACCESS_PRIVATE(pos, member), head) ? \ + list_private_first_entry(head, typeof(*(pos)), member) : \ + list_private_next_entry(pos, member)) + +/** + * list_private_prev_entry - get the prev element in list + * @pos: the type * to cursor + * @member: the name of the list_head within the struct. 
+ */ +#define list_private_prev_entry(pos, member) \ + list_private_entry(ACCESS_PRIVATE(pos, member).prev, typeof(*(pos)), member) + +/** + * list_private_prev_entry_circular - get the prev element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. + * + * Wraparound if pos is the first element (return the last element). + * Note, that list is expected to be not empty. + */ +#define list_private_prev_entry_circular(pos, head, member) \ + (list_is_first(&ACCESS_PRIVATE(pos, member), head) ? \ + list_private_last_entry(head, typeof(*(pos)), member) : \ + list_private_prev_entry(pos, member)) + +/** + * list_private_entry_is_head - test if the entry points to the head of the list + * @pos: the type * to cursor + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_private_entry_is_head(pos, head, member) \ + list_is_head(&ACCESS_PRIVATE(pos, member), (head)) + +/** + * list_private_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_private_for_each_entry(pos, head, member) \ + for (pos = list_private_first_entry(head, typeof(*pos), member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_next_entry(pos, member)) + +/** + * list_private_for_each_entry_reverse - iterate backwards over list of given type. + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. 
+ */ +#define list_private_for_each_entry_reverse(pos, head, member) \ + for (pos = list_private_last_entry(head, typeof(*pos), member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_prev_entry(pos, member)) + +/** + * list_private_for_each_entry_continue - continue iteration over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define list_private_for_each_entry_continue(pos, head, member) \ + for (pos = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_next_entry(pos, member)) + +/** + * list_private_for_each_entry_continue_reverse - iterate backwards from the given point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Start to iterate over list of given type backwards, continuing after + * the current position. + */ +#define list_private_for_each_entry_continue_reverse(pos, head, member) \ + for (pos = list_private_prev_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_prev_entry(pos, member)) + +/** + * list_private_for_each_entry_from - iterate over list of given type from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type, continuing from current position. 
+ */ +#define list_private_for_each_entry_from(pos, head, member) \ + for (; !list_private_entry_is_head(pos, head, member); \ + pos = list_private_next_entry(pos, member)) + +/** + * list_private_for_each_entry_from_reverse - iterate backwards over list of given type + * from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate backwards over list of given type, continuing from current position. + */ +#define list_private_for_each_entry_from_reverse(pos, head, member) \ + for (; !list_private_entry_is_head(pos, head, member); \ + pos = list_private_prev_entry(pos, member)) + +/** + * list_private_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_private_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_private_first_entry(head, typeof(*pos), member), \ + n = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_next_entry(n, member)) + +/** + * list_private_for_each_entry_safe_continue - continue list iteration safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type, continuing after current point, + * safe against removal of list entry. 
+ */ +#define list_private_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = list_private_next_entry(pos, member), \ + n = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_next_entry(n, member)) + +/** + * list_private_for_each_entry_safe_from - iterate over list from current point safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type from current point, safe against + * removal of list entry. + */ +#define list_private_for_each_entry_safe_from(pos, n, head, member) \ + for (n = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_next_entry(n, member)) + +/** + * list_private_for_each_entry_safe_reverse - iterate backwards over list safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate backwards over list of given type, safe against removal + * of list entry. + */ +#define list_private_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_private_last_entry(head, typeof(*pos), member), \ + n = list_private_prev_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_prev_entry(n, member)) + +/** + * list_private_safe_reset_next - reset a stale list_for_each_entry_safe loop + * @pos: the loop cursor used in the list_for_each_entry_safe loop + * @n: temporary storage used in list_for_each_entry_safe + * @member: the name of the list_head within the struct. + * + * list_safe_reset_next is not safe to use in general if the list may be + * modified concurrently (eg. 
the lock is dropped in the loop body). An + * exception to this is if the cursor element (pos) is pinned in the list, + * and list_safe_reset_next is called after re-taking the lock and before + * completing the current iteration of the loop body. + */ +#define list_private_safe_reset_next(pos, n, member) \ + n = list_private_next_entry(pos, member) + +#endif /* _LINUX_LIST_PRIVATE_H */ From 66bd8501ceb4782b10dfa009085d9b3f4efecad6 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 18 Dec 2025 10:57:49 -0500 Subject: [PATCH 099/107] list: add kunit test for private list primitives Add a KUnit test suite for the new private list primitives. The test defines a struct with a __private list_head and exercises every macro defined in . This ensures that the macros correctly handle the ACCESS_PRIVATE() abstraction and compile without warnings when acting on private members, verifying that qualifiers are stripped and offsets are calculated correctly. Link: https://lkml.kernel.org/r/20251218155752.3045808-3-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: David Gow Cc: Alexander Graf Cc: David Matlack Cc: David Rientjes Cc: Jonathan Corbet Cc: Kees Cook Cc: Mike Rapoport Cc: Petr Mladek Cc: Pratyush Yadav Cc: Samiullah Khawaja Cc: Tamir Duberstein Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 14 +++++++ lib/tests/Makefile | 1 + lib/tests/list-private-test.c | 76 +++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 lib/tests/list-private-test.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7eed3b197ca9..234b73f9baf7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2786,6 +2786,20 @@ config LIST_KUNIT_TEST If unsure, say N. 
+config LIST_PRIVATE_KUNIT_TEST + tristate "KUnit Test for Kernel Private Linked-list structures" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds the KUnit test for the private linked-list primitives + defined in include/linux/list_private.h. + + These primitives allow manipulation of list_head members that are + marked as private and require special accessors (ACCESS_PRIVATE) + to strip qualifiers or handle encapsulation. + + If unsure, say N. + config HASHTABLE_KUNIT_TEST tristate "KUnit Test for Kernel Hashtable structures" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/tests/Makefile b/lib/tests/Makefile index ab3e74d0da9e..f740b0a26750 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_TEST_IOV_ITER) += kunit_iov_iter.o obj-$(CONFIG_IS_SIGNED_TYPE_KUNIT_TEST) += is_signed_type_kunit.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o +obj-$(CONFIG_LIST_PRIVATE_KUNIT_TEST) += list-private-test.o obj-$(CONFIG_KFIFO_KUNIT_TEST) += kfifo_kunit.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o diff --git a/lib/tests/list-private-test.c b/lib/tests/list-private-test.c new file mode 100644 index 000000000000..3bd62939ae67 --- /dev/null +++ b/lib/tests/list-private-test.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit compilation/smoke test for Private list primitives. + * + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ +#include +#include + +/* + * This forces compiler to warn if you access it directly, because list + * primitives expect (struct list_head *), not (volatile struct list_head *). + */ +#undef __private +#define __private volatile + +/* Redefine ACCESS_PRIVATE for this test. 
*/ +#undef ACCESS_PRIVATE +#define ACCESS_PRIVATE(p, member) \ + (*((struct list_head *)((unsigned long)&((p)->member)))) + +struct list_test_struct { + int data; + struct list_head __private list; +}; + +static void list_private_compile_test(struct kunit *test) +{ + struct list_test_struct entry; + struct list_test_struct *pos, *n; + LIST_HEAD(head); + + INIT_LIST_HEAD(&ACCESS_PRIVATE(&entry, list)); + list_add(&ACCESS_PRIVATE(&entry, list), &head); + pos = &entry; + + pos = list_private_entry(&ACCESS_PRIVATE(&entry, list), struct list_test_struct, list); + pos = list_private_first_entry(&head, struct list_test_struct, list); + pos = list_private_last_entry(&head, struct list_test_struct, list); + pos = list_private_next_entry(pos, list); + pos = list_private_prev_entry(pos, list); + pos = list_private_next_entry_circular(pos, &head, list); + pos = list_private_prev_entry_circular(pos, &head, list); + + if (list_private_entry_is_head(pos, &head, list)) + return; + + list_private_for_each_entry(pos, &head, list) { } + list_private_for_each_entry_reverse(pos, &head, list) { } + list_private_for_each_entry_continue(pos, &head, list) { } + list_private_for_each_entry_continue_reverse(pos, &head, list) { } + list_private_for_each_entry_from(pos, &head, list) { } + list_private_for_each_entry_from_reverse(pos, &head, list) { } + + list_private_for_each_entry_safe(pos, n, &head, list) + list_private_safe_reset_next(pos, n, list); + list_private_for_each_entry_safe_continue(pos, n, &head, list) { } + list_private_for_each_entry_safe_from(pos, n, &head, list) { } + list_private_for_each_entry_safe_reverse(pos, n, &head, list) { } +} + +static struct kunit_case list_private_test_cases[] = { + KUNIT_CASE(list_private_compile_test), + {}, +}; + +static struct kunit_suite list_private_test_module = { + .name = "list-private-kunit-test", + .test_cases = list_private_test_cases, +}; + +kunit_test_suite(list_private_test_module); + +MODULE_DESCRIPTION("KUnit compilation test for 
private list primitives"); +MODULE_LICENSE("GPL"); From 6845645eef81da64b916743e3f8d696ec1fb0a13 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 18 Dec 2025 10:57:50 -0500 Subject: [PATCH 100/107] liveupdate: luo_file: Use private list Switch LUO to use the private list iterators. Link: https://lkml.kernel.org/r/20251218155752.3045808-4-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Cc: Alexander Graf Cc: David Gow Cc: David Matlack Cc: David Rientjes Cc: Jonathan Corbet Cc: Kees Cook Cc: Mike Rapoport Cc: Petr Mladek Cc: Pratyush Yadav Cc: Samiullah Khawaja Cc: Tamir Duberstein Signed-off-by: Andrew Morton --- kernel/liveupdate/luo_file.c | 7 ++++--- kernel/liveupdate/luo_internal.h | 7 ------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index a32a777f6df8..1a8a1bb73a58 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include @@ -273,7 +274,7 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) goto err_fput; err = -ENOENT; - luo_list_for_each_private(fh, &luo_file_handler_list, list) { + list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (fh->ops->can_preserve(fh, file)) { err = 0; break; @@ -760,7 +761,7 @@ int luo_file_deserialize(struct luo_file_set *file_set, bool handler_found = false; struct luo_file *luo_file; - luo_list_for_each_private(fh, &luo_file_handler_list, list) { + list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (!strcmp(fh->compatible, file_ser[i].compatible)) { handler_found = true; break; @@ -835,7 +836,7 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) return -EBUSY; /* Check for duplicate compatible strings */ - luo_list_for_each_private(fh_iter, &luo_file_handler_list, list) { + list_private_for_each_entry(fh_iter, &luo_file_handler_list, list) { if 
(!strcmp(fh_iter->compatible, fh->compatible)) { pr_err("File handler registration failed: Compatible string '%s' already registered.\n", fh->compatible); diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h index c8973b543d1d..3f1e0c94637e 100644 --- a/kernel/liveupdate/luo_internal.h +++ b/kernel/liveupdate/luo_internal.h @@ -40,13 +40,6 @@ static inline int luo_ucmd_respond(struct luo_ucmd *ucmd, */ #define luo_restore_fail(__fmt, ...) panic(__fmt, ##__VA_ARGS__) -/* Mimics list_for_each_entry() but for private list head entries */ -#define luo_list_for_each_private(pos, head, member) \ - for (struct list_head *__iter = (head)->next; \ - __iter != (head) && \ - ({ pos = container_of(__iter, typeof(*(pos)), member); 1; }); \ - __iter = __iter->next) - /** * struct luo_file_set - A set of files that belong to the same sessions. * @files_list: An ordered list of files associated with this session, it is From cab056f2aae7250af50e503b81a80dfc567a1acd Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 18 Dec 2025 10:57:51 -0500 Subject: [PATCH 101/107] liveupdate: luo_flb: introduce File-Lifecycle-Bound global state Introduce a mechanism for managing global kernel state whose lifecycle is tied to the preservation of one or more files. This is necessary for subsystems where multiple preserved file descriptors depend on a single, shared underlying resource. An example is HugeTLB, where multiple file descriptors such as memfd and guest_memfd may rely on the state of a single HugeTLB subsystem. Preserving this state for each individual file would be redundant and incorrect. The state should be preserved only once when the first file is preserved, and restored/finished only once the last file is handled. This patch introduces File-Lifecycle-Bound (FLB) objects to solve this problem. 
An FLB is a global, reference-counted object with a defined set of operations: - A file handler (struct liveupdate_file_handler) declares a dependency on one or more FLBs via a new registration function, liveupdate_register_flb(). - When the first file depending on an FLB is preserved, the FLB's .preserve() callback is invoked to save the shared global state. The reference count is then incremented for each subsequent file. - Conversely, when the last file is unpreserved (before reboot) or finished (after reboot), the FLB's .unpreserve() or .finish() callback is invoked to clean up the global resource. The implementation includes: - A new set of ABI definitions (luo_flb_ser, luo_flb_head_ser) and a corresponding FDT node (luo-flb) to serialize the state of all active FLBs and pass them via Kexec Handover. - Core logic in luo_flb.c to manage FLB registration, reference counting, and the invocation of lifecycle callbacks. - An API (liveupdate_flb_get/_incoming/_outgoing) for other kernel subsystems to safely access the live object managed by an FLB, both before and after the live update. This framework provides the necessary infrastructure for more complex subsystems like IOMMU, VFIO, and KVM to integrate with the Live Update Orchestrator. 
Link: https://lkml.kernel.org/r/20251218155752.3045808-5-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Cc: Alexander Graf Cc: David Gow Cc: David Matlack Cc: David Rientjes Cc: Jonathan Corbet Cc: Kees Cook Cc: Mike Rapoport Cc: Petr Mladek Cc: Pratyush Yadav Cc: Samiullah Khawaja Cc: Tamir Duberstein Signed-off-by: Andrew Morton --- Documentation/core-api/liveupdate.rst | 11 + include/linux/kho/abi/luo.h | 76 +++ include/linux/liveupdate.h | 147 ++++++ kernel/liveupdate/Makefile | 1 + kernel/liveupdate/luo_core.c | 7 +- kernel/liveupdate/luo_file.c | 24 +- kernel/liveupdate/luo_flb.c | 654 ++++++++++++++++++++++++++ kernel/liveupdate/luo_internal.h | 7 + 8 files changed, 924 insertions(+), 3 deletions(-) create mode 100644 kernel/liveupdate/luo_flb.c diff --git a/Documentation/core-api/liveupdate.rst b/Documentation/core-api/liveupdate.rst index e2aba13494cf..5a292d0f3706 100644 --- a/Documentation/core-api/liveupdate.rst +++ b/Documentation/core-api/liveupdate.rst @@ -18,6 +18,11 @@ LUO Preserving File Descriptors .. kernel-doc:: kernel/liveupdate/luo_file.c :doc: LUO File Descriptors +LUO File Lifecycle Bound Global Data +==================================== +.. kernel-doc:: kernel/liveupdate/luo_flb.c + :doc: LUO File Lifecycle Bound Global Data + Live Update Orchestrator ABI ============================ .. kernel-doc:: include/linux/kho/abi/luo.h @@ -40,6 +45,9 @@ Public API .. kernel-doc:: kernel/liveupdate/luo_core.c :export: +.. kernel-doc:: kernel/liveupdate/luo_flb.c + :export: + .. kernel-doc:: kernel/liveupdate/luo_file.c :export: @@ -48,6 +56,9 @@ Internal API .. kernel-doc:: kernel/liveupdate/luo_core.c :internal: +.. kernel-doc:: kernel/liveupdate/luo_flb.c + :internal: + .. 
kernel-doc:: kernel/liveupdate/luo_session.c :internal: diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h index beb86847b544..a44010aafb5e 100644 --- a/include/linux/kho/abi/luo.h +++ b/include/linux/kho/abi/luo.h @@ -37,6 +37,11 @@ * compatible = "luo-session-v1"; * luo-session-header = ; * }; + * + * luo-flb { + * compatible = "luo-flb-v1"; + * luo-flb-header = ; + * }; * }; * * Main LUO Node (/): @@ -56,6 +61,17 @@ * is the header for a contiguous block of memory containing an array of * `struct luo_session_ser`, one for each preserved session. * + * File-Lifecycle-Bound Node (luo-flb): + * This node describes all preserved global objects whose lifecycle is bound + * to that of the preserved files (e.g., shared IOMMU state). + * + * - compatible: "luo-flb-v1" + * Identifies the FLB ABI version. + * - luo-flb-header: u64 + * The physical address of a `struct luo_flb_header_ser`. This structure is + * the header for a contiguous block of memory containing an array of + * `struct luo_flb_ser`, one for each preserved global object. + * * Serialization Structures: * The FDT properties point to memory regions containing arrays of simple, * `__packed` structures. These structures contain the actual preserved state. @@ -74,6 +90,16 @@ * Metadata for a single preserved file. Contains the `compatible` string to * find the correct handler in the new kernel, a user-provided `token` for * identification, and an opaque `data` handle for the handler to use. + * + * - struct luo_flb_header_ser: + * Header for the FLB array. Contains the total page count of the + * preserved memory block and the number of `struct luo_flb_ser` entries + * that follow. + * + * - struct luo_flb_ser: + * Metadata for a single preserved global object. Contains its `name` + * (compatible string), an opaque `data` handle, and the `count` + * number of files depending on it. 
 */

#ifndef _LINUX_KHO_ABI_LUO_H
@@ -163,4 +189,54 @@ struct luo_session_ser {
 	struct luo_file_set_ser file_set_ser;
 } __packed;
 
+/* The max size is set so it can be reliably used during serialization */
+#define LIVEUPDATE_FLB_COMPAT_LENGTH	48
+
+#define LUO_FDT_FLB_NODE_NAME		"luo-flb"
+#define LUO_FDT_FLB_COMPATIBLE		"luo-flb-v1"
+#define LUO_FDT_FLB_HEADER		"luo-flb-header"
+
+/**
+ * struct luo_flb_header_ser - Header for the serialized FLB data block.
+ * @pgcnt: The total number of pages occupied by the entire preserved memory
+ *         region, including this header and the subsequent array of
+ *         &struct luo_flb_ser entries.
+ * @count: The number of &struct luo_flb_ser entries that follow this header
+ *         in the memory block.
+ *
+ * This structure is located at the physical address specified by the
+ * `LUO_FDT_FLB_HEADER` FDT property. It provides the new kernel with the
+ * necessary information to find and iterate over the array of preserved
+ * File-Lifecycle-Bound objects and to manage the underlying memory.
+ *
+ * If this structure is modified, LUO_FDT_FLB_COMPATIBLE must be updated.
+ */
+struct luo_flb_header_ser {
+	u64 pgcnt;
+	u64 count;
+} __packed;
+
+/**
+ * struct luo_flb_ser - Represents the serialized state of a single FLB object.
+ * @name: The unique compatibility string of the FLB object, used to find the
+ *        corresponding &struct liveupdate_flb handler in the new kernel.
+ * @data: The opaque u64 handle returned by the FLB's .preserve() operation
+ *        in the old kernel. This handle encapsulates the entire state needed
+ *        for restoration.
+ * @count: The reference count at the time of serialization; i.e., the number
+ *         of preserved files that depended on this FLB. This is used by the
+ *         new kernel to correctly manage the FLB's lifecycle.
+ *
+ * An array of these structures is created in a preserved memory region and
+ * passed to the new kernel. Each entry allows the LUO core to restore one
+ * global, shared object.
+ * + * If this structure is modified, LUO_FDT_FLB_COMPATIBLE must be updated. + */ +struct luo_flb_ser { + char name[LIVEUPDATE_FLB_COMPAT_LENGTH]; + u64 data; + u64 count; +} __packed; + #endif /* _LINUX_KHO_ABI_LUO_H */ diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h index a7f6ee5b6771..fe82a6c3005f 100644 --- a/include/linux/liveupdate.h +++ b/include/linux/liveupdate.h @@ -11,10 +11,13 @@ #include #include #include +#include #include #include struct liveupdate_file_handler; +struct liveupdate_flb; +struct liveupdate_session; struct file; /** @@ -99,6 +102,118 @@ struct liveupdate_file_handler { * registered file handlers. */ struct list_head __private list; + /* A list of FLB dependencies. */ + struct list_head __private flb_list; +}; + +/** + * struct liveupdate_flb_op_args - Arguments for FLB operation callbacks. + * @flb: The global FLB instance for which this call is performed. + * @data: For .preserve(): [OUT] The callback sets this field. + * For .unpreserve(): [IN] The handle from .preserve(). + * For .retrieve(): [IN] The handle from .preserve(). + * @obj: For .preserve(): [OUT] Sets this to the live object. + * For .retrieve(): [OUT] Sets this to the live object. + * For .finish(): [IN] The live object from .retrieve(). + * + * This structure bundles all parameters for the FLB operation callbacks. + */ +struct liveupdate_flb_op_args { + struct liveupdate_flb *flb; + u64 data; + void *obj; +}; + +/** + * struct liveupdate_flb_ops - Callbacks for global File-Lifecycle-Bound data. + * @preserve: Called when the first file using this FLB is preserved. + * The callback must save its state and return a single, + * self-contained u64 handle by setting the 'argp->data' + * field and 'argp->obj'. + * @unpreserve: Called when the last file using this FLB is unpreserved + * (aborted before reboot). Receives the handle via + * 'argp->data' and live object via 'argp->obj'. 
+ * @retrieve: Called on-demand in the new kernel, the first time a + * component requests access to the shared object. It receives + * the preserved handle via 'argp->data' and must reconstruct + * the live object, returning it by setting the 'argp->obj' + * field. + * @finish: Called in the new kernel when the last file using this FLB + * is finished. Receives the live object via 'argp->obj' for + * cleanup. + * @owner: Module reference + * + * Operations that manage global shared data with file bound lifecycle, + * triggered by the first file that uses it and concluded by the last file that + * uses it, across all sessions. + */ +struct liveupdate_flb_ops { + int (*preserve)(struct liveupdate_flb_op_args *argp); + void (*unpreserve)(struct liveupdate_flb_op_args *argp); + int (*retrieve)(struct liveupdate_flb_op_args *argp); + void (*finish)(struct liveupdate_flb_op_args *argp); + struct module *owner; +}; + +/* + * struct luo_flb_private_state - Private FLB state structures. + * @count: The number of preserved files currently depending on this FLB. + * This is used to trigger the preserve/unpreserve/finish ops on the + * first/last file. + * @data: The opaque u64 handle returned by .preserve() or passed to + * .retrieve(). + * @obj: The live kernel object returned by .preserve() or .retrieve(). + * @lock: A mutex that protects all fields within this structure, providing + * the synchronization service for the FLB's ops. + * @finished: True once the FLB's finish() callback has run. + * @retrieved: True once the FLB's retrieve() callback has run. + */ +struct luo_flb_private_state { + long count; + u64 data; + void *obj; + struct mutex lock; + bool finished; + bool retrieved; +}; + +/* + * struct luo_flb_private - Keep separate incoming and outgoing states. + * @list: A global list of registered FLBs. + * @outgoing: The runtime state for the pre-reboot + * (preserve/unpreserve) lifecycle. 
+ * @incoming: The runtime state for the post-reboot (retrieve/finish) + * lifecycle. + * @users: With how many File-Handlers this FLB is registered. + * @initialized: true when private fields have been initialized. + */ +struct luo_flb_private { + struct list_head list; + struct luo_flb_private_state outgoing; + struct luo_flb_private_state incoming; + int users; + bool initialized; +}; + +/** + * struct liveupdate_flb - A global definition for a shared data object. + * @ops: Callback functions + * @compatible: The compatibility string (e.g., "iommu-core-v1" + * that uniquely identifies the FLB type this handler + * supports. This is matched against the compatible string + * associated with individual &struct liveupdate_flb + * instances. + * + * This struct is the "template" that a driver registers to define a shared, + * file-lifecycle-bound object. The actual runtime state (the live object, + * refcount, etc.) is managed privately by the LUO core. + */ +struct liveupdate_flb { + const struct liveupdate_flb_ops *ops; + const char compatible[LIVEUPDATE_FLB_COMPAT_LENGTH]; + + /* private: */ + struct luo_flb_private __private private; }; #ifdef CONFIG_LIVEUPDATE @@ -112,6 +227,14 @@ int liveupdate_reboot(void); int liveupdate_register_file_handler(struct liveupdate_file_handler *fh); int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh); +int liveupdate_register_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb); +int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb); + +int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp); +int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, void **objp); + #else /* CONFIG_LIVEUPDATE */ static inline bool liveupdate_enabled(void) @@ -134,5 +257,29 @@ static inline int liveupdate_unregister_file_handler(struct liveupdate_file_hand return -EOPNOTSUPP; } +static inline int liveupdate_register_flb(struct 
liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + return -EOPNOTSUPP; +} + +static inline int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + return -EOPNOTSUPP; +} + +static inline int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, + void **objp) +{ + return -EOPNOTSUPP; +} + +static inline int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, + void **objp) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_LIVEUPDATE */ #endif /* _LINUX_LIVEUPDATE_H */ diff --git a/kernel/liveupdate/Makefile b/kernel/liveupdate/Makefile index 7cad2eece32d..d2f779cbe279 100644 --- a/kernel/liveupdate/Makefile +++ b/kernel/liveupdate/Makefile @@ -3,6 +3,7 @@ luo-y := \ luo_core.o \ luo_file.o \ + luo_flb.o \ luo_session.o obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c index a26c093eb8eb..dda7bb57d421 100644 --- a/kernel/liveupdate/luo_core.c +++ b/kernel/liveupdate/luo_core.c @@ -127,7 +127,9 @@ static int __init luo_early_startup(void) if (err) return err; - return 0; + err = luo_flb_setup_incoming(luo_global.fdt_in); + + return err; } static int __init liveupdate_early_init(void) @@ -164,6 +166,7 @@ static int __init luo_fdt_setup(void) err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE); err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln)); err |= luo_session_setup_outgoing(fdt_out); + err |= luo_flb_setup_outgoing(fdt_out); err |= fdt_end_node(fdt_out); err |= fdt_finish(fdt_out); if (err) @@ -225,6 +228,8 @@ int liveupdate_reboot(void) if (err) return err; + luo_flb_serialize(); + err = kho_finalize(); if (err) { pr_err("kho_finalize failed %d\n", err); diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index 1a8a1bb73a58..cade273c50c9 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -285,10 +285,14 @@ int luo_preserve_file(struct luo_file_set 
*file_set, u64 token, int fd)
 	if (err)
 		goto err_free_files_mem;
 
+	err = luo_flb_file_preserve(fh);
+	if (err)
+		goto err_free_files_mem;
+
 	luo_file = kzalloc(sizeof(*luo_file), GFP_KERNEL);
 	if (!luo_file) {
 		err = -ENOMEM;
-		goto err_free_files_mem;
+		goto err_flb_unpreserve;
 	}
 
 	luo_file->file = file;
@@ -312,6 +316,8 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd)
 
 err_kfree:
 	kfree(luo_file);
+err_flb_unpreserve:
+	luo_flb_file_unpreserve(fh);
 err_free_files_mem:
 	luo_free_files_mem(file_set);
 err_fput:
@@ -353,6 +359,7 @@ void luo_file_unpreserve_files(struct luo_file_set *file_set)
 		args.serialized_data = luo_file->serialized_data;
 		args.private_data = luo_file->private_data;
 		luo_file->fh->ops->unpreserve(&args);
+		luo_flb_file_unpreserve(luo_file->fh);
 
 		list_del(&luo_file->list);
 		file_set->count--;
@@ -630,6 +637,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set,
 	args.retrieved = luo_file->retrieved;
 
 	luo_file->fh->ops->finish(&args);
+	luo_flb_file_finish(luo_file->fh);
 }
 
 /**
@@ -851,6 +859,7 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh)
 		goto err_resume;
 	}
 
+	INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, flb_list));
 	INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, list));
 	list_add_tail(&ACCESS_PRIVATE(fh, list), &luo_file_handler_list);
 	luo_session_resume();
@@ -871,23 +880,34 @@ err_resume:
  *
  * It ensures safe removal by checking that:
  * No live update session is currently in progress.
+ * No FLB registered with this file handler.
  *
  * If the unregistration fails, the internal test state is reverted.
  *
  * Return: 0 Success. -EOPNOTSUPP when live update is not enabled. -EBUSY A live
- * update is in progress, can't quiesce live update.
+ * update is in progress, can't quiesce live update, or an FLB is registered
+ * with this file handler.
*/ int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) { + int err = -EBUSY; + if (!liveupdate_enabled()) return -EOPNOTSUPP; if (!luo_session_quiesce()) return -EBUSY; + if (!list_empty(&ACCESS_PRIVATE(fh, flb_list))) + goto err_resume; + list_del(&ACCESS_PRIVATE(fh, list)); module_put(fh->ops->owner); luo_session_resume(); return 0; + +err_resume: + luo_session_resume(); + return err; } diff --git a/kernel/liveupdate/luo_flb.c b/kernel/liveupdate/luo_flb.c new file mode 100644 index 000000000000..4c437de5c0b0 --- /dev/null +++ b/kernel/liveupdate/luo_flb.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ + +/** + * DOC: LUO File Lifecycle Bound Global Data + * + * File-Lifecycle-Bound (FLB) objects provide a mechanism for managing global + * state that is shared across multiple live-updatable files. The lifecycle of + * this shared state is tied to the preservation of the files that depend on it. + * + * An FLB represents a global resource, such as the IOMMU core state, that is + * required by multiple file descriptors (e.g., all VFIO fds). + * + * The preservation of the FLB's state is triggered when the *first* file + * depending on it is preserved. The cleanup of this state (unpreserve or + * finish) is triggered when the *last* file depending on it is unpreserved or + * finished. + * + * Handler Dependency: A file handler declares its dependency on one or more + * FLBs by registering them via liveupdate_register_flb(). + * + * Callback Model: Each FLB is defined by a set of operations + * (&struct liveupdate_flb_ops) that LUO invokes at key points: + * + * - .preserve(): Called for the first file. Saves global state. + * - .unpreserve(): Called for the last file (if aborted pre-reboot). + * - .retrieve(): Called on-demand in the new kernel to restore the state. + * - .finish(): Called for the last file in the new kernel for cleanup. 
+ * + * This reference-counted approach ensures that shared state is saved exactly + * once and restored exactly once, regardless of how many files depend on it, + * and that its lifecycle is correctly managed across the kexec transition. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "luo_internal.h" + +#define LUO_FLB_PGCNT 1ul +#define LUO_FLB_MAX (((LUO_FLB_PGCNT << PAGE_SHIFT) - \ + sizeof(struct luo_flb_header_ser)) / sizeof(struct luo_flb_ser)) + +struct luo_flb_header { + struct luo_flb_header_ser *header_ser; + struct luo_flb_ser *ser; + bool active; +}; + +struct luo_flb_global { + struct luo_flb_header incoming; + struct luo_flb_header outgoing; + struct list_head list; + long count; +}; + +static struct luo_flb_global luo_flb_global = { + .list = LIST_HEAD_INIT(luo_flb_global.list), +}; + +/* + * struct luo_flb_link - Links an FLB definition to a file handler's internal + * list of dependencies. + * @flb: A pointer to the registered &struct liveupdate_flb definition. + * @list: The list_head for linking. + */ +struct luo_flb_link { + struct liveupdate_flb *flb; + struct list_head list; +}; + +/* luo_flb_get_private - Access private field, and if needed initialize it. 
*/ +static struct luo_flb_private *luo_flb_get_private(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = &ACCESS_PRIVATE(flb, private); + + if (!private->initialized) { + mutex_init(&private->incoming.lock); + mutex_init(&private->outgoing.lock); + INIT_LIST_HEAD(&private->list); + private->users = 0; + private->initialized = true; + } + + return private; +} + +static int luo_flb_file_preserve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + scoped_guard(mutex, &private->outgoing.lock) { + if (!private->outgoing.count) { + struct liveupdate_flb_op_args args = {0}; + int err; + + args.flb = flb; + err = flb->ops->preserve(&args); + if (err) + return err; + private->outgoing.data = args.data; + private->outgoing.obj = args.obj; + } + private->outgoing.count++; + } + + return 0; +} + +static void luo_flb_file_unpreserve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + scoped_guard(mutex, &private->outgoing.lock) { + private->outgoing.count--; + if (!private->outgoing.count) { + struct liveupdate_flb_op_args args = {0}; + + args.flb = flb; + args.data = private->outgoing.data; + args.obj = private->outgoing.obj; + + if (flb->ops->unpreserve) + flb->ops->unpreserve(&args); + + private->outgoing.data = 0; + private->outgoing.obj = NULL; + } + } +} + +static int luo_flb_retrieve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct luo_flb_header *fh = &luo_flb_global.incoming; + struct liveupdate_flb_op_args args = {0}; + bool found = false; + int err; + + guard(mutex)(&private->incoming.lock); + + if (private->incoming.finished) + return -ENODATA; + + if (private->incoming.retrieved) + return 0; + + if (!fh->active) + return -ENODATA; + + for (int i = 0; i < fh->header_ser->count; i++) { + if (!strcmp(fh->ser[i].name, flb->compatible)) { + private->incoming.data = fh->ser[i].data; + 
private->incoming.count = fh->ser[i].count; + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + args.flb = flb; + args.data = private->incoming.data; + + err = flb->ops->retrieve(&args); + if (err) + return err; + + private->incoming.obj = args.obj; + private->incoming.retrieved = true; + + return 0; +} + +static void luo_flb_file_finish_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + u64 count; + + scoped_guard(mutex, &private->incoming.lock) + count = --private->incoming.count; + + if (!count) { + struct liveupdate_flb_op_args args = {0}; + + if (!private->incoming.retrieved) { + int err = luo_flb_retrieve_one(flb); + + if (WARN_ON(err)) + return; + } + + scoped_guard(mutex, &private->incoming.lock) { + args.flb = flb; + args.obj = private->incoming.obj; + flb->ops->finish(&args); + + private->incoming.data = 0; + private->incoming.obj = NULL; + private->incoming.finished = true; + } + } +} + +/** + * luo_flb_file_preserve - Notifies FLBs that a file is about to be preserved. + * @fh: The file handler for the preserved file. + * + * This function iterates through all FLBs associated with the given file + * handler. It increments the reference count for each FLB. If the count becomes + * 1, it triggers the FLB's .preserve() callback to save the global state. + * + * This operation is atomic. If any FLB's .preserve() op fails, it will roll + * back by calling .unpreserve() on any FLBs that were successfully preserved + * during this call. + * + * Context: Called from luo_preserve_file() + * Return: 0 on success, or a negative errno on failure. 
+ */ +int luo_flb_file_preserve(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + int err = 0; + + list_for_each_entry(iter, flb_list, list) { + err = luo_flb_file_preserve_one(iter->flb); + if (err) + goto exit_err; + } + + return 0; + +exit_err: + list_for_each_entry_continue_reverse(iter, flb_list, list) + luo_flb_file_unpreserve_one(iter->flb); + + return err; +} + +/** + * luo_flb_file_unpreserve - Notifies FLBs that a dependent file was unpreserved. + * @fh: The file handler for the unpreserved file. + * + * This function iterates through all FLBs associated with the given file + * handler, in reverse order of registration. It decrements the reference count + * for each FLB. If the count becomes 0, it triggers the FLB's .unpreserve() + * callback to clean up the global state. + * + * Context: Called when a preserved file is being cleaned up before reboot + * (e.g., from luo_file_unpreserve_files()). + */ +void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + + list_for_each_entry_reverse(iter, flb_list, list) + luo_flb_file_unpreserve_one(iter->flb); +} + +/** + * luo_flb_file_finish - Notifies FLBs that a dependent file has been finished. + * @fh: The file handler for the finished file. + * + * This function iterates through all FLBs associated with the given file + * handler, in reverse order of registration. It decrements the incoming + * reference count for each FLB. If the count becomes 0, it triggers the FLB's + * .finish() callback for final cleanup in the new kernel. + * + * Context: Called from luo_file_finish() for each file being finished. 
+ */ +void luo_flb_file_finish(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + + list_for_each_entry_reverse(iter, flb_list, list) + luo_flb_file_finish_one(iter->flb); +} + +/** + * liveupdate_register_flb - Associate an FLB with a file handler and register it globally. + * @fh: The file handler that will now depend on the FLB. + * @flb: The File-Lifecycle-Bound object to associate. + * + * Establishes a dependency, informing the LUO core that whenever a file of + * type @fh is preserved, the state of @flb must also be managed. + * + * On the first registration of a given @flb object, it is added to a global + * registry. This function checks for duplicate registrations, both for a + * specific handler and globally, and ensures the total number of unique + * FLBs does not exceed the system limit. + * + * Context: Typically called from a subsystem's module init function after + * both the handler and the FLB have been defined and initialized. + * Return: 0 on success. Returns a negative errno on failure: + * -EINVAL if arguments are NULL or not initialized. + * -ENOMEM on memory allocation failure. + * -EEXIST if this FLB is already registered with this handler. + * -ENOSPC if the maximum number of global FLBs has been reached. + * -EOPNOTSUPP if live update is disabled or not configured. 
+ */ +int liveupdate_register_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *link __free(kfree) = NULL; + struct liveupdate_flb *gflb; + struct luo_flb_link *iter; + int err; + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + if (WARN_ON(!flb->ops->preserve || !flb->ops->unpreserve || + !flb->ops->retrieve || !flb->ops->finish)) { + return -EINVAL; + } + + /* + * File handler must already be registered, as it initializes the + * flb_list + */ + if (WARN_ON(list_empty(&ACCESS_PRIVATE(fh, list)))) + return -EINVAL; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + /* + * Ensure the system is quiescent (no active sessions). + * This acts as a global lock for registration: no other thread can + * be in this section, and no sessions can be creating/using FDs. + */ + if (!luo_session_quiesce()) + return -EBUSY; + + /* Check that this FLB is not already linked to this file handler */ + err = -EEXIST; + list_for_each_entry(iter, flb_list, list) { + if (iter->flb == flb) + goto err_resume; + } + + /* + * If this FLB is not linked to global list it's the first time the FLB + * is registered + */ + if (!private->users) { + if (WARN_ON(!list_empty(&private->list))) { + err = -EINVAL; + goto err_resume; + } + + if (luo_flb_global.count == LUO_FLB_MAX) { + err = -ENOSPC; + goto err_resume; + } + + /* Check that compatible string is unique in global list */ + list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { + if (!strcmp(gflb->compatible, flb->compatible)) + goto err_resume; + } + + if (!try_module_get(flb->ops->owner)) { + err = -EAGAIN; + goto err_resume; + } + + list_add_tail(&private->list, &luo_flb_global.list); + luo_flb_global.count++; + } + + /* Finally, link the FLB to the file handler */ + private->users++; + link->flb = flb; + 
list_add_tail(&no_free_ptr(link)->list, flb_list); + luo_session_resume(); + + return 0; + +err_resume: + luo_session_resume(); + return err; +} + +/** + * liveupdate_unregister_flb - Remove an FLB dependency from a file handler. + * @fh: The file handler that is currently depending on the FLB. + * @flb: The File-Lifecycle-Bound object to remove. + * + * Removes the association between the specified file handler and the FLB + * previously established by liveupdate_register_flb(). + * + * This function manages the global lifecycle of the FLB. It decrements the + * FLB's usage count. If this was the last file handler referencing this FLB, + * the FLB is removed from the global registry and the reference to its + * owner module (acquired during registration) is released. + * + * Context: This function ensures the session is quiesced (no active FDs + * being created) during the update. It is typically called from a + * subsystem's module exit function. + * Return: 0 on success. + * -EOPNOTSUPP if live update is disabled. + * -EBUSY if the live update session is active and cannot be quiesced. + * -ENOENT if the FLB was not found in the file handler's list. + */ +int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + int err = -ENOENT; + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + /* + * Ensure the system is quiescent (no active sessions). + * This acts as a global lock for unregistration. 
+ */
+	if (!luo_session_quiesce())
+		return -EBUSY;
+
+	/* Find and remove the link from the file handler's list */
+	list_for_each_entry(iter, flb_list, list) {
+		if (iter->flb == flb) {
+			list_del(&iter->list);
+			kfree(iter);
+			err = 0;
+			break;
+		}
+	}
+
+	if (err)
+		goto err_resume;
+
+	private->users--;
+	/*
+	 * If this is the last file-handler with which we are registered, remove
+	 * from the global list, and release module reference.
+	 */
+	if (!private->users) {
+		list_del_init(&private->list);
+		luo_flb_global.count--;
+		module_put(flb->ops->owner);
+	}
+
+	luo_session_resume();
+
+	return 0;
+
+err_resume:
+	luo_session_resume();
+	return err;
+}
+
+/**
+ * liveupdate_flb_get_incoming - Retrieve the incoming FLB object.
+ * @flb: The FLB definition.
+ * @objp: Output parameter; will be populated with the live shared object.
+ *
+ * Returns a pointer to its shared live object for the incoming (post-reboot)
+ * path.
+ *
+ * If this is the first time the object is requested in the new kernel, this
+ * function will trigger the FLB's .retrieve() callback to reconstruct the
+ * object from its preserved state. Subsequent calls will return the same
+ * cached object.
+ *
+ * Return: 0 on success, or a negative errno on failure. -ENODATA means no
+ * incoming FLB data, -ENOENT means specific flb not found in the incoming
+ * data, and -EOPNOTSUPP when live update is disabled or not configured.
+ */
+int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp)
+{
+	struct luo_flb_private *private = luo_flb_get_private(flb);
+
+	if (!liveupdate_enabled())
+		return -EOPNOTSUPP;
+
+	if (!private->incoming.obj) {
+		int err = luo_flb_retrieve_one(flb);
+
+		if (err)
+			return err;
+	}
+
+	guard(mutex)(&private->incoming.lock);
+	*objp = private->incoming.obj;
+
+	return 0;
+}
+
+/**
+ * liveupdate_flb_get_outgoing - Retrieve the outgoing FLB object.
+ * @flb: The FLB definition.
+ * @objp: Output parameter; will be populated with the live shared object.
+ * + * Returns a pointer to its shared live object for the outgoing (pre-reboot) + * path. + * + * This function assumes the object has already been created by the FLB's + * .preserve() callback, which is triggered when the first dependent file + * is preserved. + * + * Return: 0 on success, or a negative errno on failure. + */ +int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, void **objp) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + guard(mutex)(&private->outgoing.lock); + *objp = private->outgoing.obj; + + return 0; +} + +int __init luo_flb_setup_outgoing(void *fdt_out) +{ + struct luo_flb_header_ser *header_ser; + u64 header_ser_pa; + int err; + + header_ser = kho_alloc_preserve(LUO_FLB_PGCNT << PAGE_SHIFT); + if (IS_ERR(header_ser)) + return PTR_ERR(header_ser); + + header_ser_pa = virt_to_phys(header_ser); + + err = fdt_begin_node(fdt_out, LUO_FDT_FLB_NODE_NAME); + err |= fdt_property_string(fdt_out, "compatible", + LUO_FDT_FLB_COMPATIBLE); + err |= fdt_property(fdt_out, LUO_FDT_FLB_HEADER, &header_ser_pa, + sizeof(header_ser_pa)); + err |= fdt_end_node(fdt_out); + + if (err) + goto err_unpreserve; + + header_ser->pgcnt = LUO_FLB_PGCNT; + luo_flb_global.outgoing.header_ser = header_ser; + luo_flb_global.outgoing.ser = (void *)(header_ser + 1); + luo_flb_global.outgoing.active = true; + + return 0; + +err_unpreserve: + kho_unpreserve_free(header_ser); + + return err; +} + +int __init luo_flb_setup_incoming(void *fdt_in) +{ + struct luo_flb_header_ser *header_ser; + int err, header_size, offset; + const void *ptr; + u64 header_ser_pa; + + offset = fdt_subnode_offset(fdt_in, 0, LUO_FDT_FLB_NODE_NAME); + if (offset < 0) { + pr_err("Unable to get FLB node [%s]\n", LUO_FDT_FLB_NODE_NAME); + + return -ENOENT; + } + + err = fdt_node_check_compatible(fdt_in, offset, + LUO_FDT_FLB_COMPATIBLE); + if (err) { + pr_err("FLB node is incompatible with '%s' [%d]\n", + 
LUO_FDT_FLB_COMPATIBLE, err); + + return -EINVAL; + } + + header_size = 0; + ptr = fdt_getprop(fdt_in, offset, LUO_FDT_FLB_HEADER, &header_size); + if (!ptr || header_size != sizeof(u64)) { + pr_err("Unable to get FLB header property '%s' [%d]\n", + LUO_FDT_FLB_HEADER, header_size); + + return -EINVAL; + } + + header_ser_pa = get_unaligned((u64 *)ptr); + header_ser = phys_to_virt(header_ser_pa); + + luo_flb_global.incoming.header_ser = header_ser; + luo_flb_global.incoming.ser = (void *)(header_ser + 1); + luo_flb_global.incoming.active = true; + + return 0; +} + +/** + * luo_flb_serialize - Serializes all active FLB objects for KHO. + * + * This function is called from the reboot path. It iterates through all + * registered File-Lifecycle-Bound (FLB) objects. For each FLB that has been + * preserved (i.e., its reference count is greater than zero), it writes its + * metadata into the memory region designated for Kexec Handover. + * + * The serialized data includes the FLB's compatibility string, its opaque + * data handle, and the final reference count. This allows the new kernel to + * find the appropriate handler and reconstruct the FLB's state. + * + * Context: Called from liveupdate_reboot() just before kho_finalize(). 
+ */ +void luo_flb_serialize(void) +{ + struct luo_flb_header *fh = &luo_flb_global.outgoing; + struct liveupdate_flb *gflb; + int i = 0; + + list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { + struct luo_flb_private *private = luo_flb_get_private(gflb); + + if (private->outgoing.count > 0) { + strscpy(fh->ser[i].name, gflb->compatible, + sizeof(fh->ser[i].name)); + fh->ser[i].data = private->outgoing.data; + fh->ser[i].count = private->outgoing.count; + i++; + } + } + + fh->header_ser->count = i; +} diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h index 3f1e0c94637e..99db13d99530 100644 --- a/kernel/liveupdate/luo_internal.h +++ b/kernel/liveupdate/luo_internal.h @@ -100,4 +100,11 @@ int luo_file_deserialize(struct luo_file_set *file_set, void luo_file_set_init(struct luo_file_set *file_set); void luo_file_set_destroy(struct luo_file_set *file_set); +int luo_flb_file_preserve(struct liveupdate_file_handler *fh); +void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh); +void luo_flb_file_finish(struct liveupdate_file_handler *fh); +int __init luo_flb_setup_outgoing(void *fdt); +int __init luo_flb_setup_incoming(void *fdt); +void luo_flb_serialize(void); + #endif /* _LINUX_LUO_INTERNAL_H */ From f653ff7af96951faa69c68665d44bed80702544f Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 18 Dec 2025 10:57:52 -0500 Subject: [PATCH 102/107] tests/liveupdate: add in-kernel liveupdate test Introduce an in-kernel test module to validate the core logic of the Live Update Orchestrator's File-Lifecycle-Bound feature. This provides a low-level, controlled environment to test FLB registration and callback invocation without requiring userspace interaction or actual kexec reboots. The test is enabled by the CONFIG_LIVEUPDATE_TEST Kconfig option. 
Link: https://lkml.kernel.org/r/20251218155752.3045808-6-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Cc: Alexander Graf Cc: David Gow Cc: David Matlack Cc: David Rientjes Cc: Jonathan Corbet Cc: Kees Cook Cc: Mike Rapoport Cc: Petr Mladek Cc: Pratyush Yadav Cc: Samiullah Khawaja Cc: Tamir Duberstein Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + include/linux/kho/abi/luo.h | 5 + kernel/liveupdate/luo_file.c | 8 +- kernel/liveupdate/luo_internal.h | 8 ++ lib/Kconfig.debug | 23 +++++ lib/tests/Makefile | 1 + lib/tests/liveupdate.c | 158 +++++++++++++++++++++++++++++++ 7 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 lib/tests/liveupdate.c diff --git a/MAINTAINERS b/MAINTAINERS index 92b377cd131b..a2a4cfd19fad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14652,6 +14652,7 @@ F: include/linux/liveupdate.h F: include/linux/liveupdate/ F: include/uapi/linux/liveupdate.h F: kernel/liveupdate/ +F: lib/tests/liveupdate.c F: mm/memfd_luo.c F: tools/testing/selftests/liveupdate/ diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h index a44010aafb5e..46750a0ddf88 100644 --- a/include/linux/kho/abi/luo.h +++ b/include/linux/kho/abi/luo.h @@ -239,4 +239,9 @@ struct luo_flb_ser { u64 count; } __packed; +/* Kernel Live Update Test ABI */ +#ifdef CONFIG_LIVEUPDATE_TEST +#define LIVEUPDATE_TEST_FLB_COMPATIBLE(i) "liveupdate-test-flb-v" #i +#endif + #endif /* _LINUX_KHO_ABI_LUO_H */ diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index cade273c50c9..35d2a8b1a0df 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -864,6 +864,8 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) list_add_tail(&ACCESS_PRIVATE(fh, list), &luo_file_handler_list); luo_session_resume(); + liveupdate_test_register(fh); + return 0; err_resume: @@ -895,8 +897,10 @@ int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) if (!liveupdate_enabled()) return 
-EOPNOTSUPP; + liveupdate_test_unregister(fh); + if (!luo_session_quiesce()) - return -EBUSY; + goto err_register; if (!list_empty(&ACCESS_PRIVATE(fh, flb_list))) goto err_resume; @@ -909,5 +913,7 @@ int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) err_resume: luo_session_resume(); +err_register: + liveupdate_test_register(fh); return err; } diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h index 99db13d99530..8083d8739b09 100644 --- a/kernel/liveupdate/luo_internal.h +++ b/kernel/liveupdate/luo_internal.h @@ -107,4 +107,12 @@ int __init luo_flb_setup_outgoing(void *fdt); int __init luo_flb_setup_incoming(void *fdt); void luo_flb_serialize(void); +#ifdef CONFIG_LIVEUPDATE_TEST +void liveupdate_test_register(struct liveupdate_file_handler *fh); +void liveupdate_test_unregister(struct liveupdate_file_handler *fh); +#else +static inline void liveupdate_test_register(struct liveupdate_file_handler *fh) { } +static inline void liveupdate_test_unregister(struct liveupdate_file_handler *fh) { } +#endif + #endif /* _LINUX_LUO_INTERNAL_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234b73f9baf7..ef201f1cc498 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2825,6 +2825,29 @@ config LINEAR_RANGES_TEST If unsure, say N. +config LIVEUPDATE_TEST + bool "Live Update Kernel Test" + default n + depends on LIVEUPDATE + help + Enable a built-in kernel test module for the Live Update + Orchestrator. + + This module validates the File-Lifecycle-Bound subsystem by + registering a set of mock FLB objects with any real file handlers + that support live update (such as the memfd handler). + + When live update operations are performed, this test module will + output messages to the kernel log (dmesg), confirming that its + registration and various callback functions (preserve, retrieve, + finish, etc.) are being invoked correctly. 
+ + This is a debugging and regression testing tool for developers + working on the Live Update subsystem. It should not be enabled in + production kernels. + + If unsure, say N + config CMDLINE_KUNIT_TEST tristate "KUnit test for cmdline API" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/tests/Makefile b/lib/tests/Makefile index f740b0a26750..436b7b7a65f0 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_LIST_PRIVATE_KUNIT_TEST) += list-private-test.o obj-$(CONFIG_KFIFO_KUNIT_TEST) += kfifo_kunit.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o +obj-$(CONFIG_LIVEUPDATE_TEST) += liveupdate.o CFLAGS_longest_symbol_kunit.o += $(call cc-disable-warning, missing-prototypes) obj-$(CONFIG_LONGEST_SYM_KUNIT_TEST) += longest_symbol_kunit.o diff --git a/lib/tests/liveupdate.c b/lib/tests/liveupdate.c new file mode 100644 index 000000000000..496d6ef91a30 --- /dev/null +++ b/lib/tests/liveupdate.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2025, Google LLC. 
+ * Pasha Tatashin + */ + +#define pr_fmt(fmt) KBUILD_MODNAME " test: " fmt + +#include +#include +#include +#include +#include +#include "../../kernel/liveupdate/luo_internal.h" + +static const struct liveupdate_flb_ops test_flb_ops; +#define DEFINE_TEST_FLB(i) { \ + .ops = &test_flb_ops, \ + .compatible = LIVEUPDATE_TEST_FLB_COMPATIBLE(i), \ +} + +/* Number of Test FLBs to register with every file handler */ +#define TEST_NFLBS 3 +static struct liveupdate_flb test_flbs[TEST_NFLBS] = { + DEFINE_TEST_FLB(0), + DEFINE_TEST_FLB(1), + DEFINE_TEST_FLB(2), +}; + +#define TEST_FLB_MAGIC_BASE 0xFEEDF00DCAFEBEE0ULL + +static int test_flb_preserve(struct liveupdate_flb_op_args *argp) +{ + ptrdiff_t index = argp->flb - test_flbs; + + pr_info("%s: preserve was triggered\n", argp->flb->compatible); + argp->data = TEST_FLB_MAGIC_BASE + index; + + return 0; +} + +static void test_flb_unpreserve(struct liveupdate_flb_op_args *argp) +{ + pr_info("%s: unpreserve was triggered\n", argp->flb->compatible); +} + +static int test_flb_retrieve(struct liveupdate_flb_op_args *argp) +{ + ptrdiff_t index = argp->flb - test_flbs; + u64 expected_data = TEST_FLB_MAGIC_BASE + index; + + if (argp->data == expected_data) { + pr_info("%s: found flb data from the previous boot\n", + argp->flb->compatible); + argp->obj = (void *)argp->data; + } else { + pr_err("%s: ERROR - incorrect data handle: %llx, expected %llx\n", + argp->flb->compatible, argp->data, expected_data); + return -EINVAL; + } + + return 0; +} + +static void test_flb_finish(struct liveupdate_flb_op_args *argp) +{ + ptrdiff_t index = argp->flb - test_flbs; + void *expected_obj = (void *)(TEST_FLB_MAGIC_BASE + index); + + if (argp->obj == expected_obj) { + pr_info("%s: finish was triggered\n", argp->flb->compatible); + } else { + pr_err("%s: ERROR - finish called with invalid object\n", + argp->flb->compatible); + } +} + +static const struct liveupdate_flb_ops test_flb_ops = { + .preserve = test_flb_preserve, + .unpreserve = 
test_flb_unpreserve, + .retrieve = test_flb_retrieve, + .finish = test_flb_finish, + .owner = THIS_MODULE, +}; + +static void liveupdate_test_init(void) +{ + static DEFINE_MUTEX(init_lock); + static bool initialized; + int i; + + guard(mutex)(&init_lock); + + if (initialized) + return; + + for (i = 0; i < TEST_NFLBS; i++) { + struct liveupdate_flb *flb = &test_flbs[i]; + void *obj; + int err; + + err = liveupdate_flb_get_incoming(flb, &obj); + if (err && err != -ENODATA && err != -ENOENT) { + pr_err("liveupdate_flb_get_incoming for %s failed: %pe\n", + flb->compatible, ERR_PTR(err)); + } + } + initialized = true; +} + +void liveupdate_test_register(struct liveupdate_file_handler *fh) +{ + int err, i; + + liveupdate_test_init(); + + for (i = 0; i < TEST_NFLBS; i++) { + struct liveupdate_flb *flb = &test_flbs[i]; + + err = liveupdate_register_flb(fh, flb); + if (err) { + pr_err("Failed to register %s %pe\n", + flb->compatible, ERR_PTR(err)); + } + } + + err = liveupdate_register_flb(fh, &test_flbs[0]); + if (!err || err != -EEXIST) { + pr_err("Failed: %s should be already registered, but got err: %pe\n", + test_flbs[0].compatible, ERR_PTR(err)); + } + + pr_info("Registered %d FLBs with file handler: [%s]\n", + TEST_NFLBS, fh->compatible); +} + +void liveupdate_test_unregister(struct liveupdate_file_handler *fh) +{ + int err, i; + + for (i = 0; i < TEST_NFLBS; i++) { + struct liveupdate_flb *flb = &test_flbs[i]; + + err = liveupdate_unregister_flb(fh, flb); + if (err) { + pr_err("Failed to unregister %s %pe\n", + flb->compatible, ERR_PTR(err)); + } + } + + pr_info("Unregistered %d FLBs from file handler: [%s]\n", + TEST_NFLBS, fh->compatible); +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pasha Tatashin "); +MODULE_DESCRIPTION("In-kernel test for LUO mechanism"); From 0758293d5dc88a45b910f46a0c7558bf6b09e01d Mon Sep 17 00:00:00 2001 From: "Tycho Andersen (AMD)" Date: Fri, 23 Jan 2026 12:05:06 -0700 Subject: [PATCH 103/107] kho: fix doc for kho_restore_pages() This 
function returns NULL if kho_restore_page() returns NULL, which happens in a couple of corner cases. It never returns an error code. Link: https://lkml.kernel.org/r/20260123190506.1058669-1-tycho@kernel.org Signed-off-by: Tycho Andersen (AMD) Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 8a2b2a7e50fc..fb3a7b67676e 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -299,7 +299,7 @@ EXPORT_SYMBOL_GPL(kho_restore_folio); * Restore a contiguous list of order 0 pages that was preserved with * kho_preserve_pages(). * - * Return: 0 on success, error code on failure + * Return: the first page on success, NULL on failure. */ struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages) { From 9dc052234da736f7749f19ab6936342ec7dbe3ac Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 16 Jan 2026 09:17:30 +0000 Subject: [PATCH 104/107] kcsan, compiler_types: avoid duplicate type issues in BPF Type Format Enabling KCSAN is causing a large number of duplicate types in BTF for core kernel structs like task_struct [1]. This is due to the definition in include/linux/compiler_types.h `#ifdef __SANITIZE_THREAD__ ... `#define __data_racy volatile .. `#else ... `#define __data_racy ... `#endif Because some objects in the kernel are compiled without KCSAN flags (KCSAN_SANITIZE) we sometimes get the empty __data_racy annotation for objects; as a result we get multiple conflicting representations of the associated structs in DWARF, and these lead to multiple instances of core kernel types in BTF since they cannot be deduplicated due to the additional modifier in some instances. 
Moving the __data_racy definition under CONFIG_KCSAN avoids this problem, since the volatile modifier will be present for both KCSAN and KCSAN_SANITIZE objects in a CONFIG_KCSAN=y kernel. Link: https://lkml.kernel.org/r/20260116091730.324322-1-alan.maguire@oracle.com Fixes: 31f605a308e6 ("kcsan, compiler_types: Introduce __data_racy type qualifier") Signed-off-by: Alan Maguire Reported-by: Nilay Shroff Tested-by: Nilay Shroff Suggested-by: Marco Elver Reviewed-by: Marco Elver Acked-by: Yonghong Song Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Bart van Assche Cc: Daniel Borkman Cc: Eduard Zingerman Cc: Hao Luo Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Jason A. Donenfeld Cc: Jiri Olsa Cc: John Fastabend Cc: Kees Cook Cc: KP Singh Cc: Martin KaFai Lau Cc: Miguel Ojeda Cc: Naman Jain Cc: Nathan Chancellor Cc: "Paul E . McKenney" Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: Uros Bizjak Cc: Signed-off-by: Andrew Morton --- include/linux/compiler_types.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index d3318a3c2577..86111a189a87 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -303,6 +303,22 @@ struct ftrace_likely_data { # define __no_kasan_or_inline __always_inline #endif +#ifdef CONFIG_KCSAN +/* + * Type qualifier to mark variables where all data-racy accesses should be + * ignored by KCSAN. Note, the implementation simply marks these variables as + * volatile, since KCSAN will treat such accesses as "marked". + * + * Defined here because defining __data_racy as volatile for KCSAN objects only + * causes problems in BPF Type Format (BTF) generation since struct members + * of core kernel data structs will be volatile in some objects and not in + * others. Instead define it globally for KCSAN kernels. 
+ */ +# define __data_racy volatile +#else +# define __data_racy +#endif + #ifdef __SANITIZE_THREAD__ /* * Clang still emits instrumentation for __tsan_func_{entry,exit}() and builtin @@ -314,16 +330,9 @@ struct ftrace_likely_data { * disable all instrumentation. See Kconfig.kcsan where this is mandatory. */ # define __no_kcsan __no_sanitize_thread __disable_sanitizer_instrumentation -/* - * Type qualifier to mark variables where all data-racy accesses should be - * ignored by KCSAN. Note, the implementation simply marks these variables as - * volatile, since KCSAN will treat such accesses as "marked". - */ -# define __data_racy volatile # define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused #else # define __no_kcsan -# define __data_racy #endif #ifdef __SANITIZE_MEMORY__ From cafe4074a7221dca2fa954dd1ab0cf99b6318e23 Mon Sep 17 00:00:00 2001 From: Shengming Hu Date: Mon, 19 Jan 2026 21:59:05 +0800 Subject: [PATCH 105/107] watchdog/softlockup: fix sample ring index wrap in need_counting_irqs() cpustat_tail indexes cpustat_util[], which is a NUM_SAMPLE_PERIODS-sized ring buffer. need_counting_irqs() currently wraps the index using NUM_HARDIRQ_REPORT, which only happens to match NUM_SAMPLE_PERIODS. Use NUM_SAMPLE_PERIODS for the wrap to keep the ring math correct even if the NUM_HARDIRQ_REPORT or NUM_SAMPLE_PERIODS changes. 
Link: https://lkml.kernel.org/r/tencent_7068189CB6D6689EB353F3D17BF5A5311A07@qq.com Fixes: e9a9292e2368 ("watchdog/softlockup: Report the most frequent interrupts") Signed-off-by: Shengming Hu Reviewed-by: Petr Mladek Cc: Ingo Molnar Cc: Mark Brown Cc: Thomas Gleixner Cc: Zhang Run Cc: Signed-off-by: Andrew Morton --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index b4d5fbdb933a..7d675781bc91 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -550,7 +550,7 @@ static bool need_counting_irqs(void) u8 util; int tail = __this_cpu_read(cpustat_tail); - tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT; + tail = (tail + NUM_SAMPLE_PERIODS - 1) % NUM_SAMPLE_PERIODS; util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]); return util > HARDIRQ_PERCENT_THRESH; } From 76149d53502cf17ef3ae454ff384551236fba867 Mon Sep 17 00:00:00 2001 From: Jinliang Zheng Date: Wed, 28 Jan 2026 16:30:07 +0800 Subject: [PATCH 106/107] procfs: fix missing RCU protection when reading real_parent in do_task_stat() When reading /proc/[pid]/stat, do_task_stat() accesses task->real_parent without proper RCU protection, which leads to: cpu 0 cpu 1 ----- ----- do_task_stat var = task->real_parent release_task call_rcu(delayed_put_task_struct) task_tgid_nr_ns(var) rcu_read_lock <--- Too late to protect task->real_parent! task_pid_ptr <--- UAF! rcu_read_unlock This patch uses task_ppid_nr_ns() instead of task_tgid_nr_ns() to add proper RCU protection for accessing task->real_parent. 
Link: https://lkml.kernel.org/r/20260128083007.3173016-1-alexjlzheng@tencent.com Fixes: 06fffb1267c9 ("do_task_stat: don't take rcu_read_lock()") Signed-off-by: Jinliang Zheng Acked-by: Oleg Nesterov Cc: David Hildenbrand Cc: Ingo Molnar Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: ruippan Cc: Usama Arif Signed-off-by: Andrew Morton --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 39e9246f6e4a..f447e734612a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -529,7 +529,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, } sid = task_session_nr_ns(task, ns); - ppid = task_tgid_nr_ns(task->real_parent, ns); + ppid = task_ppid_nr_ns(task, ns); pgid = task_pgrp_nr_ns(task, ns); unlock_task_sighand(task, &flags); From 0dddf20b4fd4afd59767acc144ad4da60259f21f Mon Sep 17 00:00:00 2001 From: Qiliang Yuan Date: Wed, 28 Jan 2026 21:26:14 -0500 Subject: [PATCH 107/107] watchdog/hardlockup: simplify perf event probe and remove per-cpu dependency Simplify the hardlockup detector's probe path and remove its implicit dependency on pinned per-cpu execution. Refactor hardlockup_detector_event_create() to be stateless. Return the created perf_event pointer to the caller instead of directly modifying the per-cpu 'watchdog_ev' variable. This allows the probe path to safely manage a temporary event without the risk of leaving stale pointers should task migration occur. 
Link: https://lkml.kernel.org/r/20260129022629.2201331-1-realwujing@gmail.com Signed-off-by: Shouxin Sun Signed-off-by: Junnan Zhang Signed-off-by: Qiliang Yuan Signed-off-by: Qiliang Yuan Reviewed-by: Douglas Anderson Cc: Jinchao Wang Cc: Ingo Molnar Cc: Li Huafei Cc: Song Liu Cc: Thorsten Blum Cc: Wang Jinchao Cc: Yicong Yang Signed-off-by: Andrew Morton --- kernel/watchdog_perf.c | 50 +++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index d3ca70e3c256..cf05775a96d3 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -118,18 +118,11 @@ static void watchdog_overflow_callback(struct perf_event *event, watchdog_hardlockup_check(smp_processor_id(), regs); } -static int hardlockup_detector_event_create(void) +static struct perf_event *hardlockup_detector_event_create(unsigned int cpu) { - unsigned int cpu; struct perf_event_attr *wd_attr; struct perf_event *evt; - /* - * Preemption is not disabled because memory will be allocated. - * Ensure CPU-locality by calling this in per-CPU kthread. 
- */ - WARN_ON(!is_percpu_thread()); - cpu = raw_smp_processor_id(); wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); @@ -143,14 +136,7 @@ static int hardlockup_detector_event_create(void) watchdog_overflow_callback, NULL); } - if (IS_ERR(evt)) { - pr_debug("Perf event create on CPU %d failed with %ld\n", cpu, - PTR_ERR(evt)); - return PTR_ERR(evt); - } - WARN_ONCE(this_cpu_read(watchdog_ev), "unexpected watchdog_ev leak"); - this_cpu_write(watchdog_ev, evt); - return 0; + return evt; } /** @@ -159,17 +145,26 @@ static int hardlockup_detector_event_create(void) */ void watchdog_hardlockup_enable(unsigned int cpu) { + struct perf_event *evt; + WARN_ON_ONCE(cpu != smp_processor_id()); - if (hardlockup_detector_event_create()) + evt = hardlockup_detector_event_create(cpu); + if (IS_ERR(evt)) { + pr_debug("Perf event create on CPU %d failed with %ld\n", cpu, + PTR_ERR(evt)); return; + } /* use original value for check */ if (!atomic_fetch_inc(&watchdog_cpus)) pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); + WARN_ONCE(this_cpu_read(watchdog_ev), "unexpected watchdog_ev leak"); + this_cpu_write(watchdog_ev, evt); + watchdog_init_timestamp(); - perf_event_enable(this_cpu_read(watchdog_ev)); + perf_event_enable(evt); } /** @@ -263,19 +258,30 @@ bool __weak __init arch_perf_nmi_is_available(void) */ int __init watchdog_hardlockup_probe(void) { + struct perf_event *evt; + unsigned int cpu; int ret; if (!arch_perf_nmi_is_available()) return -ENODEV; - ret = hardlockup_detector_event_create(); + if (!hw_nmi_get_sample_period(watchdog_thresh)) + return -EINVAL; - if (ret) { + /* + * Test hardware PMU availability by creating a temporary perf event. + * The event is released immediately. 
+ */ + cpu = raw_smp_processor_id(); + evt = hardlockup_detector_event_create(cpu); + if (IS_ERR(evt)) { pr_info("Perf NMI watchdog permanently disabled\n"); + ret = PTR_ERR(evt); } else { - perf_event_release_kernel(this_cpu_read(watchdog_ev)); - this_cpu_write(watchdog_ev, NULL); + perf_event_release_kernel(evt); + ret = 0; } + return ret; }