From 1c88823a1958011343fa33b12a40fe42e829d9d9 Mon Sep 17 00:00:00 2001
From: Adarsh Das <adarshdas950@gmail.com>
Date: Tue, 3 Feb 2026 22:53:56 +0530
Subject: [PATCH 1/9] btrfs: handle unexpected exact match in
 btrfs_set_inode_index_count()

We search with offset (u64)-1 which should never match exactly.
Previously the code silently returned success without setting the index
count. Now log an error and return -EUCLEAN instead.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Adarsh Das <adarshdas950@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1aebd2ee2704..b6c763a17406 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6149,9 +6149,18 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		return ret;
-	/* FIXME: we should be able to handle this */
-	if (ret == 0)
-		return ret;
+
+	if (unlikely(ret == 0)) {
+		/*
+		 * Key with offset -1 found, there would have to exist a dir
+		 * index item with such offset, but this is out of the valid
+		 * range.
+		 */
+		btrfs_err(root->fs_info,
+			  "unexpected exact match for DIR_INDEX key, inode %llu",
+			  btrfs_ino(inode));
+		return -EUCLEAN;
+	}
 
 	if (path->slots[0] == 0) {
 		inode->index_cnt = BTRFS_DIR_START_INDEX;

From be6324a809dbda76d5fdb23720ad9b20e5c1905c Mon Sep 17 00:00:00 2001
From: Adarsh Das <adarshdas950@gmail.com>
Date: Tue, 3 Feb 2026 22:53:57 +0530
Subject: [PATCH 2/9] btrfs: replace BUG() with error handling in
 __btrfs_balance()

We search with offset (u64)-1 which should never match exactly.
Previously this was handled with BUG(). Now log an error
and return -EUCLEAN.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Adarsh Das <adarshdas950@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f281d113519b..50f7aae70418 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4367,8 +4367,14 @@ again:
 		 * this shouldn't happen, it means the last relocate
 		 * failed
 		 */
-		if (ret == 0)
-			BUG(); /* FIXME break ? */
+		if (unlikely(ret == 0)) {
+			btrfs_err(fs_info,
+				  "unexpected exact match of CHUNK_ITEM in chunk tree, offset 0x%llx",
+				  key.offset);
+			mutex_unlock(&fs_info->reclaim_bgs_lock);
+			ret = -EUCLEAN;
+			goto error;
+		}
 
 		ret = btrfs_previous_item(chunk_root, path, 0,
 					  BTRFS_CHUNK_ITEM_KEY);

From 5870ec7c8fe57a8b2c65005e5da5efc054faa3e6 Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiashengjiangcool@gmail.com>
Date: Wed, 14 Jan 2026 01:13:38 +0000
Subject: [PATCH 3/9] btrfs: reset block group size class when it becomes empty

Block group size classes are not managed consistently.
Currently, btrfs_use_block_group_size_class() sets a block group's size
class to specialize it for a specific allocation size. However, this
size class remains "stale" even if the block group becomes completely
empty (both used and reserved bytes reach zero).

This happens in two scenarios:

1. When space reservations are freed (e.g., due to errors or transaction
   aborts) via btrfs_free_reserved_bytes().
2. When the last extent in a block group is freed via
   btrfs_update_block_group().

While size classes are advisory, a stale size class can cause
find_free_extent to unnecessarily skip candidate block groups during
initial search loops. This undermines the purpose of size classes to
reduce fragmentation by keeping block groups restricted to a specific
size class when they could be reused for any size.

Fix this by resetting the size class to BTRFS_BG_SZ_NONE whenever a
block group's used and reserved counts both reach zero. This ensures
that empty block groups are fully available for any allocation size in
the next cycle.

Fixes: 52bb7a2166af ("btrfs: introduce size class to block group allocator")
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Jiasheng Jiang <jiashengjiangcool@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 3186ed4fd26d..5f76683b3f21 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3760,6 +3760,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 	return ret;
 }
 
+static void btrfs_maybe_reset_size_class(struct btrfs_block_group *bg)
+{
+	lockdep_assert_held(&bg->lock);
+	if (btrfs_block_group_should_use_size_class(bg) &&
+	    bg->used == 0 && bg->reserved == 0)
+		bg->size_class = BTRFS_BG_SZ_NONE;
+}
+
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 			     u64 bytenr, u64 num_bytes, bool alloc)
 {
@@ -3824,6 +3832,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 		old_val -= num_bytes;
 		cache->used = old_val;
 		cache->pinned += num_bytes;
+		btrfs_maybe_reset_size_class(cache);
 		btrfs_space_info_update_bytes_pinned(space_info, num_bytes);
 		space_info->bytes_used -= num_bytes;
 		space_info->disk_used -= num_bytes * factor;
@@ -3952,6 +3961,7 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
 	spin_lock(&cache->lock);
 	bg_ro = cache->ro;
 	cache->reserved -= num_bytes;
+	btrfs_maybe_reset_size_class(cache);
 	if (is_delalloc)
 		cache->delalloc_bytes -= num_bytes;
 	spin_unlock(&cache->lock);

From 8ceaad6cd6e7fa5f73b0b2796a2e85d75d37e9f3 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 27 Jan 2026 15:46:55 +1030
Subject: [PATCH 4/9] btrfs: do not ASSERT() when the fs flips RO inside
 btrfs_repair_io_failure()

[BUG]
There is a bug report that when btrfs hits ENOSPC error in a critical
path, btrfs flips RO (this part is expected, although the ENOSPC bug
still needs to be addressed).

The problem is after the RO flip, if there is a read repair pending, we
can hit the ASSERT() inside btrfs_repair_io_failure() like the following:

  BTRFS info (device vdc): relocating block group 30408704 flags metadata|raid1
  ------------[ cut here ]------------
  BTRFS: Transaction aborted (error -28)
  WARNING: fs/btrfs/extent-tree.c:3235 at __btrfs_free_extent.isra.0+0x453/0xfd0, CPU#1: btrfs/383844
  Modules linked in: kvm_intel kvm irqbypass
  [...]
  ---[ end trace 0000000000000000 ]---
  BTRFS info (device vdc state EA): 2 enospc errors during balance
  BTRFS info (device vdc state EA): balance: ended with status: -30
  BTRFS error (device vdc state EA): parent transid verify failed on logical 30556160 mirror 2 wanted 8 found 6
  BTRFS error (device vdc state EA): bdev /dev/nvme0n1 errs: wr 0, rd 0, flush 0, corrupt 10, gen 0
  [...]
  assertion failed: !(fs_info->sb->s_flags & SB_RDONLY) :: 0, in fs/btrfs/bio.c:938
  ------------[ cut here ]------------
  assertion failed: !(fs_info->sb->s_flags & SB_RDONLY) :: 0, in fs/btrfs/bio.c:938
  kernel BUG at fs/btrfs/bio.c:938!
  Oops: invalid opcode: 0000 [#1] SMP NOPTI
  CPU: 0 UID: 0 PID: 868 Comm: kworker/u8:13 Tainted: G        W        N  6.19.0-rc6+ #4788 PREEMPT(full)
  Tainted: [W]=WARN, [N]=TEST
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
  Workqueue: btrfs-endio simple_end_io_work
  RIP: 0010:btrfs_repair_io_failure.cold+0xb2/0x120
  RSP: 0000:ffffc90001d2bcf0 EFLAGS: 00010246
  RAX: 0000000000000051 RBX: 0000000000001000 RCX: 0000000000000000
  RDX: 0000000000000000 RSI: ffffffff8305cf42 RDI: 00000000ffffffff
  RBP: 0000000000000002 R08: 00000000fffeffff R09: ffffffff837fa988
  R10: ffffffff8327a9e0 R11: 6f69747265737361 R12: ffff88813018d310
  R13: ffff888168b8a000 R14: ffffc90001d2bd90 R15: ffff88810a169000
  FS:  0000000000000000(0000) GS:ffff8885e752c000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  ------------[ cut here ]------------

[CAUSE]
The cause of -ENOSPC error during the test case btrfs/124 is still
unknown, although it's known that we still have cases where metadata can
be over-committed but can not be fulfilled correctly, thus if we hit
such ENOSPC error inside a critical path, we have no choice but to abort
the current transaction.

This will mark the fs read-only.

The problem is that the btrfs_repair_io_failure() path requires
the fs not to be mounted read-only. This is normally fine, but if we are
doing a read-repair while the fs flips RO due to a critical error,
we can enter btrfs_repair_io_failure() with super block set to
read-only, thus triggering the above crash.

[FIX]
Just replace the ASSERT() with a proper return if the fs is already
read-only.

Reported-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/linux-btrfs/20260126045555.GB31641@lst.de/
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/bio.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 0a69e09bfe28..4a1528803ff7 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -934,7 +934,6 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
 	struct bio *bio = NULL;
 	int ret = 0;
 
-	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
 	BUG_ON(!mirror_num);
 
 	/* Basic alignment checks. */
@@ -946,6 +945,13 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
 	ASSERT(step <= length);
 	ASSERT(is_power_of_2(step));
 
+	/*
+	 * The fs either mounted RO or hit critical errors, no need
+	 * to continue repairing.
+	 */
+	if (unlikely(sb_rdonly(fs_info->sb)))
+		return 0;
+
 	if (btrfs_repair_one_zone(fs_info, logical))
 		return 0;
 

From 2155d0c0a761a56ce7ede83a26eb23ea0f935260 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 3 Feb 2026 18:03:35 +0000
Subject: [PATCH 5/9] btrfs: use the correct type to initialize block reserve
 for delayed refs

When initializing the delayed refs block reserve for a transaction handle
we are passing a type of BTRFS_BLOCK_RSV_DELOPS, which is meant for
delayed items and not for delayed refs. The correct type for delayed refs
is BTRFS_BLOCK_RSV_DELREFS.

On release of any excess space reserved in a local delayed refs reserve,
we also should transfer that excess space to the global block reserve
(if it's full, we return to the space info for general availability).

By initializing a transaction's local delayed refs block reserve with a
type of BTRFS_BLOCK_RSV_DELOPS, we were also causing any excess space
released from the delayed block reserve (fs_info->delayed_block_rsv, used
for delayed inodes and items) to be transferred to the global block
reserve instead of the global delayed refs block reserve. This was an
unintentional change in commit 28270e25c69a ("btrfs: always reserve space
for delayed refs when starting transaction"), but it's not particularly
serious as things tend to cancel out each other most of the time and it's
relatively rare to be anywhere near exhaustion of the global reserve.

Fix this by initializing a transaction's local delayed refs reserve with
a type of BTRFS_BLOCK_RSV_DELREFS and making btrfs_block_rsv_release()
attempt to transfer unused space from such a reserve into the global block
reserve, just as we did before that commit for when the block reserve is
a delayed refs rsv.

Reported-by: Alex Lyakas <alex.lyakas@zadara.com>
Link: https://lore.kernel.org/linux-btrfs/CAOcd+r0FHG5LWzTSu=LknwSoqxfw+C00gFAW7fuX71+Z5AfEew@mail.gmail.com/
Fixes: 28270e25c69a ("btrfs: always reserve space for delayed refs when starting transaction")
Reviewed-by: Alex Lyakas <alex.lyakas@zadara.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-rsv.c   | 7 ++++---
 fs/btrfs/transaction.c | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index e823230c09b7..93c371db8731 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -276,10 +276,11 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
 	struct btrfs_block_rsv *target = NULL;
 
 	/*
-	 * If we are a delayed block reserve then push to the global rsv,
-	 * otherwise dump into the global delayed reserve if it is not full.
+	 * If we are a delayed refs block reserve then push to the global
+	 * reserve, otherwise dump into the global delayed refs reserve if it is
+	 * not full.
 	 */
-	if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS)
+	if (block_rsv->type == BTRFS_BLOCK_RSV_DELREFS)
 		target = global_rsv;
 	else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv))
 		target = delayed_rsv;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0b2498749b1e..463238ca8a4d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -726,7 +726,7 @@ again:
 
 	h->type = type;
 	INIT_LIST_HEAD(&h->new_bgs);
-	btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELOPS);
+	btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELREFS);
 
 	smp_mb();
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START &&

From f46a283bbc58d7871ab22f5882e942f889fa2b0e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 3 Feb 2026 15:59:26 +0000
Subject: [PATCH 6/9] btrfs: change unaligned root messages to error level in
 btrfs_validate_super()

If the root nodes for the chunk root, tree root or log root are not sector
size aligned, we are logging a warning message but these are in fact
errors that make the super block validation fail. So change the level of
the messages from warning to error.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 20c405a4789d..13e400046c87 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2416,18 +2416,18 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
 
 	/* Root alignment check */
 	if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
-		btrfs_warn(fs_info, "tree_root block unaligned: %llu",
-			   btrfs_super_root(sb));
+		btrfs_err(fs_info, "tree_root block unaligned: %llu",
+			  btrfs_super_root(sb));
 		ret = -EINVAL;
 	}
 	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
-		btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
+		btrfs_err(fs_info, "chunk_root block unaligned: %llu",
 			   btrfs_super_chunk_root(sb));
 		ret = -EINVAL;
 	}
 	if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
-		btrfs_warn(fs_info, "log_root block unaligned: %llu",
-			   btrfs_super_log_root(sb));
+		btrfs_err(fs_info, "log_root block unaligned: %llu",
+			  btrfs_super_log_root(sb));
 		ret = -EINVAL;
 	}
 

From 29e525665a77a70ea8f19310e96b1b1472b07fc9 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Sun, 8 Feb 2026 18:30:08 +0000
Subject: [PATCH 7/9] btrfs: fix lost return value on error in finish_verity()

If btrfs_update_inode() or del_orphan() fail, we jump to the 'end_trans'
label and then return 0 instead of the error returned by one of those
calls. Fix this and return the error.

Fixes: 61fb7f04ee06 ("btrfs: remove out label in finish_verity()")
Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-btrfs/20260208161129.3888234-1-clm@meta.com/
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/verity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
index 06cbd6f00a78..95ea794f20d3 100644
--- a/fs/btrfs/verity.c
+++ b/fs/btrfs/verity.c
@@ -552,7 +552,7 @@ static int finish_verity(struct btrfs_inode *inode, const void *desc,
 	btrfs_set_fs_compat_ro(root->fs_info, VERITY);
 end_trans:
 	btrfs_end_transaction(trans);
-	return 0;
+	return ret;
 
 }
 

From 7b54e08f2ef8f94d7e3959dde3694c4c34fa7701 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Sun, 8 Feb 2026 19:48:14 +0000
Subject: [PATCH 8/9] btrfs: fix lost error return in btrfs_find_orphan_roots()

If the call to btrfs_get_fs_root() returns an error different from -ENOENT
we break out of the loop and then return 0, losing the error. Fix this
by returning the error instead of breaking from the loop.

Reported-by: Chris Mason <clm@meta.com>
Link: https://lore.kernel.org/linux-btrfs/20260208185321.1128472-1-clm@meta.com/
Fixes: 8670a25ecb2f ("btrfs: use single return variable in btrfs_find_orphan_roots()")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/root-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 37a4173c0a0b..d85a09ae1733 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -257,7 +257,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
 		root = btrfs_get_fs_root(fs_info, root_objectid, false);
 		ret = PTR_ERR_OR_ZERO(root);
 		if (ret && ret != -ENOENT) {
-			break;
+			return ret;
 		} else if (ret == -ENOENT) {
 			struct btrfs_trans_handle *trans;
 

From ecb7c2484cfc83a93658907580035a8adf1e0a92 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 4 Feb 2026 17:15:53 +0000
Subject: [PATCH 9/9] btrfs: fix invalid leaf access in btrfs_quota_enable() if
 ref key not found

If btrfs_search_slot_for_read() returns 1, it means we did not find any
key greater than or equal to the key we asked for, meaning we have
reached the end of the tree and therefore the path is not valid. If
this happens we need to break out of the loop and stop, instead of
continuing and accessing an invalid path.

Fixes: 5223cc60b40a ("btrfs: drop the path before adding qgroup items when enabling qgroups")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index f53c313ab6e4..38adadb936dc 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1169,11 +1169,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
 			}
 			if (ret > 0) {
 				/*
-				 * Shouldn't happen, but in case it does we
-				 * don't need to do the btrfs_next_item, just
-				 * continue.
+				 * Shouldn't happen because the key should still
+				 * be there (return 0), but in case it does it
+				 * means we have reached the end of the tree -
+				 * there are no more leaves with items that have
+				 * a key greater than or equals to @found_key,
+				 * so just stop the search loop.
 				 */
-				continue;
+				break;
 			}
 		}
 		ret = btrfs_next_item(tree_root, path);