From 1c88823a1958011343fa33b12a40fe42e829d9d9 Mon Sep 17 00:00:00 2001 From: Adarsh Das Date: Tue, 3 Feb 2026 22:53:56 +0530 Subject: [PATCH 1/9] btrfs: handle unexpected exact match in btrfs_set_inode_index_count() We search with offset (u64)-1 which should never match exactly. Previously the code silently returned success without setting the index count. Now logs an error and return -EUCLEAN instead. Reviewed-by: Qu Wenruo Signed-off-by: Adarsh Das Reviewed-by: David Sterba , Signed-off-by: David Sterba --- fs/btrfs/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1aebd2ee2704..b6c763a17406 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6149,9 +6149,18 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - /* FIXME: we should be able to handle this */ - if (ret == 0) - return ret; + + if (unlikely(ret == 0)) { + /* + * Key with offset -1 found, there would have to exist a dir + * index item with such offset, but this is out of the valid + * range. + */ + btrfs_err(root->fs_info, + "unexpected exact match for DIR_INDEX key, inode %llu", + btrfs_ino(inode)); + return -EUCLEAN; + } if (path->slots[0] == 0) { inode->index_cnt = BTRFS_DIR_START_INDEX; From be6324a809dbda76d5fdb23720ad9b20e5c1905c Mon Sep 17 00:00:00 2001 From: Adarsh Das Date: Tue, 3 Feb 2026 22:53:57 +0530 Subject: [PATCH 2/9] btrfs: replace BUG() with error handling in __btrfs_balance() We search with offset (u64)-1 which should never match exactly. Previously this was handled with BUG(). Now logs an error and return -EUCLEAN. Reviewed-by: Qu Wenruo Signed-off-by: Adarsh Das Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f281d113519b..50f7aae70418 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4367,8 +4367,14 @@ again: * this shouldn't happen, it means the last relocate * failed */ - if (ret == 0) - BUG(); /* FIXME break ? */ + if (unlikely(ret == 0)) { + btrfs_err(fs_info, + "unexpected exact match of CHUNK_ITEM in chunk tree, offset 0x%llx", + key.offset); + mutex_unlock(&fs_info->reclaim_bgs_lock); + ret = -EUCLEAN; + goto error; + } ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); From 5870ec7c8fe57a8b2c65005e5da5efc054faa3e6 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 14 Jan 2026 01:13:38 +0000 Subject: [PATCH 3/9] btrfs: reset block group size class when it becomes empty Block group size classes are managed consistently everywhere. Currently, btrfs_use_block_group_size_class() sets a block group's size class to specialize it for a specific allocation size. However, this size class remains "stale" even if the block group becomes completely empty (both used and reserved bytes reach zero). This happens in two scenarios: 1. When space reservations are freed (e.g., due to errors or transaction aborts) via btrfs_free_reserved_bytes(). 2. When the last extent in a block group is freed via btrfs_update_block_group(). While size classes are advisory, a stale size class can cause find_free_extent to unnecessarily skip candidate block groups during initial search loops. This undermines the purpose of size classes to reduce fragmentation by keeping block groups restricted to a specific size class when they could be reused for any size. Fix this by resetting the size class to BTRFS_BG_SZ_NONE whenever a block group's used and reserved counts both reach zero. This ensures that empty block groups are fully available for any allocation size in the next cycle. Fixes: 52bb7a2166af ("btrfs: introduce size class to block group allocator") Reviewed-by: Boris Burkov Signed-off-by: Jiasheng Jiang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 3186ed4fd26d..5f76683b3f21 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3760,6 +3760,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) return ret; } +static void btrfs_maybe_reset_size_class(struct btrfs_block_group *bg) +{ + lockdep_assert_held(&bg->lock); + if (btrfs_block_group_should_use_size_class(bg) && + bg->used == 0 && bg->reserved == 0) + bg->size_class = BTRFS_BG_SZ_NONE; +} + int btrfs_update_block_group(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, bool alloc) { @@ -3824,6 +3832,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, old_val -= num_bytes; cache->used = old_val; cache->pinned += num_bytes; + btrfs_maybe_reset_size_class(cache); btrfs_space_info_update_bytes_pinned(space_info, num_bytes); space_info->bytes_used -= num_bytes; space_info->disk_used -= num_bytes * factor; @@ -3952,6 +3961,7 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes, spin_lock(&cache->lock); bg_ro = cache->ro; cache->reserved -= num_bytes; + btrfs_maybe_reset_size_class(cache); if (is_delalloc) cache->delalloc_bytes -= num_bytes; spin_unlock(&cache->lock); From 8ceaad6cd6e7fa5f73b0b2796a2e85d75d37e9f3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 27 Jan 2026 15:46:55 +1030 Subject: [PATCH 4/9] btrfs: do not ASSERT() when the fs flips RO inside btrfs_repair_io_failure() [BUG] There is a bug report that when btrfs hits ENOSPC error in a critical path, btrfs flips RO (this part is expected, although the ENOSPC bug still needs to be addressed). The problem is after the RO flip, if there is a read repair pending, we can hit the ASSERT() inside btrfs_repair_io_failure() like the following: BTRFS info (device vdc): relocating block group 30408704 flags metadata|raid1 ------------[ cut here ]------------ BTRFS: Transaction aborted (error -28) WARNING: fs/btrfs/extent-tree.c:3235 at __btrfs_free_extent.isra.0+0x453/0xfd0, CPU#1: btrfs/383844 Modules linked in: kvm_intel kvm irqbypass [...] ---[ end trace 0000000000000000 ]--- BTRFS info (device vdc state EA): 2 enospc errors during balance BTRFS info (device vdc state EA): balance: ended with status: -30 BTRFS error (device vdc state EA): parent transid verify failed on logical 30556160 mirror 2 wanted 8 found 6 BTRFS error (device vdc state EA): bdev /dev/nvme0n1 errs: wr 0, rd 0, flush 0, corrupt 10, gen 0 [...] assertion failed: !(fs_info->sb->s_flags & SB_RDONLY) :: 0, in fs/btrfs/bio.c:938 ------------[ cut here ]------------ assertion failed: !(fs_info->sb->s_flags & SB_RDONLY) :: 0, in fs/btrfs/bio.c:938 kernel BUG at fs/btrfs/bio.c:938! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 0 UID: 0 PID: 868 Comm: kworker/u8:13 Tainted: G W N 6.19.0-rc6+ #4788 PREEMPT(full) Tainted: [W]=WARN, [N]=TEST Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 Workqueue: btrfs-endio simple_end_io_work RIP: 0010:btrfs_repair_io_failure.cold+0xb2/0x120 RSP: 0000:ffffc90001d2bcf0 EFLAGS: 00010246 RAX: 0000000000000051 RBX: 0000000000001000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8305cf42 RDI: 00000000ffffffff RBP: 0000000000000002 R08: 00000000fffeffff R09: ffffffff837fa988 R10: ffffffff8327a9e0 R11: 6f69747265737361 R12: ffff88813018d310 R13: ffff888168b8a000 R14: ffffc90001d2bd90 R15: ffff88810a169000 FS: 0000000000000000(0000) GS:ffff8885e752c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 ------------[ cut here ]------------ [CAUSE] The cause of -ENOSPC error during the test case btrfs/124 is still unknown, although it's known that we still have cases where metadata can be over-committed but can not be fulfilled correctly, thus if we hit such ENOSPC error inside a critical path, we have no choice but abort the current transaction. This will mark the fs read-only. The problem is inside the btrfs_repair_io_failure() path that we require the fs not to be mount read-only. This is normally fine, but if we are doing a read-repair meanwhile the fs flips RO due to a critical error, we can enter btrfs_repair_io_failure() with super block set to read-only, thus triggering the above crash. [FIX] Just replace the ASSERT() with a proper return if the fs is already read-only. Reported-by: Christoph Hellwig Link: https://lore.kernel.org/linux-btrfs/20260126045555.GB31641@lst.de/ Tested-by: Christoph Hellwig Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/bio.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 0a69e09bfe28..4a1528803ff7 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -934,7 +934,6 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff, struct bio *bio = NULL; int ret = 0; - ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); BUG_ON(!mirror_num); /* Basic alignment checks. */ @@ -946,6 +945,13 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff, ASSERT(step <= length); ASSERT(is_power_of_2(step)); + /* + * The fs either mounted RO or hit critical errors, no need + * to continue repairing. + */ + if (unlikely(sb_rdonly(fs_info->sb))) + return 0; + if (btrfs_repair_one_zone(fs_info, logical)) return 0; From 2155d0c0a761a56ce7ede83a26eb23ea0f935260 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 3 Feb 2026 18:03:35 +0000 Subject: [PATCH 5/9] btrfs: use the correct type to initialize block reserve for delayed refs When initializing the delayed refs block reserve for a transaction handle we are passing a type of BTRFS_BLOCK_RSV_DELOPS, which is meant for delayed items and not for delayed refs. The correct type for delayed refs is BTRFS_BLOCK_RSV_DELREFS. On release of any excess space reserved in a local delayed refs reserve, we also should transfer that excess space to the global block reserve (it it's full, we return to the space info for general availability). By initializing a transaction's local delayed refs block reserve with a type of BTRFS_BLOCK_RSV_DELOPS, we were also causing any excess space released from the delayed block reserve (fs_info->delayed_block_rsv, used for delayed inodes and items) to be transferred to the global block reserve instead of the global delayed refs block reserve. This was an unintentional change in commit 28270e25c69a ("btrfs: always reserve space for delayed refs when starting transaction"), but it's not particularly serious as things tend to cancel out each other most of the time and it's relatively rare to be anywhere near exhaustion of the global reserve. Fix this by initializing a transaction's local delayed refs reserve with a type of BTRFS_BLOCK_RSV_DELREFS and making btrfs_block_rsv_release() attempt to transfer unused space from such a reserve into the global block reserve, just as we did before that commit for when the block reserve is a delayed refs rsv. Reported-by: Alex Lyakas Link: https://lore.kernel.org/linux-btrfs/CAOcd+r0FHG5LWzTSu=LknwSoqxfw+C00gFAW7fuX71+Z5AfEew@mail.gmail.com/ Fixes: 28270e25c69a ("btrfs: always reserve space for delayed refs when starting transaction") Reviewed-by: Alex Lyakas Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/block-rsv.c | 7 ++++--- fs/btrfs/transaction.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index e823230c09b7..93c371db8731 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -276,10 +276,11 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *target = NULL; /* - * If we are a delayed block reserve then push to the global rsv, - * otherwise dump into the global delayed reserve if it is not full. + * If we are a delayed refs block reserve then push to the global + * reserve, otherwise dump into the global delayed refs reserve if it is + * not full. */ - if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS) + if (block_rsv->type == BTRFS_BLOCK_RSV_DELREFS) target = global_rsv; else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv)) target = delayed_rsv; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0b2498749b1e..463238ca8a4d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -726,7 +726,7 @@ again: h->type = type; INIT_LIST_HEAD(&h->new_bgs); - btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELOPS); + btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELREFS); smp_mb(); if (cur_trans->state >= TRANS_STATE_COMMIT_START && From f46a283bbc58d7871ab22f5882e942f889fa2b0e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 3 Feb 2026 15:59:26 +0000 Subject: [PATCH 6/9] btrfs: change unaligned root messages to error level in btrfs_validate_super() If the root nodes for the chunk root, tree root or log root are not sector size aligned, we are logging a warning message but these are in fact errors that makes the super block validation fail. So change the level of the messages from warning to error. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20c405a4789d..13e400046c87 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2416,18 +2416,18 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info, /* Root alignment check */ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) { - btrfs_warn(fs_info, "tree_root block unaligned: %llu", - btrfs_super_root(sb)); + btrfs_err(fs_info, "tree_root block unaligned: %llu", + btrfs_super_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) { - btrfs_warn(fs_info, "chunk_root block unaligned: %llu", + btrfs_err(fs_info, "chunk_root block unaligned: %llu", btrfs_super_chunk_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) { - btrfs_warn(fs_info, "log_root block unaligned: %llu", - btrfs_super_log_root(sb)); + btrfs_err(fs_info, "log_root block unaligned: %llu", + btrfs_super_log_root(sb)); ret = -EINVAL; } From 29e525665a77a70ea8f19310e96b1b1472b07fc9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sun, 8 Feb 2026 18:30:08 +0000 Subject: [PATCH 7/9] btrfs: fix lost return value on error in finish_verity() If btrfs_update_inode() or del_orphan() fail, we jump to the 'end_trans' label and then return 0 instead of the error returned by one of those calls. Fix this and return the error. Fixes: 61fb7f04ee06 ("btrfs: remove out label in finish_verity()") Reported-by: Chris Mason Link: https://lore.kernel.org/linux-btrfs/20260208161129.3888234-1-clm@meta.com/ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/verity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index 06cbd6f00a78..95ea794f20d3 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -552,7 +552,7 @@ static int finish_verity(struct btrfs_inode *inode, const void *desc, btrfs_set_fs_compat_ro(root->fs_info, VERITY); end_trans: btrfs_end_transaction(trans); - return 0; + return ret; } From 7b54e08f2ef8f94d7e3959dde3694c4c34fa7701 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sun, 8 Feb 2026 19:48:14 +0000 Subject: [PATCH 8/9] btrfs: fix lost error return in btrfs_find_orphan_roots() If the call to btrfs_get_fs_root() returns an error different from -ENOENT we break out of the loop and then return 0, losing the error. Fix this by returning the error instead of breaking from the loop. Reported-by: Chris Mason Link: https://lore.kernel.org/linux-btrfs/20260208185321.1128472-1-clm@meta.com/ Fixes: 8670a25ecb2f ("btrfs: use single return variable in btrfs_find_orphan_roots()") Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/root-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 37a4173c0a0b..d85a09ae1733 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -257,7 +257,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) root = btrfs_get_fs_root(fs_info, root_objectid, false); ret = PTR_ERR_OR_ZERO(root); if (ret && ret != -ENOENT) { - break; + return ret; } else if (ret == -ENOENT) { struct btrfs_trans_handle *trans; From ecb7c2484cfc83a93658907580035a8adf1e0a92 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 4 Feb 2026 17:15:53 +0000 Subject: [PATCH 9/9] btrfs: fix invalid leaf access in btrfs_quota_enable() if ref key not found If btrfs_search_slot_for_read() returns 1, it means we did not find any key greater than or equals to the key we asked for, meaning we have reached the end of the tree and therefore the path is not valid. If this happens we need to break out of the loop and stop, instead of continuing and accessing an invalid path. Fixes: 5223cc60b40a ("btrfs: drop the path before adding qgroup items when enabling qgroups") Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index f53c313ab6e4..38adadb936dc 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1169,11 +1169,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info, } if (ret > 0) { /* - * Shouldn't happen, but in case it does we - * don't need to do the btrfs_next_item, just - * continue. + * Shouldn't happen because the key should still + * be there (return 0), but in case it does it + * means we have reached the end of the tree - + * there are no more leaves with items that have + * a key greater than or equals to @found_key, + * so just stop the search loop. */ - continue; + break; } } ret = btrfs_next_item(tree_root, path);