btrfs: populate fully_remapped_bgs_list on mount

Add a function btrfs_populate_fully_remapped_bgs_list(), called on
mount, which looks for fully remapped block groups (i.e. those with
identity_remap_count == 0) that haven't yet had their chunk stripes
and device extents removed.

This situation arises when a filesystem is unmounted before async
discard has finished; the cleanup has to be picked up again on the
next mount, as otherwise the data range occupied by the chunk stripes
would be permanently unusable.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Mark Harmstone, 2026-01-07 14:09:17 +00:00, committed by David Sterba
parent 7cddbb4339
commit 2aef934b56
5 changed files with 107 additions and 0 deletions
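
For reference, the condition the mount-time scan applies to each block
group / chunk map pair boils down to a single predicate. The helper below
is purely illustrative (its name is not part of the patch); it condenses
the checks made in btrfs_populate_fully_remapped_bgs_list() further down:

/*
 * Illustrative sketch only, not code added by this patch: the
 * per-block-group test made during the mount-time scan.
 */
static bool needs_stripe_removal(const struct btrfs_block_group *bg,
				 const struct btrfs_chunk_map *map)
{
	/* The block group must be marked as remapped... */
	if (!(bg->flags & BTRFS_BLOCK_GROUP_REMAPPED))
		return false;

	/* ...with its last identity mapping already removed... */
	if (bg->identity_remap_count != 0)
		return false;

	/* ...but its chunk stripes not yet cleaned up. */
	return map->num_stripes != 0;
}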


@@ -4794,6 +4794,10 @@ void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg,
	if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		spin_lock(&bg->lock);
		set_bit(BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING, &bg->runtime_flags);
		spin_unlock(&bg->lock);
		btrfs_discard_queue_work(&fs_info->discard_ctl, bg);
	} else {
		spin_lock(&fs_info->unused_bgs_lock);
@@ -4811,3 +4815,74 @@ void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg,
		spin_unlock(&fs_info->unused_bgs_lock);
	}
}

/*
 * Compare the block group and chunk trees, and find any fully-remapped block
 * groups which haven't yet had their chunk stripes and device extents removed,
 * and put them on the fully_remapped_bgs list so this gets done.
 *
 * This happens when a block group becomes fully remapped, i.e. its last
 * identity mapping is removed, and the volume is unmounted before async
 * discard has finished. It's important this gets done, as until it is the
 * chunk's stripes are dead space.
 */
int btrfs_populate_fully_remapped_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct rb_node *node_bg, *node_chunk;

	node_bg = rb_first_cached(&fs_info->block_group_cache_tree);
	node_chunk = rb_first_cached(&fs_info->mapping_tree);

	while (node_bg && node_chunk) {
		struct btrfs_block_group *bg;
		struct btrfs_chunk_map *map;

		bg = rb_entry(node_bg, struct btrfs_block_group, cache_node);
		map = rb_entry(node_chunk, struct btrfs_chunk_map, rb_node);

		ASSERT(bg->start == map->start);

		if (!(bg->flags & BTRFS_BLOCK_GROUP_REMAPPED))
			goto next;

		if (bg->identity_remap_count != 0)
			goto next;

		if (map->num_stripes == 0)
			goto next;

		spin_lock(&fs_info->unused_bgs_lock);
		if (list_empty(&bg->bg_list)) {
			btrfs_get_block_group(bg);
			list_add_tail(&bg->bg_list, &fs_info->fully_remapped_bgs);
		} else {
			list_move_tail(&bg->bg_list, &fs_info->fully_remapped_bgs);
		}
		spin_unlock(&fs_info->unused_bgs_lock);

		/*
		 * Ideally we'd want to call btrfs_discard_queue_work() here,
		 * but it'd do nothing as the discard worker hasn't been
		 * started yet.
		 *
		 * The block group will get added to the discard list when
		 * btrfs_handle_fully_remapped_bgs() gets called, when we
		 * commit the first transaction.
		 */
		if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
			spin_lock(&bg->lock);
			set_bit(BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING, &bg->runtime_flags);
			spin_unlock(&bg->lock);
		}

next:
		node_bg = rb_next(node_bg);
		node_chunk = rb_next(node_chunk);
	}

	ASSERT(!node_bg && !node_chunk);

	return 0;
}
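
The list manipulation above follows the usual btrfs convention that a block
group pins an extra reference while it is linked on a list through its
bg_list member; whoever unlinks it drops that reference (as the trim hunk
further down does). A condensed illustration of that pattern, with a
hypothetical helper name:

/*
 * Illustrative sketch only: the bg_list reference convention relied on by
 * btrfs_populate_fully_remapped_bgs_list() above.
 */
static void queue_bg_on_list_sketch(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_group *bg,
				    struct list_head *list)
{
	spin_lock(&fs_info->unused_bgs_lock);
	if (list_empty(&bg->bg_list)) {
		/* Not queued anywhere yet: take a reference to pin it. */
		btrfs_get_block_group(bg);
		list_add_tail(&bg->bg_list, list);
	} else {
		/* Already pinned by an earlier queueing: just move it. */
		list_move_tail(&bg->bg_list, list);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}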


@@ -94,6 +94,7 @@ enum btrfs_block_group_flags {
	 */
	BLOCK_GROUP_FLAG_NEW,
	BLOCK_GROUP_FLAG_FULLY_REMAPPED,
	BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING,
};

enum btrfs_caching_type {
@@ -418,5 +419,6 @@ int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg);
void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg,
				  struct btrfs_trans_handle *trans);
int btrfs_populate_fully_remapped_bgs_list(struct btrfs_fs_info *fs_info);

#endif /* BTRFS_BLOCK_GROUP_H */


@@ -3601,6 +3601,14 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
		goto fail_sysfs;
	}

	if (btrfs_fs_incompat(fs_info, REMAP_TREE)) {
		ret = btrfs_populate_fully_remapped_bgs_list(fs_info);
		if (ret) {
			btrfs_err(fs_info, "failed to populate fully_remapped_bgs list: %d", ret);
			goto fail_sysfs;
		}
	}

	btrfs_zoned_reserve_data_reloc_bg(fs_info);
	btrfs_free_zone_cache(fs_info);


@@ -3068,6 +3068,7 @@ bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
	bool ret = true;

	if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED &&
	    !test_bit(BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING, &block_group->runtime_flags) &&
	    block_group->identity_remap_count == 0) {
		return true;
	}
@@ -3849,6 +3850,23 @@ void btrfs_trim_fully_remapped_block_group(struct btrfs_block_group *bg)
	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
	u64 end = btrfs_block_group_end(bg);

	if (!test_bit(BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING, &bg->runtime_flags)) {
		bg->discard_cursor = end;

		if (bg->used == 0) {
			spin_lock(&fs_info->unused_bgs_lock);
			if (!list_empty(&bg->bg_list)) {
				list_del_init(&bg->bg_list);
				btrfs_put_block_group(bg);
			}
			spin_unlock(&fs_info->unused_bgs_lock);

			btrfs_mark_bg_unused(bg);
		}

		return;
	}

	bytes = end - bg->discard_cursor;
	if (max_discard_size &&


@@ -4743,6 +4743,10 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map,
	btrfs_remove_bg_from_sinfo(bg);

	spin_lock(&bg->lock);
	clear_bit(BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING, &bg->runtime_flags);
	spin_unlock(&bg->lock);

	ret = remove_chunk_stripes(trans, chunk_map, path);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);