diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index a1ab513fa8ea..3b8a750d8519 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1067,6 +1067,24 @@ static int remove_block_group_item(struct btrfs_trans_handle *trans, return btrfs_del_item(trans, root, path); } +void btrfs_remove_bg_from_sinfo(struct btrfs_block_group *bg) +{ + int factor = btrfs_bg_type_to_factor(bg->flags); + + spin_lock(&bg->space_info->lock); + if (btrfs_test_opt(bg->fs_info, ENOSPC_DEBUG)) { + WARN_ON(bg->space_info->total_bytes < bg->length); + WARN_ON(bg->space_info->bytes_readonly < bg->length - bg->zone_unusable); + WARN_ON(bg->space_info->bytes_zone_unusable < bg->zone_unusable); + WARN_ON(bg->space_info->disk_total < bg->length * factor); + } + bg->space_info->total_bytes -= bg->length; + bg->space_info->bytes_readonly -= (bg->length - bg->zone_unusable); + btrfs_space_info_update_bytes_zone_unusable(bg->space_info, -bg->zone_unusable); + bg->space_info->disk_total -= bg->length * factor; + spin_unlock(&bg->space_info->lock); +} + int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map) { @@ -1078,7 +1096,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct kobject *kobj = NULL; int ret; int index; - int factor; struct btrfs_caching_control *caching_ctl = NULL; bool remove_map; bool remove_rsv = false; @@ -1087,7 +1104,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, if (!block_group) return -ENOENT; - BUG_ON(!block_group->ro); + BUG_ON(!block_group->ro && !(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)); trace_btrfs_remove_block_group(block_group); /* @@ -1099,7 +1116,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->length); index = btrfs_bg_flags_to_raid_index(block_group->flags); - factor = btrfs_bg_type_to_factor(block_group->flags); /* make sure this block group isn't part of an allocation cluster */ cluster = &fs_info->data_alloc_cluster; 
@@ -1223,26 +1239,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_lock(&block_group->space_info->lock); list_del_init(&block_group->ro_list); - - if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { - WARN_ON(block_group->space_info->total_bytes - < block_group->length); - WARN_ON(block_group->space_info->bytes_readonly - < block_group->length - block_group->zone_unusable); - WARN_ON(block_group->space_info->bytes_zone_unusable - < block_group->zone_unusable); - WARN_ON(block_group->space_info->disk_total - < block_group->length * factor); - } - block_group->space_info->total_bytes -= block_group->length; - block_group->space_info->bytes_readonly -= - (block_group->length - block_group->zone_unusable); - btrfs_space_info_update_bytes_zone_unusable(block_group->space_info, - -block_group->zone_unusable); - block_group->space_info->disk_total -= block_group->length * factor; - spin_unlock(&block_group->space_info->lock); + if (!(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)) + btrfs_remove_bg_from_sinfo(block_group); + /* * Remove the free space for the block group from the free space tree * and the block group's item from the extent tree before marking the @@ -1575,8 +1576,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&space_info->lock); spin_lock(&block_group->lock); - if (btrfs_is_block_group_used(block_group) || block_group->ro || - list_is_singular(&block_group->list)) { + if (btrfs_is_block_group_used(block_group) || + (block_group->ro && !(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)) || + list_is_singular(&block_group->list) || + test_bit(BLOCK_GROUP_FLAG_FULLY_REMAPPED, &block_group->runtime_flags)) { /* * We want to bail if we made new allocations or have * outstanding allocations in this block group. We do @@ -1617,9 +1620,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group. 
*/ used = btrfs_space_info_used(space_info, true); - if ((space_info->total_bytes - block_group->length < used && - block_group->zone_unusable < block_group->length) || - has_unwritten_metadata(block_group)) { + if (((space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) || + has_unwritten_metadata(block_group)) && + !(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the @@ -1784,6 +1788,9 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) btrfs_get_block_group(bg); trace_btrfs_add_unused_block_group(bg); list_add_tail(&bg->bg_list, &fs_info->unused_bgs); + } else if (bg->flags & BTRFS_BLOCK_GROUP_REMAPPED && + bg->identity_remap_count == 0) { + /* Leave fully remapped block groups on the fully_remapped_bgs list. */ } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) { /* Pull out the block group from the reclaim_bgs list. */ trace_btrfs_add_unused_block_group(bg); @@ -4581,6 +4588,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) list_del_init(&block_group->bg_list); btrfs_put_block_group(block_group); } + + while (!list_empty(&info->fully_remapped_bgs)) { + block_group = list_first_entry(&info->fully_remapped_bgs, + struct btrfs_block_group, bg_list); + list_del_init(&block_group->bg_list); + btrfs_put_block_group(block_group); + } spin_unlock(&info->unused_bgs_lock); spin_lock(&info->zone_active_bgs_lock); @@ -4768,3 +4782,24 @@ bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg) return false; return true; } + +void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg, + struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + + spin_lock(&fs_info->unused_bgs_lock); + /* + * The block group might already be on the unused_bgs list, remove it + * if it is. 
It'll get readded after the async discard worker finishes, + * or in btrfs_handle_fully_remapped_bgs() if we're not using async + * discard. + */ + if (!list_empty(&bg->bg_list)) + list_del(&bg->bg_list); + else + btrfs_get_block_group(bg); + + list_add_tail(&bg->bg_list, &fs_info->fully_remapped_bgs); + spin_unlock(&fs_info->unused_bgs_lock); +} diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index ecabb1a9fc0e..f5c15c7f6cc7 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -92,6 +92,7 @@ enum btrfs_block_group_flags { * transaction. */ BLOCK_GROUP_FLAG_NEW, + BLOCK_GROUP_FLAG_FULLY_REMAPPED, }; enum btrfs_caching_type { @@ -340,6 +341,7 @@ int btrfs_add_new_free_space(struct btrfs_block_group *block_group, struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); +void btrfs_remove_bg_from_sinfo(struct btrfs_block_group *bg); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map); void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); @@ -411,5 +413,7 @@ int btrfs_use_block_group_size_class(struct btrfs_block_group *bg, enum btrfs_block_group_size_class size_class, bool force_wrong_size_class); bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg); +void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg, + struct btrfs_trans_handle *trans); #endif /* BTRFS_BLOCK_GROUP_H */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c69734c74c26..627282613eee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1473,6 +1473,10 @@ static int cleaner_kthread(void *arg) */ btrfs_run_defrag_inodes(fs_info); + if (btrfs_fs_incompat(fs_info, REMAP_TREE) && + !btrfs_test_opt(fs_info, DISCARD_ASYNC)) + btrfs_handle_fully_remapped_bgs(fs_info); + /* * Acquires fs_info->reclaim_bgs_lock to avoid racing * with relocation (btrfs_relocate_chunk) and relocation @@ -2807,6 +2811,7 @@ void 
btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
 	INIT_LIST_HEAD(&fs_info->unused_bgs);
 	INIT_LIST_HEAD(&fs_info->reclaim_bgs);
+	INIT_LIST_HEAD(&fs_info->fully_remapped_bgs);
 	INIT_LIST_HEAD(&fs_info->zone_active_bgs);
 #ifdef CONFIG_BTRFS_DEBUG
 	INIT_LIST_HEAD(&fs_info->allocated_roots);
@@ -2862,6 +2867,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	mutex_init(&fs_info->chunk_mutex);
 	mutex_init(&fs_info->transaction_kthread_mutex);
 	mutex_init(&fs_info->cleaner_mutex);
+	mutex_init(&fs_info->remap_mutex);
 	mutex_init(&fs_info->ro_block_group_mutex);
 	init_rwsem(&fs_info->commit_root_sem);
 	init_rwsem(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ce4bda1f37ad..5e3e9f18b263 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -41,6 +41,7 @@
 #include "tree-checker.h"
 #include "raid-stripe-tree.h"
 #include "delayed-inode.h"
+#include "relocation.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -2848,6 +2849,83 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+/*
+ * Complete the remapping of a block group by removing its chunk stripes and
+ * device extents, and adding it to the unused list if there's no longer any
+ * extents nominally within it.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int btrfs_complete_bg_remapping(struct btrfs_block_group *bg)
+{
+	struct btrfs_fs_info *fs_info = bg->fs_info;
+	struct btrfs_chunk_map *map;
+	int ret;
+
+	map = btrfs_get_chunk_map(fs_info, bg->start, 1);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	ret = btrfs_last_identity_remap_gone(map, bg);
+	if (ret) {
+		btrfs_free_chunk_map(map);
+		return ret;
+	}
+
+	/*
+	 * Set num_stripes to 0, so that btrfs_remove_dev_extents() won't run a
+	 * second time.
+	 */
+	map->num_stripes = 0;
+
+	btrfs_free_chunk_map(map);
+
+	/*
+	 * Stripe removal is finished, so let btrfs_delete_unused_bgs() consider
+	 * this block group again: it skips any block group that still has
+	 * BLOCK_GROUP_FLAG_FULLY_REMAPPED set.
+	 */
+	clear_bit(BLOCK_GROUP_FLAG_FULLY_REMAPPED, &bg->runtime_flags);
+
+	if (bg->used == 0) {
+		spin_lock(&fs_info->unused_bgs_lock);
+		if (!list_empty(&bg->bg_list)) {
+			list_del_init(&bg->bg_list);
+			btrfs_put_block_group(bg);
+		}
+		spin_unlock(&fs_info->unused_bgs_lock);
+
+		btrfs_mark_bg_unused(bg);
+	}
+
+	return 0;
+}
+
+/*
+ * Run btrfs_complete_bg_remapping() on every block group queued on
+ * fs_info->fully_remapped_bgs. Processing stops at the first failure,
+ * leaving any remaining block groups on the list.
+ */
+void btrfs_handle_fully_remapped_bgs(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_block_group *bg;
+	int ret;
+
+	spin_lock(&fs_info->unused_bgs_lock);
+	while (!list_empty(&fs_info->fully_remapped_bgs)) {
+		bg = list_first_entry(&fs_info->fully_remapped_bgs,
+				      struct btrfs_block_group, bg_list);
+		list_del_init(&bg->bg_list);
+		spin_unlock(&fs_info->unused_bgs_lock);
+
+		ret = btrfs_complete_bg_remapping(bg);
+		/* Drop the reference that was held for the list. */
+		btrfs_put_block_group(bg);
+		if (ret)
+			return;
+
+		spin_lock(&fs_info->unused_bgs_lock);
+	}
+	spin_unlock(&fs_info->unused_bgs_lock);
+}
+
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -3000,11 +3081,22 @@ u64 btrfs_get_extent_owner_root(struct btrfs_fs_info *fs_info,
 }
 
 static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
-				     u64 bytenr, struct btrfs_squota_delta *delta)
+				     u64 bytenr, struct btrfs_squota_delta *delta,
+				     struct btrfs_path *path)
 {
 	int ret;
+	bool remapped = false;
 	u64 num_bytes = delta->num_bytes;
 
+	/* Returns 1 on success and 0 on no-op. */
+	ret = btrfs_remove_extent_from_remap_tree(trans, path, bytenr, num_bytes);
+	if (unlikely(ret < 0)) {
+		btrfs_abort_transaction(trans, ret);
+		return ret;
+	} else if (ret == 1) {
+		remapped = true;
+	}
+
 	if (delta->is_data) {
 		struct btrfs_root *csum_root;
 
@@ -3028,10 +3120,13 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes);
-	if (unlikely(ret)) {
-		btrfs_abort_transaction(trans, ret);
-		return ret;
+	/* If remapped, FST has already been taken care of in remove_range_from_remap_tree(). */
+	if (!remapped) {
+		ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes);
+		if (unlikely(ret)) {
+			btrfs_abort_transaction(trans, ret);
+			return ret;
+		}
 	}
 
 	ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
@@ -3390,7 +3485,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		}
 		btrfs_release_path(path);
 
-		ret = do_free_extent_accounting(trans, bytenr, &delta);
+		ret = do_free_extent_accounting(trans, bytenr, &delta, path);
 	}
 	btrfs_release_path(path);
 
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 71bb8109c969..d7b6aeb63656 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -163,5 +163,7 @@ void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u6
 int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 			 u64 num_bytes, u64 *actual_bytes);
 int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
+void btrfs_handle_fully_remapped_bgs(struct btrfs_fs_info *fs_info);
+int btrfs_complete_bg_remapping(struct btrfs_block_group *bg);
 
 #endif
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 13b0aa0b9da9..5bbc993a66e1 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -587,6 +587,7 @@ struct btrfs_fs_info {
 	struct mutex transaction_kthread_mutex;
 	struct mutex cleaner_mutex;
 	struct mutex chunk_mutex;
+	struct mutex remap_mutex;
 
 	/*
 	 * This is taken to make sure
we don't set block groups ro after the
@@ -840,10 +841,11 @@ struct btrfs_fs_info {
 	struct list_head reclaim_bgs;
 	int bg_reclaim_threshold;
 
-	/* Protects the lists unused_bgs and reclaim_bgs. */
+	/* Protects the lists unused_bgs, reclaim_bgs, and fully_remapped_bgs. */
 	spinlock_t unused_bgs_lock;
 	/* Protected by unused_bgs_lock. */
 	struct list_head unused_bgs;
+	struct list_head fully_remapped_bgs;
 	struct mutex unused_bg_unpin_mutex;
 	/* Protect block groups that are going to be deleted */
 	struct mutex reclaim_bgs_lock;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 6de508323dbd..e0558b2cd0b4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -37,6 +37,7 @@
 #include "super.h"
 #include "tree-checker.h"
 #include "raid-stripe-tree.h"
+#include "free-space-tree.h"
 
 /*
  * Relocation overview
@@ -3859,6 +3860,210 @@ static const char *stage_to_string(enum reloc_stage stage)
 	return "unknown";
 }
 
+/*
+ * Add @diff to the remap_bytes counter of @bg and put the block group on the
+ * dirty list of the running transaction.
+ *
+ * If the block group is left with neither used nor remapped bytes, it is
+ * passed to btrfs_mark_bg_unused().
+ */
+static void adjust_block_group_remap_bytes(struct btrfs_trans_handle *trans,
+					   struct btrfs_block_group *bg, s64 diff)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	bool bg_already_dirty = true;
+	bool mark_unused = false;
+
+	spin_lock(&bg->lock);
+	bg->remap_bytes += diff;
+	if (bg->used == 0 && bg->remap_bytes == 0)
+		mark_unused = true;
+	spin_unlock(&bg->lock);
+
+	if (mark_unused)
+		btrfs_mark_bg_unused(bg);
+
+	spin_lock(&trans->transaction->dirty_bgs_lock);
+	if (list_empty(&bg->dirty_list)) {
+		list_add_tail(&bg->dirty_list, &trans->transaction->dirty_bgs);
+		bg_already_dirty = false;
+		btrfs_get_block_group(bg);
+	}
+	spin_unlock(&trans->transaction->dirty_bgs_lock);
+
+	/* Modified block groups are accounted for in the delayed_refs_rsv. */
+	if (!bg_already_dirty)
+		btrfs_inc_delayed_refs_rsv_bg_updates(fs_info);
+}
+
+/*
+ * Strip the stripes from the chunk item of @chunk_map: num_stripes and
+ * sub_stripes are set to 0 and the item is truncated to the offset of its
+ * stripe array.
+ *
+ * Returns 0 on success, -ENOENT if the chunk item isn't found, or a negative
+ * errno from the tree search.
+ */
+static int remove_chunk_stripes(struct btrfs_trans_handle *trans,
+				struct btrfs_chunk_map *chunk_map,
+				struct btrfs_path *path)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_chunk *chunk;
+	int ret;
+
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+	key.offset = chunk_map->start;
+
+	btrfs_reserve_chunk_metadata(trans, false);
+
+	ret = btrfs_search_slot(trans, fs_info->chunk_root, &key, path, 0, 1);
+	if (ret) {
+		if (ret == 1) {
+			btrfs_release_path(path);
+			ret = -ENOENT;
+		}
+		btrfs_trans_release_chunk_metadata(trans);
+		return ret;
+	}
+
+	leaf = path->nodes[0];
+
+	chunk = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_chunk);
+	btrfs_set_chunk_num_stripes(leaf, chunk, 0);
+	btrfs_set_chunk_sub_stripes(leaf, chunk, 0);
+
+	btrfs_truncate_item(trans, path, offsetof(struct btrfs_chunk, stripe), 1);
+
+	btrfs_mark_buffer_dirty(trans, leaf);
+
+	btrfs_release_path(path);
+	btrfs_trans_release_chunk_metadata(trans);
+
+	return 0;
+}
+
+/*
+ * Release the physical backing of @bg within one transaction: remove the
+ * device extents of @chunk_map, call btrfs_update_device() for every stripe,
+ * clear the CHUNK_ALLOCATED bits of the chunk map, remove @bg from its space
+ * info and strip the stripes from the chunk item.
+ *
+ * Returns 0 on success or a negative errno. If a step fails after the
+ * transaction has been started, the transaction is aborted and the handle is
+ * released before returning.
+ */
+int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map,
+				   struct btrfs_block_group *bg)
+{
+	struct btrfs_fs_info *fs_info = bg->fs_info;
+	struct btrfs_trans_handle *trans;
+	int ret;
+	unsigned int num_items;
+	BTRFS_PATH_AUTO_FREE(path);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * One item for each entry we're removing in the dev extents tree, and
+	 * another for each device. DUP chunks are all on one device,
+	 * everything else has one device per stripe.
+	 */
+	if (bg->flags & BTRFS_BLOCK_GROUP_DUP)
+		num_items = chunk_map->num_stripes + 1;
+	else
+		num_items = 2 * chunk_map->num_stripes;
+
+	trans = btrfs_start_transaction_fallback_global_rsv(fs_info->tree_root, num_items);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	ret = btrfs_remove_dev_extents(trans, chunk_map);
+	if (unlikely(ret)) {
+		btrfs_abort_transaction(trans, ret);
+		goto out_end_trans;
+	}
+
+	mutex_lock(&trans->fs_info->chunk_mutex);
+	for (unsigned int i = 0; i < chunk_map->num_stripes; i++) {
+		ret = btrfs_update_device(trans, chunk_map->stripes[i].dev);
+		if (unlikely(ret)) {
+			mutex_unlock(&trans->fs_info->chunk_mutex);
+			btrfs_abort_transaction(trans, ret);
+			goto out_end_trans;
+		}
+	}
+	mutex_unlock(&trans->fs_info->chunk_mutex);
+
+	write_lock(&trans->fs_info->mapping_tree_lock);
+	btrfs_chunk_map_device_clear_bits(chunk_map, CHUNK_ALLOCATED);
+	write_unlock(&trans->fs_info->mapping_tree_lock);
+
+	btrfs_remove_bg_from_sinfo(bg);
+
+	ret = remove_chunk_stripes(trans, chunk_map, path);
+	if (unlikely(ret)) {
+		btrfs_abort_transaction(trans, ret);
+		goto out_end_trans;
+	}
+
+	return btrfs_commit_transaction(trans);
+
+out_end_trans:
+	/* Aborting a transaction doesn't release the handle, do that here. */
+	btrfs_end_transaction(trans);
+	return ret;
+}
+
+/*
+ * Add @delta to the identity remap count of @bg and put the block group on
+ * the dirty list of the running transaction.
+ *
+ * The first time the count drops to zero, BLOCK_GROUP_FLAG_FULLY_REMAPPED is
+ * set and the block group is passed to btrfs_mark_bg_fully_remapped().
+ */
+static void adjust_identity_remap_count(struct btrfs_trans_handle *trans,
+					struct btrfs_block_group *bg, int delta)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	bool bg_already_dirty = true;
+	bool mark_fully_remapped = false;
+
+	spin_lock(&bg->lock);
+
+	/* Check for underflow under the same lock as the update itself. */
+	WARN_ON(delta < 0 && -delta > bg->identity_remap_count);
+	bg->identity_remap_count += delta;
+
+	if (bg->identity_remap_count == 0 &&
+	    !test_bit(BLOCK_GROUP_FLAG_FULLY_REMAPPED, &bg->runtime_flags)) {
+		set_bit(BLOCK_GROUP_FLAG_FULLY_REMAPPED, &bg->runtime_flags);
+		mark_fully_remapped = true;
+	}
+
+	spin_unlock(&bg->lock);
+
+	spin_lock(&trans->transaction->dirty_bgs_lock);
+	if (list_empty(&bg->dirty_list)) {
+		list_add_tail(&bg->dirty_list, &trans->transaction->dirty_bgs);
+		bg_already_dirty = false;
+		btrfs_get_block_group(bg);
+	}
+	spin_unlock(&trans->transaction->dirty_bgs_lock);
+
+	/* Modified block groups are accounted for in the delayed_refs_rsv. */
+	if (!bg_already_dirty)
+		btrfs_inc_delayed_refs_rsv_bg_updates(fs_info);
+
+	if (mark_fully_remapped)
+		btrfs_mark_bg_fully_remapped(bg, trans);
+}
+
 int btrfs_translate_remap(struct btrfs_fs_info *fs_info, u64 *logical, u64 *length)
 {
 	int ret;
@@ -4463,3 +4668,286 @@ u64 btrfs_get_reloc_bg_bytenr(const struct btrfs_fs_info *fs_info)
 	logical = fs_info->reloc_ctl->block_group->start;
 	return logical;
 }
+
+/*
+ * Insert a remap tree entry mapping [@old_addr, @old_addr + @length) to
+ * @new_addr.
+ *
+ * If @old_addr == @new_addr a single empty BTRFS_IDENTITY_REMAP_KEY item is
+ * inserted. Otherwise a BTRFS_REMAP_KEY item holding @new_addr and a matching
+ * BTRFS_REMAP_BACKREF_KEY item holding @old_addr are inserted.
+ *
+ * The path is released before returning successfully.
+ */
+static int insert_remap_item(struct btrfs_trans_handle *trans, struct btrfs_path *path,
+			     u64 old_addr, u64 length, u64 new_addr)
+{
+	int ret;
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_key key;
+	struct btrfs_remap_item remap = { 0 };
+
+	if (old_addr == new_addr) {
+		/* Add new identity remap item. */
+		key.objectid = old_addr;
+		key.type = BTRFS_IDENTITY_REMAP_KEY;
+		key.offset = length;
+
+		ret = btrfs_insert_empty_item(trans, fs_info->remap_root, path,
+					      &key, 0);
+		if (ret)
+			return ret;
+	} else {
+		/* Add new remap item. */
+		key.objectid = old_addr;
+		key.type = BTRFS_REMAP_KEY;
+		key.offset = length;
+
+		ret = btrfs_insert_empty_item(trans, fs_info->remap_root,
+				path, &key, sizeof(struct btrfs_remap_item));
+		if (ret)
+			return ret;
+
+		btrfs_set_stack_remap_address(&remap, new_addr);
+
+		write_extent_buffer(path->nodes[0], &remap,
+			btrfs_item_ptr_offset(path->nodes[0], path->slots[0]),
+			sizeof(struct btrfs_remap_item));
+
+		btrfs_release_path(path);
+
+		/* Add new backref item. */
+		key.objectid = new_addr;
+		key.type = BTRFS_REMAP_BACKREF_KEY;
+		key.offset = length;
+
+		ret = btrfs_insert_empty_item(trans, fs_info->remap_root,
+					      path, &key,
+					      sizeof(struct btrfs_remap_item));
+		if (ret)
+			return ret;
+
+		btrfs_set_stack_remap_address(&remap, old_addr);
+
+		write_extent_buffer(path->nodes[0], &remap,
+			btrfs_item_ptr_offset(path->nodes[0], path->slots[0]),
+			sizeof(struct btrfs_remap_item));
+	}
+
+	btrfs_release_path(path);
+
+	return 0;
+}
+
+/*
+ * Punch a hole in the remap item or identity remap item pointed to by path,
+ * for the range [hole_start, hole_start + hole_length).
+ *
+ * The hole is clipped to the item, and the number of bytes that overlapped it
+ * is stored in *overlap_ret. A u64 is used rather than the int return value
+ * so that a large overlap can't be truncated or mistaken for an error.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int remove_range_from_remap_tree(struct btrfs_trans_handle *trans,
+					struct btrfs_path *path,
+					struct btrfs_block_group *bg,
+					u64 hole_start, u64 hole_length,
+					u64 *overlap_ret)
+{
+	int ret;
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_key key;
+	u64 hole_end, new_addr, remap_start, remap_length, remap_end;
+	u64 overlap_length;
+	bool is_identity_remap;
+	int identity_count_delta = 0;
+
+	hole_end = hole_start + hole_length;
+
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	is_identity_remap = (key.type == BTRFS_IDENTITY_REMAP_KEY);
+
+	remap_start = key.objectid;
+	remap_length = key.offset;
+	remap_end = remap_start + remap_length;
+
+	if (is_identity_remap) {
+		new_addr = remap_start;
+	} else {
+		struct btrfs_remap_item *remap_ptr;
+
+		remap_ptr = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_remap_item);
+		new_addr = btrfs_remap_address(leaf, remap_ptr);
+	}
+
+	/* Delete old item. */
+	ret = btrfs_del_item(trans, fs_info->remap_root, path);
+	btrfs_release_path(path);
+	if (ret)
+		return ret;
+
+	if (is_identity_remap) {
+		identity_count_delta = -1;
+	} else {
+		/* Remove backref. */
+		key.objectid = new_addr;
+		key.type = BTRFS_REMAP_BACKREF_KEY;
+		key.offset = remap_length;
+
+		ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, -1, 1);
+		if (ret) {
+			if (ret == 1) {
+				btrfs_release_path(path);
+				ret = -ENOENT;
+			}
+			return ret;
+		}
+
+		ret = btrfs_del_item(trans, fs_info->remap_root, path);
+
+		btrfs_release_path(path);
+
+		if (ret)
+			return ret;
+	}
+
+	/* If hole_start > remap_start, re-add the start of the remap item. */
+	if (hole_start > remap_start) {
+		ret = insert_remap_item(trans, path, remap_start,
+					hole_start - remap_start, new_addr);
+		if (ret)
+			return ret;
+
+		if (is_identity_remap)
+			identity_count_delta++;
+	}
+
+	/* If hole_end < remap_end, re-add the end of the remap item. */
+	if (hole_end < remap_end) {
+		ret = insert_remap_item(trans, path, hole_end,
+					remap_end - hole_end,
+					hole_end - remap_start + new_addr);
+		if (ret)
+			return ret;
+
+		if (is_identity_remap)
+			identity_count_delta++;
+	}
+
+	if (identity_count_delta != 0)
+		adjust_identity_remap_count(trans, bg, identity_count_delta);
+
+	overlap_length = min_t(u64, hole_end, remap_end) -
+			 max_t(u64, hole_start, remap_start);
+
+	if (!is_identity_remap) {
+		struct btrfs_block_group *dest_bg;
+
+		dest_bg = btrfs_lookup_block_group(fs_info, new_addr);
+		if (unlikely(!dest_bg)) {
+			btrfs_err(fs_info,
+				  "no block group found for remap target %llu",
+				  new_addr);
+			return -EUCLEAN;
+		}
+		adjust_block_group_remap_bytes(trans, dest_bg, -overlap_length);
+		btrfs_put_block_group(dest_bg);
+		ret = btrfs_add_to_free_space_tree(trans,
+						   hole_start - remap_start + new_addr,
+						   overlap_length);
+		if (ret)
+			return ret;
+	}
+
+	*overlap_ret = overlap_length;
+
+	return 0;
+}
+
+/*
+ * Remove the range [bytenr, bytenr + num_bytes) from the remap tree, walking
+ * over as many remap or identity remap items as it takes to cover it.
+ *
+ * Return 1 if remove_range_from_remap_tree() has been called successfully,
+ * 0 if there was nothing to do (no remap tree, or the block group containing
+ * bytenr doesn't exist or isn't remapped), and a negative number on error.
+ */
+int btrfs_remove_extent_from_remap_tree(struct btrfs_trans_handle *trans,
+					struct btrfs_path *path,
+					u64 bytenr, u64 num_bytes)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_key key, found_key;
+	struct extent_buffer *leaf;
+	struct btrfs_block_group *bg;
+	u64 length;
+	int ret;
+
+	if (!btrfs_fs_incompat(fs_info, REMAP_TREE))
+		return 0;
+
+	bg = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!bg)
+		return 0;
+
+	mutex_lock(&fs_info->remap_mutex);
+
+	if (!(bg->flags & BTRFS_BLOCK_GROUP_REMAPPED)) {
+		mutex_unlock(&fs_info->remap_mutex);
+		btrfs_put_block_group(bg);
+		return 0;
+	}
+
+	do {
+		key.objectid = bytenr;
+		key.type = (u8)-1;
+		key.offset = (u64)-1;
+
+		ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, -1, 1);
+		if (ret < 0)
+			goto end;
+
+		leaf = path->nodes[0];
+		if (path->slots[0] == 0) {
+			ret = -ENOENT;
+			goto end;
+		}
+
+		path->slots[0]--;
+
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+		if (found_key.type != BTRFS_IDENTITY_REMAP_KEY &&
+		    found_key.type != BTRFS_REMAP_KEY) {
+			ret = -ENOENT;
+			goto end;
+		}
+
+		if (bytenr < found_key.objectid ||
+		    bytenr >= found_key.objectid + found_key.offset) {
+			ret = -ENOENT;
+			goto end;
+		}
+
+		ret = remove_range_from_remap_tree(trans, path, bg, bytenr,
+						   num_bytes, &length);
+		if (ret < 0)
+			goto end;
+
+		bytenr += length;
+		num_bytes -= length;
+	} while (num_bytes > 0);
+
+	ret = 1;
+
+end:
+	mutex_unlock(&fs_info->remap_mutex);
+
+	btrfs_put_block_group(bg);
+	btrfs_release_path(path);
+
+	return ret;
+}
diff --git a/fs/btrfs/relocation.h b/fs/btrfs/relocation.h
index c0ee26004fc1..3afb6f85b722 100644
--- a/fs/btrfs/relocation.h
+++ b/fs/btrfs/relocation.h
@@ -32,5 +32,10 @@ struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr);
 bool btrfs_should_ignore_reloc_root(const struct btrfs_root *root);
 u64 btrfs_get_reloc_bg_bytenr(const struct btrfs_fs_info
*fs_info); int btrfs_translate_remap(struct btrfs_fs_info *fs_info, u64 *logical, u64 *length); +int btrfs_remove_extent_from_remap_tree(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + u64 bytenr, u64 num_bytes); +int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map, + struct btrfs_block_group *bg); #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2a4bda452d85..4884c7b62c61 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2925,8 +2925,7 @@ error: return ret; } -static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, - struct btrfs_device *device) +int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { int ret; BTRFS_PATH_AUTO_FREE(path); @@ -3224,25 +3223,12 @@ static int remove_chunk_item(struct btrfs_trans_handle *trans, return btrfs_free_chunk(trans, chunk_offset); } -int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) +int btrfs_remove_dev_extents(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_chunk_map *map; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; u64 dev_extent_len = 0; int i, ret = 0; - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; - - map = btrfs_get_chunk_map(fs_info, chunk_offset, 1); - if (IS_ERR(map)) { - /* - * This is a logic error, but we don't want to just rely on the - * user having built with ASSERT enabled, so if ASSERT doesn't - * do anything we still error out. - */ - DEBUG_WARN("errr %ld reading chunk map at offset %llu", - PTR_ERR(map), chunk_offset); - return PTR_ERR(map); - } /* * First delete the device extent items from the devices btree. 
@@ -3263,7 +3249,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) if (unlikely(ret)) { mutex_unlock(&fs_devices->device_list_mutex); btrfs_abort_transaction(trans, ret); - goto out; + return ret; } if (device->bytes_used > 0) { @@ -3283,6 +3269,26 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) } mutex_unlock(&fs_devices->device_list_mutex); + return 0; +} + +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_chunk_map *map; + int ret; + + map = btrfs_get_chunk_map(fs_info, chunk_offset, 1); + if (IS_ERR(map)) { + DEBUG_WARN("errr %ld reading chunk map at offset %llu", + PTR_ERR(map), chunk_offset); + return PTR_ERR(map); + } + + ret = btrfs_remove_dev_extents(trans, map); + if (ret) + goto out; + /* * We acquire fs_info->chunk_mutex for 2 reasons: * @@ -5419,7 +5425,7 @@ static void chunk_map_device_set_bits(struct btrfs_chunk_map *map, unsigned int } } -static void chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned int bits) +void btrfs_chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned int bits) { for (int i = 0; i < map->num_stripes; i++) { struct btrfs_io_stripe *stripe = &map->stripes[i]; @@ -5436,7 +5442,7 @@ void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_ma write_lock(&fs_info->mapping_tree_lock); rb_erase_cached(&map->rb_node, &fs_info->mapping_tree); RB_CLEAR_NODE(&map->rb_node); - chunk_map_device_clear_bits(map, CHUNK_ALLOCATED); + btrfs_chunk_map_device_clear_bits(map, CHUNK_ALLOCATED); write_unlock(&fs_info->mapping_tree_lock); /* Once for the tree reference. 
*/ @@ -5472,7 +5478,7 @@ int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *m return -EEXIST; } chunk_map_device_set_bits(map, CHUNK_ALLOCATED); - chunk_map_device_clear_bits(map, CHUNK_TRIMMED); + btrfs_chunk_map_device_clear_bits(map, CHUNK_TRIMMED); write_unlock(&fs_info->mapping_tree_lock); return 0; @@ -5828,7 +5834,7 @@ void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info) map = rb_entry(node, struct btrfs_chunk_map, rb_node); rb_erase_cached(&map->rb_node, &fs_info->mapping_tree); RB_CLEAR_NODE(&map->rb_node); - chunk_map_device_clear_bits(map, CHUNK_ALLOCATED); + btrfs_chunk_map_device_clear_bits(map, CHUNK_ALLOCATED); /* Once for the tree ref. */ btrfs_free_chunk_map(map); cond_resched_rwlock_write(&fs_info->mapping_tree_lock); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index e4b3cb50f94a..ed1f8fd95fff 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -776,6 +776,7 @@ u64 btrfs_calc_stripe_length(const struct btrfs_chunk_map *map); int btrfs_nr_parity_stripes(u64 type); int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, struct btrfs_block_group *bg); +int btrfs_remove_dev_extents(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map); int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS @@ -888,6 +889,8 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical); bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb); +int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device); +void btrfs_chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned int bits); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,