mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:04:41 +01:00
[ENHANCEMENT]
Btrfs currently calculates data checksums and only then submits the bio.
But after commit 968f19c5b1 ("btrfs: always fallback to buffered write
if the inode requires checksum"), any write with data checksum will
fall back to buffered IO, meaning the content will not change during
writeback.
This means it is safe to calculate the data checksum and submit the bio
in parallel, and we only need the following new behavior:
- Wait for the csum generation to finish before calling btrfs_bio::end_io()
Otherwise this can lead to a use-after-free in the csum generation worker.
- Save the current bi_iter for csum_one_bio()
The submission part can advance btrfs_bio::bio.bi_iter; if it is not
saved, csum_one_bio() may get an empty bi_iter and not generate any
checksum.
Unfortunately this means we have to increase the size of btrfs_bio by
16 bytes, but this is still acceptable.
As usual, such a new feature is hidden behind the experimental flag.
[THEORETICAL ANALYSIS]
Consider the following theoretical hardware performance, which should be
more or less close to modern mainstream hardware:
Memory bandwidth: 50GiB/s
CRC32C bandwidth: 45GiB/s
SSD bandwidth: 8GiB/s
Then the write bandwidth with data checksum before the patch is:
1 / ( 1 / 50 + 1 / 45 + 1 / 8) = 5.98 GiB/s
After the patch, checksum generation overlaps with the write to storage, so only the slower of the two counts:
1 / ( 1 / 50 + max( 1 / 45, 1 / 8)) = 6.90 GiB/s
The difference is a 15.32% improvement.
[REAL WORLD BENCHMARK]
I'm using a Zen5 (HX 370) as the host; the VM has 4GiB memory and 10 vCPUs, and the
storage is backed by a PCIe gen3 x4 NVMe.
The test is a direct IO write with 1MiB block size, writing 7GiB of data
into a btrfs mount with data checksum. Thus the direct write will
fall back to a buffered one:
Vanilla Datasum: 1619.97 MiB/s
Patched Datasum: 1792.26 MiB/s
Diff +10.6 %
In my case the bottleneck is the storage, thus the improvement does not
reach the theoretical one, but it is still an observable improvement.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
87 lines
2.9 KiB
C
87 lines
2.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef BTRFS_FILE_ITEM_H
|
|
#define BTRFS_FILE_ITEM_H
|
|
|
|
#include <linux/blk_types.h>
|
|
#include <linux/list.h>
|
|
#include <uapi/linux/btrfs_tree.h>
|
|
#include "ctree.h"
|
|
#include "ordered-data.h"
|
|
|
|
/* Forward declarations for types this header only refers to by pointer. */
struct extent_map;
struct btrfs_file_extent_item;
struct btrfs_fs_info;
struct btrfs_path;
struct btrfs_bio;
struct btrfs_trans_handle;
struct btrfs_root;
struct btrfs_ordered_sum;
struct btrfs_inode;
|
|
|
|
/*
 * Byte offset of the disk_bytenr member inside struct btrfs_file_extent_item.
 * The inline helpers in this header use it as the offset at which inline
 * data starts within a file extent item.
 */
#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
	(offsetof(struct btrfs_file_extent_item, disk_bytenr))
|
|
|
|
static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info)
|
|
{
|
|
return BTRFS_MAX_ITEM_SIZE(info) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
|
|
}
|
|
|
|
/*
|
|
* Return the number of bytes used by the item on disk, minus the size of any
|
|
* extent headers. If a file is compressed on disk, this is the compressed
|
|
* size.
|
|
*/
|
|
static inline u32 btrfs_file_extent_inline_item_len(
|
|
const struct extent_buffer *eb,
|
|
int nr)
|
|
{
|
|
return btrfs_item_size(eb, nr) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
|
|
}
|
|
|
|
static inline unsigned long btrfs_file_extent_inline_start(
|
|
const struct btrfs_file_extent_item *e)
|
|
{
|
|
return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START;
|
|
}
|
|
|
|
/*
 * Return the total item size needed to store @datasize bytes of inline
 * data: the payload plus the BTRFS_FILE_EXTENT_INLINE_DATA_START bytes that
 * precede it in the file extent item.
 */
static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
{
	return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize;
}
|
|
|
|
int btrfs_del_csums(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root, u64 bytenr, u64 len);
|
|
int btrfs_lookup_bio_sums(struct btrfs_bio *bbio);
|
|
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root, u64 objectid, u64 pos,
|
|
u64 num_bytes);
|
|
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_path *path, u64 objectid,
|
|
u64 bytenr, int mod);
|
|
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_ordered_sum *sums);
|
|
int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async);
|
|
int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
|
|
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|
struct list_head *list, int search_commit,
|
|
bool nowait);
|
|
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
|
|
struct list_head *list, bool nowait);
|
|
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
|
|
u64 start, u64 end, u8 *csum_buf,
|
|
unsigned long *csum_bitmap);
|
|
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
|
const struct btrfs_path *path,
|
|
const struct btrfs_file_extent_item *fi,
|
|
struct extent_map *em);
|
|
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
|
|
u64 len);
|
|
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, u64 len);
|
|
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size);
|
|
u64 btrfs_file_extent_end(const struct btrfs_path *path);
|
|
|
|
#endif
|