btrfs: raid56: store a physical address in structure sector_ptr

Instead of using a @page + @pg_offset pair inside the sector_ptr
structure, use a single physical address.

This allows us to grab both the page and the offset from a single
phys_addr_t value. We still need an extra bool, @has_paddr, to indicate
whether the sector is properly mapped, because a physical address of 0
is perfectly valid.

This change doesn't alter the size of the sector_ptr structure, but it
reduces the number of parameters of several functions.

Note: the original idea and patch are from Christoph Hellwig
(https://lore.kernel.org/linux-btrfs/20250409111055.3640328-7-hch@lst.de/),
but the final implementation is different.
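
For illustration only (this sketch is not part of the patch, and the
helper names below are made up), the conversion in both directions uses
kernel helpers the patch itself relies on: page_to_phys(),
phys_to_page() and offset_in_page().

    /*
     * Sketch only: a @page + @pg_offset pair and a single physical
     * address carry the same information.
     */
    static inline phys_addr_t sector_pack(struct page *page, unsigned int pgoff)
    {
            return page_to_phys(page) + pgoff;
    }

    static inline struct page *sector_page(phys_addr_t paddr)
    {
            return phys_to_page(paddr);
    }

    static inline unsigned int sector_pgoff(phys_addr_t paddr)
    {
            return offset_in_page(paddr);
    }

This mirrors what the new kmap_local_sector() helper does when mapping
a sector for CPU access.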

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[ Use physical addresses instead to handle highmem. ]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

@@ -134,14 +134,17 @@ struct btrfs_stripe_hash_table {
};
/*
* A bvec like structure to present a sector inside a page.
*
* Unlike bvec we don't need bvlen, as it's fixed to sectorsize.
* A structure to present a sector inside a page, the length is fixed to
* sectorsize;
*/
struct sector_ptr {
struct page *page;
unsigned int pgoff:24;
unsigned int uptodate:8;
/*
* Blocks from the bio list can still be highmem.
* So here we use physical address to present a page and the offset inside it.
*/
phys_addr_t paddr;
bool has_paddr;
bool uptodate;
};
static void rmw_rbio_work(struct work_struct *work);
@@ -233,6 +236,14 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
return 0;
}
static void memcpy_sectors(const struct sector_ptr *dst,
const struct sector_ptr *src, u32 blocksize)
{
memcpy_page(phys_to_page(dst->paddr), offset_in_page(dst->paddr),
phys_to_page(src->paddr), offset_in_page(src->paddr),
blocksize);
}
/*
* caching an rbio means to copy anything from the
* bio_sectors array into the stripe_pages array. We
@@ -253,7 +264,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
for (i = 0; i < rbio->nr_sectors; i++) {
/* Some range not covered by bio (partial write), skip it */
if (!rbio->bio_sectors[i].page) {
if (!rbio->bio_sectors[i].has_paddr) {
/*
* Even if the sector is not covered by bio, if it is
* a data sector it should still be uptodate as it is
@@ -264,12 +275,8 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
continue;
}
ASSERT(rbio->stripe_sectors[i].page);
memcpy_page(rbio->stripe_sectors[i].page,
rbio->stripe_sectors[i].pgoff,
rbio->bio_sectors[i].page,
rbio->bio_sectors[i].pgoff,
rbio->bioc->fs_info->sectorsize);
memcpy_sectors(&rbio->stripe_sectors[i], &rbio->bio_sectors[i],
rbio->bioc->fs_info->sectorsize);
rbio->stripe_sectors[i].uptodate = 1;
}
set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -326,8 +333,13 @@ static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
int page_index = offset >> PAGE_SHIFT;
ASSERT(page_index < rbio->nr_pages);
rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
if (!rbio->stripe_pages[page_index])
continue;
rbio->stripe_sectors[i].has_paddr = true;
rbio->stripe_sectors[i].paddr =
page_to_phys(rbio->stripe_pages[page_index]) +
offset_in_page(offset);
}
}
@@ -962,9 +974,9 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
spin_lock(&rbio->bio_list_lock);
sector = &rbio->bio_sectors[index];
if (sector->page || bio_list_only) {
if (sector->has_paddr || bio_list_only) {
/* Don't return sector without a valid page pointer */
if (!sector->page)
if (!sector->has_paddr)
sector = NULL;
spin_unlock(&rbio->bio_list_lock);
return sector;
@@ -1142,7 +1154,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
rbio, stripe_nr);
ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
rbio, sector_nr);
ASSERT(sector->page);
ASSERT(sector->has_paddr);
stripe = &rbio->bioc->stripes[stripe_nr];
disk_start = stripe->physical + sector_nr * sectorsize;
@@ -1173,8 +1185,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
*/
if (last_end == disk_start && !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
ret = bio_add_page(last, sector->page, sectorsize,
sector->pgoff);
ret = bio_add_page(last, phys_to_page(sector->paddr),
sectorsize, offset_in_page(sector->paddr));
if (ret == sectorsize)
return 0;
}
@@ -1187,7 +1199,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
bio->bi_private = rbio;
__bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
__bio_add_page(bio, phys_to_page(sector->paddr), sectorsize,
offset_in_page(sector->paddr));
bio_list_add(bio_list, bio);
return 0;
}
@@ -1205,10 +1218,8 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
struct sector_ptr *sector = &rbio->bio_sectors[index];
struct bio_vec bv = bio_iter_iovec(bio, iter);
sector->page = bv.bv_page;
sector->pgoff = bv.bv_offset;
ASSERT(sector->pgoff < PAGE_SIZE);
sector->has_paddr = true;
sector->paddr = bvec_phys(&bv);
bio_advance_iter_single(bio, &iter, sectorsize);
offset += sectorsize;
}
@@ -1288,6 +1299,15 @@ static void assert_rbio(struct btrfs_raid_bio *rbio)
ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio);
}
static inline void *kmap_local_sector(const struct sector_ptr *sector)
{
/* The sector pointer must have a page mapped to it. */
ASSERT(sector->has_paddr);
return kmap_local_page(phys_to_page(sector->paddr)) +
offset_in_page(sector->paddr);
}
/* Generate PQ for one vertical stripe. */
static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
{
@@ -1300,14 +1320,13 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
/* First collect one sector from each data stripe */
for (stripe = 0; stripe < rbio->nr_data; stripe++) {
sector = sector_in_rbio(rbio, stripe, sectornr, 0);
pointers[stripe] = kmap_local_page(sector->page) +
sector->pgoff;
pointers[stripe] = kmap_local_sector(sector);
}
/* Then add the parity stripe */
sector = rbio_pstripe_sector(rbio, sectornr);
sector->uptodate = 1;
pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
pointers[stripe++] = kmap_local_sector(sector);
if (has_qstripe) {
/*
@@ -1316,8 +1335,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
*/
sector = rbio_qstripe_sector(rbio, sectornr);
sector->uptodate = 1;
pointers[stripe++] = kmap_local_page(sector->page) +
sector->pgoff;
pointers[stripe++] = kmap_local_sector(sector);
assert_rbio(rbio);
raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
@@ -1476,15 +1494,14 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
* stripe_pages[], thus we need to locate the sector.
*/
static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
struct page *page,
unsigned int pgoff)
phys_addr_t paddr)
{
int i;
for (i = 0; i < rbio->nr_sectors; i++) {
struct sector_ptr *sector = &rbio->stripe_sectors[i];
if (sector->page == page && sector->pgoff == pgoff)
if (sector->has_paddr && sector->paddr == paddr)
return sector;
}
return NULL;
@@ -1504,11 +1521,10 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct sector_ptr *sector;
int pgoff;
phys_addr_t paddr = bvec_phys(bvec);
for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
pgoff += sectorsize) {
sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
for (u32 off = 0; off < bvec->bv_len; off += sectorsize) {
sector = find_stripe_sector(rbio, paddr + off);
ASSERT(sector);
if (sector)
sector->uptodate = 1;
@@ -1518,17 +1534,14 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
{
struct bio_vec *bv = bio_first_bvec_all(bio);
phys_addr_t bvec_paddr = bvec_phys(bio_first_bvec_all(bio));
int i;
for (i = 0; i < rbio->nr_sectors; i++) {
struct sector_ptr *sector;
sector = &rbio->stripe_sectors[i];
if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
if (rbio->stripe_sectors[i].paddr == bvec_paddr)
break;
sector = &rbio->bio_sectors[i];
if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
if (rbio->bio_sectors[i].has_paddr &&
rbio->bio_sectors[i].paddr == bvec_paddr)
break;
}
ASSERT(i < rbio->nr_sectors);
@@ -1810,12 +1823,10 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
}
ASSERT(sector->page);
kaddr = kmap_local_page(sector->page) + sector->pgoff;
csum_expected = rbio->csum_buf +
(stripe_nr * rbio->stripe_nsectors + sector_nr) *
fs_info->csum_size;
kaddr = kmap_local_sector(sector);
ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected);
kunmap_local(kaddr);
return ret;
@@ -1874,9 +1885,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
} else {
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
}
ASSERT(sector->page);
pointers[stripe_nr] = kmap_local_page(sector->page) +
sector->pgoff;
pointers[stripe_nr] = kmap_local_sector(sector);
unmap_array[stripe_nr] = pointers[stripe_nr];
}
@@ -2328,7 +2337,7 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
* thus this rbio can not be cached one, as cached one must
* have all its data sectors present and uptodate.
*/
if (!sector->page || !sector->uptodate)
if (!sector->has_paddr || !sector->uptodate)
return true;
}
return false;
@@ -2518,6 +2527,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
int stripe;
int sectornr;
bool has_qstripe;
struct page *page;
struct sector_ptr p_sector = { 0 };
struct sector_ptr q_sector = { 0 };
struct bio_list bio_list;
@@ -2549,29 +2559,33 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
*/
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
p_sector.page = alloc_page(GFP_NOFS);
if (!p_sector.page)
page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
p_sector.pgoff = 0;
p_sector.has_paddr = true;
p_sector.paddr = page_to_phys(page);
p_sector.uptodate = 1;
page = NULL;
if (has_qstripe) {
/* RAID6, allocate and map temp space for the Q stripe */
q_sector.page = alloc_page(GFP_NOFS);
if (!q_sector.page) {
__free_page(p_sector.page);
p_sector.page = NULL;
page = alloc_page(GFP_NOFS);
if (!page) {
__free_page(phys_to_page(p_sector.paddr));
p_sector.has_paddr = false;
return -ENOMEM;
}
q_sector.pgoff = 0;
q_sector.has_paddr = true;
q_sector.paddr = page_to_phys(page);
q_sector.uptodate = 1;
pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
page = NULL;
pointers[rbio->real_stripes - 1] = kmap_local_sector(&q_sector);
}
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
/* Map the parity stripe just once */
pointers[nr_data] = kmap_local_page(p_sector.page);
pointers[nr_data] = kmap_local_sector(&p_sector);
for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
struct sector_ptr *sector;
@@ -2580,8 +2594,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
/* first collect one page from each data stripe */
for (stripe = 0; stripe < nr_data; stripe++) {
sector = sector_in_rbio(rbio, stripe, sectornr, 0);
pointers[stripe] = kmap_local_page(sector->page) +
sector->pgoff;
pointers[stripe] = kmap_local_sector(sector);
}
if (has_qstripe) {
@@ -2597,7 +2610,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
/* Check scrubbing parity and repair it */
sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
parity = kmap_local_page(sector->page) + sector->pgoff;
parity = kmap_local_sector(sector);
if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
memcpy(parity, pointers[rbio->scrubp], sectorsize);
else
@@ -2610,12 +2623,11 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
}
kunmap_local(pointers[nr_data]);
__free_page(p_sector.page);
p_sector.page = NULL;
if (q_sector.page) {
kunmap_local(pointers[rbio->real_stripes - 1]);
__free_page(q_sector.page);
q_sector.page = NULL;
__free_page(phys_to_page(p_sector.paddr));
p_sector.has_paddr = false;
if (q_sector.has_paddr) {
__free_page(phys_to_page(q_sector.paddr));
q_sector.has_paddr = false;
}
/*