btrfs: raid56: introduce a new parameter to locate a sector

Since we cannot ensure that all bios from the higher layer are backed by
large folios (e.g. direct IO, encoded read/write/send), we need the
ability to locate a sub-block (i.e. a page) inside a full stripe.

So the existing @stripe_nr + @sector_nr combination is not enough to
locate such a page for bs > ps cases.

Introduce a new parameter, @step_nr, to locate the page of a larger fs
block.  The naming follows the convention used elsewhere inside btrfs,
where one step is min(sectorsize, PAGE_SIZE).

It's still a preparation, only touching the following aspects:

- btrfs_dump_rbio()
  To show the new @sector_nsteps member.

- btrfs_raid_bio::sector_nsteps
  Recording how many steps there are inside a fs block.

- Enlarge btrfs_raid_bio::*_paddrs[] size
  To take @sector_nsteps into consideration.

- index_one_bio()
- index_stripe_sectors()
- memcpy_from_bio_to_stripe()
- cache_rbio_pages()
- need_read_stripe_sectors()
  Those functions iterate over *_paddrs[] and need to take
  sector_nsteps into consideration.

- Rename rbio_stripe_sector_index() to rbio_sector_index()
  The "stripe" part is not that helpful.

  And an extra ASSERT() before returning the result.

- Add a new rbio_paddr_index() helper
  This will take the extra @step_nr into consideration.

- The comments of btrfs_raid_bio

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2025-11-13 19:11:36 +10:30 committed by David Sterba
parent 9042dc0002
commit 91cd1b5865
2 changed files with 80 additions and 34 deletions

View File

@ -66,10 +66,10 @@ static void btrfs_dump_rbio(const struct btrfs_fs_info *fs_info,
dump_bioc(fs_info, rbio->bioc);
btrfs_crit(fs_info,
"rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u scrubp=%u dbitmap=0x%lx",
"rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u sector_nsteps=%u scrubp=%u dbitmap=0x%lx",
rbio->flags, rbio->nr_sectors, rbio->nr_data,
rbio->real_stripes, rbio->stripe_nsectors,
rbio->scrubp, rbio->dbitmap);
rbio->sector_nsteps, rbio->scrubp, rbio->dbitmap);
}
#define ASSERT_RBIO(expr, rbio) \
@ -229,15 +229,20 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
static void memcpy_from_bio_to_stripe(struct btrfs_raid_bio *rbio, unsigned int sector_nr)
{
phys_addr_t dst = rbio->stripe_paddrs[sector_nr];
phys_addr_t src = rbio->bio_paddrs[sector_nr];
const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
ASSERT(sector_nr < rbio->nr_sectors);
for (int i = 0; i < rbio->sector_nsteps; i++) {
unsigned int index = sector_nr * rbio->sector_nsteps + i;
phys_addr_t dst = rbio->stripe_paddrs[index];
phys_addr_t src = rbio->bio_paddrs[index];
ASSERT(dst != INVALID_PADDR);
ASSERT(src != INVALID_PADDR);
memcpy_page(phys_to_page(dst), offset_in_page(dst),
phys_to_page(src), offset_in_page(src),
rbio->bioc->fs_info->sectorsize);
phys_to_page(src), offset_in_page(src), step);
}
}
/*
@ -260,7 +265,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
for (i = 0; i < rbio->nr_sectors; i++) {
/* Some range not covered by bio (partial write), skip it */
if (rbio->bio_paddrs[i] == INVALID_PADDR) {
if (rbio->bio_paddrs[i * rbio->sector_nsteps] == INVALID_PADDR) {
/*
* Even if the sector is not covered by bio, if it is
* a data sector it should still be uptodate as it is
@ -320,11 +325,12 @@ static __maybe_unused bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbi
*/
static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
u32 offset;
int i;
for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
for (i = 0, offset = 0; i < rbio->nr_sectors * rbio->sector_nsteps;
i++, offset += step) {
int page_index = offset >> PAGE_SHIFT;
ASSERT(page_index < rbio->nr_pages);
@ -668,21 +674,41 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
return 1;
}
static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
/* Return the sector index for @stripe_nr and @sector_nr. */
static unsigned int rbio_sector_index(const struct btrfs_raid_bio *rbio,
unsigned int stripe_nr,
unsigned int sector_nr)
{
unsigned int ret;
ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr);
ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr);
return stripe_nr * rbio->stripe_nsectors + sector_nr;
ret = stripe_nr * rbio->stripe_nsectors + sector_nr;
ASSERT(ret < rbio->nr_sectors);
return ret;
}
/* Return the paddr array index for @stripe_nr, @sector_nr and @step_nr. */
static unsigned int rbio_paddr_index(const struct btrfs_raid_bio *rbio,
unsigned int stripe_nr,
unsigned int sector_nr,
unsigned int step_nr)
{
unsigned int ret;
ASSERT_RBIO_SECTOR(step_nr < rbio->sector_nsteps, rbio, step_nr);
ret = rbio_sector_index(rbio, stripe_nr, sector_nr) * rbio->sector_nsteps + step_nr;
ASSERT(ret < rbio->nr_sectors * rbio->sector_nsteps);
return ret;
}
/* Return a paddr from rbio->stripe_sectors, not from the bio list */
static phys_addr_t rbio_stripe_paddr(const struct btrfs_raid_bio *rbio,
unsigned int stripe_nr, unsigned int sector_nr)
{
return rbio->stripe_paddrs[rbio_stripe_sector_index(rbio, stripe_nr, sector_nr)];
return rbio->stripe_paddrs[rbio_paddr_index(rbio, stripe_nr, sector_nr, 0)];
}
/* Grab a paddr inside P stripe */
@ -985,6 +1011,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
const unsigned int stripe_nsectors =
BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
const unsigned int num_sectors = stripe_nsectors * real_stripes;
const unsigned int step = min(fs_info->sectorsize, PAGE_SIZE);
const unsigned int sector_nsteps = fs_info->sectorsize / step;
struct btrfs_raid_bio *rbio;
/* PAGE_SIZE must also be aligned to sectorsize for subpage support */
@ -1007,8 +1035,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
return ERR_PTR(-ENOMEM);
rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *),
GFP_NOFS);
rbio->bio_paddrs = kcalloc(num_sectors, sizeof(phys_addr_t), GFP_NOFS);
rbio->stripe_paddrs = kcalloc(num_sectors, sizeof(phys_addr_t), GFP_NOFS);
rbio->bio_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS);
rbio->stripe_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS);
rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS);
rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
rbio->stripe_uptodate_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
@ -1019,7 +1047,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
kfree(rbio);
return ERR_PTR(-ENOMEM);
}
for (int i = 0; i < num_sectors; i++) {
for (int i = 0; i < num_sectors * sector_nsteps; i++) {
rbio->stripe_paddrs[i] = INVALID_PADDR;
rbio->bio_paddrs[i] = INVALID_PADDR;
}
@ -1037,6 +1065,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
rbio->real_stripes = real_stripes;
rbio->stripe_npages = stripe_npages;
rbio->stripe_nsectors = stripe_nsectors;
rbio->sector_nsteps = sector_nsteps;
refcount_set(&rbio->refs, 1);
atomic_set(&rbio->stripes_pending, 0);
@ -1192,18 +1221,19 @@ static int rbio_add_io_paddr(struct btrfs_raid_bio *rbio, struct bio_list *bio_l
static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits;
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
const u32 step_bits = min(fs_info->sectorsize_bits, PAGE_SHIFT);
struct bvec_iter iter = bio->bi_iter;
phys_addr_t paddr;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->full_stripe_logical;
btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) {
unsigned int index = (offset >> sectorsize_bits);
btrfs_bio_for_each_block(paddr, bio, &iter, step) {
unsigned int index = (offset >> step_bits);
rbio->bio_paddrs[index] = paddr;
offset += sectorsize;
offset += step;
}
}
@ -1303,7 +1333,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
sector_paddr_in_rbio(rbio, stripe, sectornr, 0));
/* Then add the parity stripe */
set_bit(rbio_stripe_sector_index(rbio, rbio->nr_data, sectornr),
set_bit(rbio_sector_index(rbio, rbio->nr_data, sectornr),
rbio->stripe_uptodate_bitmap);
pointers[stripe++] = kmap_local_paddr(rbio_pstripe_paddr(rbio, sectornr));
@ -1312,7 +1342,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
* RAID6, add the qstripe and call the library function
* to fill in our p/q
*/
set_bit(rbio_stripe_sector_index(rbio, rbio->nr_data + 1, sectornr),
set_bit(rbio_sector_index(rbio, rbio->nr_data + 1, sectornr),
rbio->stripe_uptodate_bitmap);
pointers[stripe++] = kmap_local_paddr(rbio_qstripe_paddr(rbio, sectornr));
@ -1932,7 +1962,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
if (ret < 0)
goto cleanup;
set_bit(rbio_stripe_sector_index(rbio, faila, sector_nr),
set_bit(rbio_sector_index(rbio, faila, sector_nr),
rbio->stripe_uptodate_bitmap);
}
if (failb >= 0) {
@ -1940,7 +1970,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
if (ret < 0)
goto cleanup;
set_bit(rbio_stripe_sector_index(rbio, failb, sector_nr),
set_bit(rbio_sector_index(rbio, failb, sector_nr),
rbio->stripe_uptodate_bitmap);
}
@ -2288,7 +2318,7 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
int i;
for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) {
phys_addr_t paddr = rbio->stripe_paddrs[i];
phys_addr_t paddr = rbio->stripe_paddrs[i * rbio->sector_nsteps];
/*
* We have a sector which doesn't have page nor uptodate,
@ -2746,7 +2776,7 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
* The bio cache may have handed us an uptodate sector. If so,
* use it.
*/
if (test_bit(rbio_stripe_sector_index(rbio, stripe, sectornr),
if (test_bit(rbio_sector_index(rbio, stripe, sectornr),
rbio->stripe_uptodate_bitmap))
continue;

View File

@ -48,7 +48,7 @@ enum btrfs_rbio_ops {
* If there is no bio covering a sector, then btrfs_raid_bio::bio_paddrs[i] will
* be INVALID_PADDR.
*
* The length of each entry in bio_paddrs[] is sectorsize.
* The length of each entry in bio_paddrs[] is a step (aka, min(sectorsize, PAGE_SIZE)).
*
* [PAGES FOR INTERNAL USAGES]
* Pages not covered by any bio or belonging to P/Q stripes are stored in
@ -70,7 +70,7 @@ enum btrfs_rbio_ops {
* If the corresponding page of stripe_paddrs[i] is not allocated, the value of
* stripe_paddrs[i] will be INVALID_PADDR.
*
* The length of each entry in stripe_paddrs[] is sectorsize.
* The length of each entry in stripe_paddrs[] is a step.
*
* [LOCATING A SECTOR]
* To locate a sector for IO, we need the following info:
@ -83,7 +83,15 @@ enum btrfs_rbio_ops {
* Starts from 0 (representing the first sector of the stripe), ends
* at BTRFS_STRIPE_LEN / sectorsize - 1.
*
* All existing bitmaps are based on sector numbers.
* - step_nr
 * A step is min(sectorsize, PAGE_SIZE).
*
* Starts from 0 (representing the first step of the sector), ends
* at @sector_nsteps - 1.
*
 * Most call sites do not need to bother with this parameter.
* It is for bs > ps support and only for vertical stripe related works.
* (e.g. RMW/recover)
*
* - from which array
* Whether grabbing from stripe_paddrs[] (aka, internal pages) or from the
@ -151,6 +159,14 @@ struct btrfs_raid_bio {
/* How many sectors there are for each stripe */
u8 stripe_nsectors;
/*
* How many steps there are for one sector.
*
* For bs > ps cases, it's sectorsize / PAGE_SIZE.
* For bs <= ps cases, it's always 1.
*/
u8 sector_nsteps;
/* Stripe number that we're scrubbing */
u8 scrubp;