NFSv4/flexfiles: Write path updates for striped layouts

Updates write path to calculate and use dss_id to direct IO to the
appropriate stripe DS.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
This commit is contained in:
Jonathan Curley 2025-09-24 16:20:48 +00:00 committed by Anna Schumaker
parent 8a8729db67
commit 06d157d6fc
1 changed files with 30 additions and 12 deletions

View File

@ -605,6 +605,14 @@ ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
_ff_layout_free_lseg(fls);
}
static u32 calc_commit_idx(struct pnfs_layout_segment *lseg,
u32 mirror_idx, u32 dss_id)
{
struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
return (mirror_idx * flseg->mirror_array[0]->dss_count) + dss_id;
}
static u32 calc_mirror_idx_from_commit(struct pnfs_layout_segment *lseg,
u32 commit_index)
{
@ -1009,7 +1017,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs4_ff_layout_mirror *mirror;
struct nfs_pgio_mirror *pgm;
struct nfs4_pnfs_ds *ds;
u32 i;
u32 i, dss_id;
retry:
pnfs_generic_pg_check_layout(pgio, req);
@ -1034,7 +1042,12 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, 0, true);
dss_id = nfs4_ff_layout_calc_dss_id(
FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit,
mirror->dss_count,
req_offset(req));
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror,
dss_id, true);
if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
@ -1044,7 +1057,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
goto retry;
}
pgm = &pgio->pg_mirrors[i];
pgm->pg_bsize = mirror->dss[0].mirror_ds->ds_versions[0].wsize;
pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize;
}
if (NFS_SERVER(pgio->pg_inode)->flags &
@ -1117,7 +1130,7 @@ static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
.pg_init = ff_layout_pg_init_write,
.pg_test = pnfs_generic_pg_test,
.pg_test = ff_layout_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
.pg_cleanup = pnfs_generic_pg_cleanup,
@ -2051,25 +2064,30 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
int vers;
struct nfs_fh *fh;
u32 idx = hdr->pgio_mirror_idx;
u32 dss_id;
bool ds_fatal_error = false;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, 0, true);
dss_id = nfs4_ff_layout_calc_dss_id(
FF_LAYOUT_LSEG(lseg)->stripe_unit,
mirror->dss_count,
offset);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, true);
if (IS_ERR(ds)) {
ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
}
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode, 0);
hdr->inode, dss_id);
if (IS_ERR(ds_clnt))
goto out_failed;
ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, 0);
ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id);
if (!ds_cred)
goto out_failed;
vers = nfs4_ff_layout_ds_version(mirror, 0);
vers = nfs4_ff_layout_ds_version(mirror, dss_id);
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
@ -2079,12 +2097,12 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
hdr->pgio_done_cb = ff_layout_write_done_cb;
refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_ff_layout_select_ds_fh(mirror, 0);
hdr->ds_commit_idx = calc_commit_idx(lseg, idx, dss_id);
fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id);
if (fh)
hdr->args.fh = fh;
nfs4_ff_layout_select_ds_stateid(mirror, 0, &hdr->args.stateid);
nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid);
/*
* Note that if we ever decide to split across DSes,
@ -2093,7 +2111,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
hdr->args.offset = offset;
/* Start IO accounting for local write */
localio = ff_local_open_fh(lseg, idx, 0, ds->ds_clp, ds_cred, fh,
localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh,
FMODE_READ|FMODE_WRITE);
if (localio) {
hdr->task.tk_start = ktime_get();