NFSv4/flexfiles: Add support for striped layouts

Update the lseg creation path to parse and add striped layouts, and
enable support for striped layouts.

Limitations:

1. All mirrors must have the same number of stripes.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
This commit is contained in:
Jonathan Curley 2025-09-24 16:20:50 +00:00 committed by Anna Schumaker
parent 8a8e0f5566
commit 20b1d75fb8
2 changed files with 167 additions and 102 deletions

View File

@ -177,18 +177,19 @@ ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id,
#endif #endif
} }
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1,
const struct nfs4_ff_layout_mirror *m2) const struct nfs4_ff_layout_ds_stripe *dss2)
{ {
int i, j; int i, j;
if (m1->dss[0].fh_versions_cnt != m2->dss[0].fh_versions_cnt) if (dss1->fh_versions_cnt != dss2->fh_versions_cnt)
return false; return false;
for (i = 0; i < m1->dss[0].fh_versions_cnt; i++) {
for (i = 0; i < dss1->fh_versions_cnt; i++) {
bool found_fh = false; bool found_fh = false;
for (j = 0; j < m2->dss[0].fh_versions_cnt; j++) { for (j = 0; j < dss2->fh_versions_cnt; j++) {
if (nfs_compare_fh(&m1->dss[0].fh_versions[i], if (nfs_compare_fh(&dss1->fh_versions[i],
&m2->dss[0].fh_versions[j]) == 0) { &dss2->fh_versions[j]) == 0) {
found_fh = true; found_fh = true;
break; break;
} }
@ -199,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
return true; return true;
} }
/*
 * Compare the filehandle sets of two mirrors, stripe by stripe.
 *
 * Returns true only when both mirrors report the same dss_count and
 * ff_dss_match_fh() accepts every pair of stripes found at the same index.
 */
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
			       const struct nfs4_ff_layout_mirror *m2)
{
	u32 idx = 0;

	/* Mirrors with differing stripe counts can never match. */
	if (m1->dss_count != m2->dss_count)
		return false;

	while (idx < m1->dss_count) {
		if (!ff_dss_match_fh(&m1->dss[idx], &m2->dss[idx]))
			return false;
		idx++;
	}

	return true;
}
/*
 * Compare the device ids of two mirrors, stripe by stripe.
 *
 * Returns true only when both mirrors report the same dss_count and the
 * devid of every stripe is bytewise identical to the devid of the stripe
 * at the same index in the other mirror.
 */
static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1,
				  const struct nfs4_ff_layout_mirror *m2)
{
	const u32 count = m1->dss_count;
	u32 idx;

	if (count != m2->dss_count)
		return false;

	for (idx = 0; idx < count; idx++) {
		if (memcmp(&m1->dss[idx].devid, &m2->dss[idx].devid,
			   sizeof(m1->dss[idx].devid)))
			return false;
	}

	return true;
}
static struct nfs4_ff_layout_mirror * static struct nfs4_ff_layout_mirror *
ff_layout_add_mirror(struct pnfs_layout_hdr *lo, ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
struct nfs4_ff_layout_mirror *mirror) struct nfs4_ff_layout_mirror *mirror)
@ -209,8 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
if (memcmp(&mirror->dss[0].devid, &pos->dss[0].devid, if (!ff_mirror_match_devid(mirror, pos))
sizeof(pos->dss[0].devid)) != 0)
continue; continue;
if (!ff_mirror_match_fh(mirror, pos)) if (!ff_mirror_match_fh(mirror, pos))
continue; continue;
@ -241,13 +273,15 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
{ {
struct nfs4_ff_layout_mirror *mirror; struct nfs4_ff_layout_mirror *mirror;
u32 dss_id;
mirror = kzalloc(sizeof(*mirror), gfp_flags); mirror = kzalloc(sizeof(*mirror), gfp_flags);
if (mirror != NULL) { if (mirror != NULL) {
spin_lock_init(&mirror->lock); spin_lock_init(&mirror->lock);
refcount_set(&mirror->ref, 1); refcount_set(&mirror->ref, 1);
INIT_LIST_HEAD(&mirror->mirrors); INIT_LIST_HEAD(&mirror->mirrors);
nfs_localio_file_init(&mirror->dss[0].nfl); for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
nfs_localio_file_init(&mirror->dss[dss_id].nfl);
} }
return mirror; return mirror;
} }
@ -255,17 +289,19 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{ {
const struct cred *cred; const struct cred *cred;
int dss_id = 0; u32 dss_id;
ff_layout_remove_mirror(mirror); ff_layout_remove_mirror(mirror);
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
kfree(mirror->dss[dss_id].fh_versions); kfree(mirror->dss[dss_id].fh_versions);
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred); cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
put_cred(cred); put_cred(cred);
cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred); cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
put_cred(cred); put_cred(cred);
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds); nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
}
kfree(mirror->dss); kfree(mirror->dss);
kfree(mirror); kfree(mirror);
@ -371,14 +407,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
free_me); free_me);
} }
static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror)
{
u32 dss_id, sum = 0;
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
sum += mirror->dss[dss_id].efficiency;
return sum;
}
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{ {
int i, j; int i, j;
for (i = 0; i < fls->mirror_array_cnt - 1; i++) { for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
for (j = i + 1; j < fls->mirror_array_cnt; j++) for (j = i + 1; j < fls->mirror_array_cnt; j++)
if (fls->mirror_array[i]->dss[0].efficiency < if (ff_mirror_efficiency_sum(fls->mirror_array[i]) <
fls->mirror_array[j]->dss[0].efficiency) ff_mirror_efficiency_sum(fls->mirror_array[j]))
swap(fls->mirror_array[i], swap(fls->mirror_array[i],
fls->mirror_array[j]); fls->mirror_array[j]);
} }
@ -398,6 +444,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
u32 mirror_array_cnt; u32 mirror_array_cnt;
__be32 *p; __be32 *p;
int i, rc; int i, rc;
struct nfs4_ff_layout_ds_stripe *dss_info;
dprintk("--> %s\n", __func__); dprintk("--> %s\n", __func__);
scratch = folio_alloc(gfp_flags, 0); scratch = folio_alloc(gfp_flags, 0);
@ -440,17 +487,24 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
kuid_t uid; kuid_t uid;
kgid_t gid; kgid_t gid;
u32 fh_count, id; u32 fh_count, id;
int j, dss_id = 0; int j, dss_id;
rc = -EIO; rc = -EIO;
p = xdr_inline_decode(&stream, 4); p = xdr_inline_decode(&stream, 4);
if (!p) if (!p)
goto out_err_free; goto out_err_free;
// Ensure all mirrors have same stripe count.
if (dss_count == 0)
dss_count = be32_to_cpup(p); dss_count = be32_to_cpup(p);
else if (dss_count != be32_to_cpup(p))
goto out_err_free;
/* FIXME: allow for striping? */ if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT ||
if (dss_count != 1) dss_count == 0)
goto out_err_free;
if (dss_count > 1 && stripe_unit == 0)
goto out_err_free; goto out_err_free;
fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags); fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
@ -464,8 +518,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
gfp_flags); gfp_flags);
for (dss_id = 0; dss_id < dss_count; dss_id++) {
dss_info = &fls->mirror_array[i]->dss[dss_id];
dss_info->mirror = fls->mirror_array[i];
/* deviceid */ /* deviceid */
rc = decode_deviceid(&stream, &fls->mirror_array[i]->dss[dss_id].devid); rc = decode_deviceid(&stream, &dss_info->devid);
if (rc) if (rc)
goto out_err_free; goto out_err_free;
@ -474,10 +532,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
p = xdr_inline_decode(&stream, 4); p = xdr_inline_decode(&stream, 4);
if (!p) if (!p)
goto out_err_free; goto out_err_free;
fls->mirror_array[i]->dss[dss_id].efficiency = be32_to_cpup(p); dss_info->efficiency = be32_to_cpup(p);
/* stateid */ /* stateid */
rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->dss[dss_id].stateid); rc = decode_pnfs_stateid(&stream, &dss_info->stateid);
if (rc) if (rc)
goto out_err_free; goto out_err_free;
@ -488,22 +546,22 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
goto out_err_free; goto out_err_free;
fh_count = be32_to_cpup(p); fh_count = be32_to_cpup(p);
fls->mirror_array[i]->dss[dss_id].fh_versions = dss_info->fh_versions =
kcalloc(fh_count, sizeof(struct nfs_fh), kcalloc(fh_count, sizeof(struct nfs_fh),
gfp_flags); gfp_flags);
if (fls->mirror_array[i]->dss[dss_id].fh_versions == NULL) { if (dss_info->fh_versions == NULL) {
rc = -ENOMEM; rc = -ENOMEM;
goto out_err_free; goto out_err_free;
} }
for (j = 0; j < fh_count; j++) { for (j = 0; j < fh_count; j++) {
rc = decode_nfs_fh(&stream, rc = decode_nfs_fh(&stream,
&fls->mirror_array[i]->dss[dss_id].fh_versions[j]); &dss_info->fh_versions[j]);
if (rc) if (rc)
goto out_err_free; goto out_err_free;
} }
fls->mirror_array[i]->dss[dss_id].fh_versions_cnt = fh_count; dss_info->fh_versions_cnt = fh_count;
/* user */ /* user */
rc = decode_name(&stream, &id); rc = decode_name(&stream, &id);
@ -523,6 +581,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
kcred = prepare_kernel_cred(&init_task); kcred = prepare_kernel_cred(&init_task);
else { else {
unsigned int nofs_flags = memalloc_nofs_save(); unsigned int nofs_flags = memalloc_nofs_save();
kcred = prepare_kernel_cred(&init_task); kcred = prepare_kernel_cred(&init_task);
memalloc_nofs_restore(nofs_flags); memalloc_nofs_restore(nofs_flags);
} }
@ -534,21 +593,25 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
cred = RCU_INITIALIZER(kcred); cred = RCU_INITIALIZER(kcred);
if (lgr->range.iomode == IOMODE_READ) if (lgr->range.iomode == IOMODE_READ)
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred); rcu_assign_pointer(dss_info->ro_cred, cred);
else else
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred); rcu_assign_pointer(dss_info->rw_cred, cred);
}
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]); mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
if (mirror != fls->mirror_array[i]) { if (mirror != fls->mirror_array[i]) {
for (dss_id = 0; dss_id < dss_count; dss_id++) {
dss_info = &fls->mirror_array[i]->dss[dss_id];
/* swap cred ptrs so free_mirror will clean up old */ /* swap cred ptrs so free_mirror will clean up old */
if (lgr->range.iomode == IOMODE_READ) { if (lgr->range.iomode == IOMODE_READ) {
cred = xchg(&mirror->dss[dss_id].ro_cred, cred = xchg(&mirror->dss[dss_id].ro_cred,
fls->mirror_array[i]->dss[dss_id].ro_cred); dss_info->ro_cred);
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred); rcu_assign_pointer(dss_info->ro_cred, cred);
} else { } else {
cred = xchg(&mirror->dss[dss_id].rw_cred, cred = xchg(&mirror->dss[dss_id].rw_cred,
fls->mirror_array[i]->dss[dss_id].rw_cred); dss_info->rw_cred);
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred); rcu_assign_pointer(dss_info->rw_cred, cred);
}
} }
ff_layout_free_mirror(fls->mirror_array[i]); ff_layout_free_mirror(fls->mirror_array[i]);
fls->mirror_array[i] = mirror; fls->mirror_array[i] = mirror;

View File

@ -21,6 +21,8 @@
* due to network error etc. */ * due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
#define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096
/* LAYOUTSTATS report interval in ms */ /* LAYOUTSTATS report interval in ms */
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) #define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
#define FF_LAYOUTSTATS_MAXDEV 4 #define FF_LAYOUTSTATS_MAXDEV 4