NFSv4/flexfiles: Add support for striped layouts

Update the lseg creation path to parse striped layouts and add them to
the layout segment, which enables support for striped layouts.

Limitations:

1. All mirrors must have the same number of stripes.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
This commit is contained in:
Jonathan Curley 2025-09-24 16:20:50 +00:00 committed by Anna Schumaker
parent 8a8e0f5566
commit 20b1d75fb8
2 changed files with 167 additions and 102 deletions

View File

@ -177,18 +177,19 @@ ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id,
#endif
}
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
const struct nfs4_ff_layout_mirror *m2)
static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1,
const struct nfs4_ff_layout_ds_stripe *dss2)
{
int i, j;
if (m1->dss[0].fh_versions_cnt != m2->dss[0].fh_versions_cnt)
if (dss1->fh_versions_cnt != dss2->fh_versions_cnt)
return false;
for (i = 0; i < m1->dss[0].fh_versions_cnt; i++) {
for (i = 0; i < dss1->fh_versions_cnt; i++) {
bool found_fh = false;
for (j = 0; j < m2->dss[0].fh_versions_cnt; j++) {
if (nfs_compare_fh(&m1->dss[0].fh_versions[i],
&m2->dss[0].fh_versions[j]) == 0) {
for (j = 0; j < dss2->fh_versions_cnt; j++) {
if (nfs_compare_fh(&dss1->fh_versions[i],
&dss2->fh_versions[j]) == 0) {
found_fh = true;
break;
}
@ -199,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
return true;
}
/*
 * Compare the filehandles of two mirrors stripe by stripe.
 *
 * Returns true only when both mirrors carry the same dss_count and
 * ff_dss_match_fh() accepts every pair of stripes found at the same index;
 * returns false at the first mismatch.
 */
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
			       const struct nfs4_ff_layout_mirror *m2)
{
	u32 idx = 0;

	/* Mirrors with different stripe counts can never match. */
	if (m1->dss_count != m2->dss_count)
		return false;

	while (idx < m1->dss_count) {
		if (!ff_dss_match_fh(&m1->dss[idx], &m2->dss[idx]))
			return false;
		idx++;
	}

	return true;
}
/*
 * Compare the device ids of two mirrors stripe by stripe.
 *
 * Returns true only when both mirrors carry the same dss_count and the
 * devid of every stripe in m1 is byte-for-byte equal (memcmp) to the devid
 * of the stripe at the same index in m2.
 */
static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1,
				  const struct nfs4_ff_layout_mirror *m2)
{
	u32 idx;

	/* Mirrors with different stripe counts can never match. */
	if (m1->dss_count != m2->dss_count)
		return false;

	for (idx = 0; idx < m1->dss_count; idx++) {
		if (memcmp(&m1->dss[idx].devid, &m2->dss[idx].devid,
			   sizeof(m1->dss[idx].devid)))
			return false;
	}

	return true;
}
static struct nfs4_ff_layout_mirror *
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
struct nfs4_ff_layout_mirror *mirror)
@ -209,8 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
spin_lock(&inode->i_lock);
list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
if (memcmp(&mirror->dss[0].devid, &pos->dss[0].devid,
sizeof(pos->dss[0].devid)) != 0)
if (!ff_mirror_match_devid(mirror, pos))
continue;
if (!ff_mirror_match_fh(mirror, pos))
continue;
@ -241,13 +273,15 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
/*
 * Allocate a zero-filled mirror and initialise its bookkeeping fields:
 * the spinlock, a reference count of 1 and an empty "mirrors" list head.
 *
 * Returns the new mirror, or NULL when kzalloc() fails.
 */
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
{
struct nfs4_ff_layout_mirror *mirror;
u32 dss_id;
mirror = kzalloc(sizeof(*mirror), gfp_flags);
if (mirror != NULL) {
spin_lock_init(&mirror->lock);
refcount_set(&mirror->ref, 1);
INIT_LIST_HEAD(&mirror->mirrors);
/*
 * NOTE(review): this rendering carries no +/- markers; the next line is
 * presumably the pre-change statement that the loop after it replaces.
 */
nfs_localio_file_init(&mirror->dss[0].nfl);
/*
 * NOTE(review): kzalloc() returns zeroed memory and nothing visible in
 * this function sets dss_count or dss before this point, so as shown the
 * loop condition is false on entry and no nfl is initialised here.
 * Confirm where dss/dss_count get populated relative to this init.
 */
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
nfs_localio_file_init(&mirror->dss[dss_id].nfl);
}
return mirror;
}
@ -255,17 +289,19 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
const struct cred *cred;
int dss_id = 0;
u32 dss_id;
ff_layout_remove_mirror(mirror);
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
kfree(mirror->dss[dss_id].fh_versions);
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
put_cred(cred);
cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
put_cred(cred);
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
}
kfree(mirror->dss);
kfree(mirror);
@ -371,14 +407,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
free_me);
}
/*
 * Add up the efficiency value of every stripe in @mirror.
 *
 * The total is accumulated in a u32 and returned as such; a mirror with
 * dss_count == 0 yields 0.
 */
static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror)
{
	u32 total = 0;
	u32 idx = 0;

	while (idx < mirror->dss_count) {
		total += mirror->dss[idx].efficiency;
		idx++;
	}

	return total;
}
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
int i, j;
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
for (j = i + 1; j < fls->mirror_array_cnt; j++)
if (fls->mirror_array[i]->dss[0].efficiency <
fls->mirror_array[j]->dss[0].efficiency)
if (ff_mirror_efficiency_sum(fls->mirror_array[i]) <
ff_mirror_efficiency_sum(fls->mirror_array[j]))
swap(fls->mirror_array[i],
fls->mirror_array[j]);
}
@ -398,6 +444,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
u32 mirror_array_cnt;
__be32 *p;
int i, rc;
struct nfs4_ff_layout_ds_stripe *dss_info;
dprintk("--> %s\n", __func__);
scratch = folio_alloc(gfp_flags, 0);
@ -440,17 +487,24 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
kuid_t uid;
kgid_t gid;
u32 fh_count, id;
int j, dss_id = 0;
int j, dss_id;
rc = -EIO;
p = xdr_inline_decode(&stream, 4);
if (!p)
goto out_err_free;
// Ensure all mirrors have same stripe count.
if (dss_count == 0)
dss_count = be32_to_cpup(p);
else if (dss_count != be32_to_cpup(p))
goto out_err_free;
/* FIXME: allow for striping? */
if (dss_count != 1)
if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT ||
dss_count == 0)
goto out_err_free;
if (dss_count > 1 && stripe_unit == 0)
goto out_err_free;
fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
@ -464,8 +518,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
gfp_flags);
for (dss_id = 0; dss_id < dss_count; dss_id++) {
dss_info = &fls->mirror_array[i]->dss[dss_id];
dss_info->mirror = fls->mirror_array[i];
/* deviceid */
rc = decode_deviceid(&stream, &fls->mirror_array[i]->dss[dss_id].devid);
rc = decode_deviceid(&stream, &dss_info->devid);
if (rc)
goto out_err_free;
@ -474,10 +532,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
p = xdr_inline_decode(&stream, 4);
if (!p)
goto out_err_free;
fls->mirror_array[i]->dss[dss_id].efficiency = be32_to_cpup(p);
dss_info->efficiency = be32_to_cpup(p);
/* stateid */
rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->dss[dss_id].stateid);
rc = decode_pnfs_stateid(&stream, &dss_info->stateid);
if (rc)
goto out_err_free;
@ -488,22 +546,22 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
goto out_err_free;
fh_count = be32_to_cpup(p);
fls->mirror_array[i]->dss[dss_id].fh_versions =
dss_info->fh_versions =
kcalloc(fh_count, sizeof(struct nfs_fh),
gfp_flags);
if (fls->mirror_array[i]->dss[dss_id].fh_versions == NULL) {
if (dss_info->fh_versions == NULL) {
rc = -ENOMEM;
goto out_err_free;
}
for (j = 0; j < fh_count; j++) {
rc = decode_nfs_fh(&stream,
&fls->mirror_array[i]->dss[dss_id].fh_versions[j]);
&dss_info->fh_versions[j]);
if (rc)
goto out_err_free;
}
fls->mirror_array[i]->dss[dss_id].fh_versions_cnt = fh_count;
dss_info->fh_versions_cnt = fh_count;
/* user */
rc = decode_name(&stream, &id);
@ -523,6 +581,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
kcred = prepare_kernel_cred(&init_task);
else {
unsigned int nofs_flags = memalloc_nofs_save();
kcred = prepare_kernel_cred(&init_task);
memalloc_nofs_restore(nofs_flags);
}
@ -534,21 +593,25 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
cred = RCU_INITIALIZER(kcred);
if (lgr->range.iomode == IOMODE_READ)
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
rcu_assign_pointer(dss_info->ro_cred, cred);
else
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
rcu_assign_pointer(dss_info->rw_cred, cred);
}
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
if (mirror != fls->mirror_array[i]) {
for (dss_id = 0; dss_id < dss_count; dss_id++) {
dss_info = &fls->mirror_array[i]->dss[dss_id];
/* swap cred ptrs so free_mirror will clean up old */
if (lgr->range.iomode == IOMODE_READ) {
cred = xchg(&mirror->dss[dss_id].ro_cred,
fls->mirror_array[i]->dss[dss_id].ro_cred);
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
dss_info->ro_cred);
rcu_assign_pointer(dss_info->ro_cred, cred);
} else {
cred = xchg(&mirror->dss[dss_id].rw_cred,
fls->mirror_array[i]->dss[dss_id].rw_cred);
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
dss_info->rw_cred);
rcu_assign_pointer(dss_info->rw_cred, cred);
}
}
ff_layout_free_mirror(fls->mirror_array[i]);
fls->mirror_array[i] = mirror;

View File

@ -21,6 +21,8 @@
* due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
/* Largest per-mirror stripe count that ff_layout_alloc_lseg() accepts when
 * decoding a layout; a larger count makes the decode fail. */
#define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096
/* LAYOUTSTATS report interval in ms */
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
#define FF_LAYOUTSTATS_MAXDEV 4