mirror of https://github.com/torvalds/linux.git
NFSv4/flexfiles: Add support for striped layouts
Update the lseg creation path to parse and add striped layouts, and enable support for striped layouts.

Limitations:
1. All mirrors must have the same number of stripes.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
This commit is contained in:
parent
8a8e0f5566
commit
20b1d75fb8
|
|
@ -177,18 +177,19 @@ ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
|
static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1,
|
||||||
const struct nfs4_ff_layout_mirror *m2)
|
const struct nfs4_ff_layout_ds_stripe *dss2)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
if (m1->dss[0].fh_versions_cnt != m2->dss[0].fh_versions_cnt)
|
if (dss1->fh_versions_cnt != dss2->fh_versions_cnt)
|
||||||
return false;
|
return false;
|
||||||
for (i = 0; i < m1->dss[0].fh_versions_cnt; i++) {
|
|
||||||
|
for (i = 0; i < dss1->fh_versions_cnt; i++) {
|
||||||
bool found_fh = false;
|
bool found_fh = false;
|
||||||
for (j = 0; j < m2->dss[0].fh_versions_cnt; j++) {
|
for (j = 0; j < dss2->fh_versions_cnt; j++) {
|
||||||
if (nfs_compare_fh(&m1->dss[0].fh_versions[i],
|
if (nfs_compare_fh(&dss1->fh_versions[i],
|
||||||
&m2->dss[0].fh_versions[j]) == 0) {
|
&dss2->fh_versions[j]) == 0) {
|
||||||
found_fh = true;
|
found_fh = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -199,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
|
||||||
|
const struct nfs4_ff_layout_mirror *m2)
|
||||||
|
{
|
||||||
|
u32 dss_id;
|
||||||
|
|
||||||
|
if (m1->dss_count != m2->dss_count)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
|
||||||
|
if (!ff_dss_match_fh(&m1->dss[dss_id], &m2->dss[dss_id]))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1,
|
||||||
|
const struct nfs4_ff_layout_mirror *m2)
|
||||||
|
{
|
||||||
|
u32 dss_id;
|
||||||
|
|
||||||
|
if (m1->dss_count != m2->dss_count)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
|
||||||
|
if (memcmp(&m1->dss[dss_id].devid,
|
||||||
|
&m2->dss[dss_id].devid,
|
||||||
|
sizeof(m1->dss[dss_id].devid)) != 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static struct nfs4_ff_layout_mirror *
|
static struct nfs4_ff_layout_mirror *
|
||||||
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
|
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
|
||||||
struct nfs4_ff_layout_mirror *mirror)
|
struct nfs4_ff_layout_mirror *mirror)
|
||||||
|
|
@ -209,8 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
|
||||||
|
|
||||||
spin_lock(&inode->i_lock);
|
spin_lock(&inode->i_lock);
|
||||||
list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
|
list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
|
||||||
if (memcmp(&mirror->dss[0].devid, &pos->dss[0].devid,
|
if (!ff_mirror_match_devid(mirror, pos))
|
||||||
sizeof(pos->dss[0].devid)) != 0)
|
|
||||||
continue;
|
continue;
|
||||||
if (!ff_mirror_match_fh(mirror, pos))
|
if (!ff_mirror_match_fh(mirror, pos))
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -241,13 +273,15 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
|
||||||
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
|
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
|
||||||
{
|
{
|
||||||
struct nfs4_ff_layout_mirror *mirror;
|
struct nfs4_ff_layout_mirror *mirror;
|
||||||
|
u32 dss_id;
|
||||||
|
|
||||||
mirror = kzalloc(sizeof(*mirror), gfp_flags);
|
mirror = kzalloc(sizeof(*mirror), gfp_flags);
|
||||||
if (mirror != NULL) {
|
if (mirror != NULL) {
|
||||||
spin_lock_init(&mirror->lock);
|
spin_lock_init(&mirror->lock);
|
||||||
refcount_set(&mirror->ref, 1);
|
refcount_set(&mirror->ref, 1);
|
||||||
INIT_LIST_HEAD(&mirror->mirrors);
|
INIT_LIST_HEAD(&mirror->mirrors);
|
||||||
nfs_localio_file_init(&mirror->dss[0].nfl);
|
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
|
||||||
|
nfs_localio_file_init(&mirror->dss[dss_id].nfl);
|
||||||
}
|
}
|
||||||
return mirror;
|
return mirror;
|
||||||
}
|
}
|
||||||
|
|
@ -255,17 +289,19 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
|
||||||
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
|
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
|
||||||
{
|
{
|
||||||
const struct cred *cred;
|
const struct cred *cred;
|
||||||
int dss_id = 0;
|
u32 dss_id;
|
||||||
|
|
||||||
ff_layout_remove_mirror(mirror);
|
ff_layout_remove_mirror(mirror);
|
||||||
|
|
||||||
|
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
|
||||||
kfree(mirror->dss[dss_id].fh_versions);
|
kfree(mirror->dss[dss_id].fh_versions);
|
||||||
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
|
|
||||||
cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
|
cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
|
||||||
put_cred(cred);
|
put_cred(cred);
|
||||||
cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
|
cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
|
||||||
put_cred(cred);
|
put_cred(cred);
|
||||||
|
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
|
||||||
nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
|
nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
|
||||||
|
}
|
||||||
|
|
||||||
kfree(mirror->dss);
|
kfree(mirror->dss);
|
||||||
kfree(mirror);
|
kfree(mirror);
|
||||||
|
|
@ -371,14 +407,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
|
||||||
free_me);
|
free_me);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror)
|
||||||
|
{
|
||||||
|
u32 dss_id, sum = 0;
|
||||||
|
|
||||||
|
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
|
||||||
|
sum += mirror->dss[dss_id].efficiency;
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
|
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
|
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
|
||||||
for (j = i + 1; j < fls->mirror_array_cnt; j++)
|
for (j = i + 1; j < fls->mirror_array_cnt; j++)
|
||||||
if (fls->mirror_array[i]->dss[0].efficiency <
|
if (ff_mirror_efficiency_sum(fls->mirror_array[i]) <
|
||||||
fls->mirror_array[j]->dss[0].efficiency)
|
ff_mirror_efficiency_sum(fls->mirror_array[j]))
|
||||||
swap(fls->mirror_array[i],
|
swap(fls->mirror_array[i],
|
||||||
fls->mirror_array[j]);
|
fls->mirror_array[j]);
|
||||||
}
|
}
|
||||||
|
|
@ -398,6 +444,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
|
||||||
u32 mirror_array_cnt;
|
u32 mirror_array_cnt;
|
||||||
__be32 *p;
|
__be32 *p;
|
||||||
int i, rc;
|
int i, rc;
|
||||||
|
struct nfs4_ff_layout_ds_stripe *dss_info;
|
||||||
|
|
||||||
dprintk("--> %s\n", __func__);
|
dprintk("--> %s\n", __func__);
|
||||||
scratch = folio_alloc(gfp_flags, 0);
|
scratch = folio_alloc(gfp_flags, 0);
|
||||||
|
|
@ -440,17 +487,24 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
|
||||||
kuid_t uid;
|
kuid_t uid;
|
||||||
kgid_t gid;
|
kgid_t gid;
|
||||||
u32 fh_count, id;
|
u32 fh_count, id;
|
||||||
int j, dss_id = 0;
|
int j, dss_id;
|
||||||
|
|
||||||
rc = -EIO;
|
rc = -EIO;
|
||||||
p = xdr_inline_decode(&stream, 4);
|
p = xdr_inline_decode(&stream, 4);
|
||||||
if (!p)
|
if (!p)
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
|
|
||||||
|
// Ensure all mirrors have same stripe count.
|
||||||
|
if (dss_count == 0)
|
||||||
dss_count = be32_to_cpup(p);
|
dss_count = be32_to_cpup(p);
|
||||||
|
else if (dss_count != be32_to_cpup(p))
|
||||||
|
goto out_err_free;
|
||||||
|
|
||||||
/* FIXME: allow for striping? */
|
if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT ||
|
||||||
if (dss_count != 1)
|
dss_count == 0)
|
||||||
|
goto out_err_free;
|
||||||
|
|
||||||
|
if (dss_count > 1 && stripe_unit == 0)
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
|
|
||||||
fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
|
fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
|
||||||
|
|
@ -464,8 +518,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
|
||||||
kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
|
kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
|
||||||
gfp_flags);
|
gfp_flags);
|
||||||
|
|
||||||
|
for (dss_id = 0; dss_id < dss_count; dss_id++) {
|
||||||
|
dss_info = &fls->mirror_array[i]->dss[dss_id];
|
||||||
|
dss_info->mirror = fls->mirror_array[i];
|
||||||
|
|
||||||
/* deviceid */
|
/* deviceid */
|
||||||
rc = decode_deviceid(&stream, &fls->mirror_array[i]->dss[dss_id].devid);
|
rc = decode_deviceid(&stream, &dss_info->devid);
|
||||||
if (rc)
|
if (rc)
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
|
|
||||||
|
|
@ -474,10 +532,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
|
||||||
p = xdr_inline_decode(&stream, 4);
|
p = xdr_inline_decode(&stream, 4);
|
||||||
if (!p)
|
if (!p)
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
fls->mirror_array[i]->dss[dss_id].efficiency = be32_to_cpup(p);
|
dss_info->efficiency = be32_to_cpup(p);
|
||||||
|
|
||||||
/* stateid */
|
/* stateid */
|
||||||
rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->dss[dss_id].stateid);
|
rc = decode_pnfs_stateid(&stream, &dss_info->stateid);
|
||||||
if (rc)
|
if (rc)
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
|
|
||||||
|
|
@ -488,22 +546,22 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
fh_count = be32_to_cpup(p);
|
fh_count = be32_to_cpup(p);
|
||||||
|
|
||||||
fls->mirror_array[i]->dss[dss_id].fh_versions =
|
dss_info->fh_versions =
|
||||||
kcalloc(fh_count, sizeof(struct nfs_fh),
|
kcalloc(fh_count, sizeof(struct nfs_fh),
|
||||||
gfp_flags);
|
gfp_flags);
|
||||||
if (fls->mirror_array[i]->dss[dss_id].fh_versions == NULL) {
|
if (dss_info->fh_versions == NULL) {
|
||||||
rc = -ENOMEM;
|
rc = -ENOMEM;
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0; j < fh_count; j++) {
|
for (j = 0; j < fh_count; j++) {
|
||||||
rc = decode_nfs_fh(&stream,
|
rc = decode_nfs_fh(&stream,
|
||||||
&fls->mirror_array[i]->dss[dss_id].fh_versions[j]);
|
&dss_info->fh_versions[j]);
|
||||||
if (rc)
|
if (rc)
|
||||||
goto out_err_free;
|
goto out_err_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
fls->mirror_array[i]->dss[dss_id].fh_versions_cnt = fh_count;
|
dss_info->fh_versions_cnt = fh_count;
|
||||||
|
|
||||||
/* user */
|
/* user */
|
||||||
rc = decode_name(&stream, &id);
|
rc = decode_name(&stream, &id);
|
||||||
|
|
@ -523,6 +581,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
|
||||||
kcred = prepare_kernel_cred(&init_task);
|
kcred = prepare_kernel_cred(&init_task);
|
||||||
else {
|
else {
|
||||||
unsigned int nofs_flags = memalloc_nofs_save();
|
unsigned int nofs_flags = memalloc_nofs_save();
|
||||||
|
|
||||||
kcred = prepare_kernel_cred(&init_task);
|
kcred = prepare_kernel_cred(&init_task);
|
||||||
memalloc_nofs_restore(nofs_flags);
|
memalloc_nofs_restore(nofs_flags);
|
||||||
}
|
}
|
||||||
|
|
@ -534,21 +593,25 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
|
||||||
cred = RCU_INITIALIZER(kcred);
|
cred = RCU_INITIALIZER(kcred);
|
||||||
|
|
||||||
if (lgr->range.iomode == IOMODE_READ)
|
if (lgr->range.iomode == IOMODE_READ)
|
||||||
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
|
rcu_assign_pointer(dss_info->ro_cred, cred);
|
||||||
else
|
else
|
||||||
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
|
rcu_assign_pointer(dss_info->rw_cred, cred);
|
||||||
|
}
|
||||||
|
|
||||||
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
|
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
|
||||||
if (mirror != fls->mirror_array[i]) {
|
if (mirror != fls->mirror_array[i]) {
|
||||||
|
for (dss_id = 0; dss_id < dss_count; dss_id++) {
|
||||||
|
dss_info = &fls->mirror_array[i]->dss[dss_id];
|
||||||
/* swap cred ptrs so free_mirror will clean up old */
|
/* swap cred ptrs so free_mirror will clean up old */
|
||||||
if (lgr->range.iomode == IOMODE_READ) {
|
if (lgr->range.iomode == IOMODE_READ) {
|
||||||
cred = xchg(&mirror->dss[dss_id].ro_cred,
|
cred = xchg(&mirror->dss[dss_id].ro_cred,
|
||||||
fls->mirror_array[i]->dss[dss_id].ro_cred);
|
dss_info->ro_cred);
|
||||||
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
|
rcu_assign_pointer(dss_info->ro_cred, cred);
|
||||||
} else {
|
} else {
|
||||||
cred = xchg(&mirror->dss[dss_id].rw_cred,
|
cred = xchg(&mirror->dss[dss_id].rw_cred,
|
||||||
fls->mirror_array[i]->dss[dss_id].rw_cred);
|
dss_info->rw_cred);
|
||||||
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
|
rcu_assign_pointer(dss_info->rw_cred, cred);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ff_layout_free_mirror(fls->mirror_array[i]);
|
ff_layout_free_mirror(fls->mirror_array[i]);
|
||||||
fls->mirror_array[i] = mirror;
|
fls->mirror_array[i] = mirror;
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,8 @@
|
||||||
* due to network error etc. */
|
* due to network error etc. */
|
||||||
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
|
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
|
||||||
|
|
||||||
|
#define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096
|
||||||
|
|
||||||
/* LAYOUTSTATS report interval in ms */
|
/* LAYOUTSTATS report interval in ms */
|
||||||
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
|
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
|
||||||
#define FF_LAYOUTSTATS_MAXDEV 4
|
#define FF_LAYOUTSTATS_MAXDEV 4
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue