ocfs2: fix the issue with discontiguous allocation in the global_bitmap

commit 4eb7b93e03 ("ocfs2: improve write IO performance when
fragmentation is high") introduced another regression.

The following ocfs2-test case can trigger this issue:
> discontig_runner.sh => activate_discontig_bg.sh => resv_unwritten:
> ${RESV_UNWRITTEN_BIN} -f ${WORK_PLACE}/large_testfile -s 0 -l \
> $((${FILE_MAJOR_SIZE_M}*1024*1024))

In my environment, the test disk size (as reported by "fdisk -l <dev>") is:
> 53687091200 bytes, 104857600 sectors.

With this disk size, the above command expands to:
> /usr/local/ocfs2-test/bin/resv_unwritten -f \
> /mnt/ocfs2/ocfs2-activate-discontig-bg-dir/large_testfile -s 0 -l \
> 53187969024

Error log:
> [*] Reserve 50724M space for a LARGE file, reserve 200M space for future test.
> ioctl error 28: "No space left on device"
> resv allocation failed Unknown error -1
> reserve unwritten region from 0 to 53187969024.

Call flow:
__ocfs2_change_file_space //by ioctl OCFS2_IOC_RESVSP64
 ocfs2_allocate_unwritten_extents //start:0 len:53187969024
  while()
   + ocfs2_get_clusters //cpos:0, alloc_size:1623168 (cluster number)
   + ocfs2_extend_allocation
     + ocfs2_lock_allocators
     |  + choose OCFS2_AC_USE_MAIN & ocfs2_cluster_group_search
     |
     + ocfs2_add_inode_data
        ocfs2_add_clusters_in_btree
         __ocfs2_claim_clusters
          ocfs2_claim_suballoc_bits
          + During the allocation of the final part of the large file
            (after ~47GB), no chain had bits_wanted contiguous free
            bits. Consequently, the allocation failed.

How to fix:
When the global bitmap is so fragmented that no chain can supply
bits_wanted contiguous bits, OCFS2 should stop insisting on a contiguous
allocation of bits_wanted bits and instead provide the largest available
run of contiguous free bits from the cluster groups.

Link: https://lkml.kernel.org/r/20250414060125.19938-2-heming.zhao@suse.com
Fixes: 4eb7b93e03 ("ocfs2: improve write IO performance when fragmentation is high")
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Heming Zhao 2025-04-14 14:01:23 +08:00 committed by Andrew Morton
parent df84d2fd35
commit bd1261b16d
2 changed files with 33 additions and 6 deletions

View File

@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
ac, cl); ac, cl);
if (PTR_ERR(bg_bh) == -ENOSPC) if (PTR_ERR(bg_bh) == -ENOSPC) {
ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
bg_bh = ocfs2_block_group_alloc_discontig(handle, bg_bh = ocfs2_block_group_alloc_discontig(handle,
alloc_inode, alloc_inode,
ac, cl); ac, cl);
}
if (IS_ERR(bg_bh)) { if (IS_ERR(bg_bh)) {
status = PTR_ERR(bg_bh); status = PTR_ERR(bg_bh);
bg_bh = NULL; bg_bh = NULL;
@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
{ {
int status; int status;
u16 chain; u16 chain;
u32 contig_bits;
u64 next_group; u64 next_group;
struct inode *alloc_inode = ac->ac_inode; struct inode *alloc_inode = ac->ac_inode;
struct buffer_head *group_bh = NULL; struct buffer_head *group_bh = NULL;
@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
status = -ENOSPC; status = -ENOSPC;
/* for now, the chain search is a bit simplistic. We just use /* for now, the chain search is a bit simplistic. We just use
* the 1st group with any empty bits. */ * the 1st group with any empty bits. */
while ((status = ac->ac_group_search(alloc_inode, group_bh, while (1) {
if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) {
contig_bits = le16_to_cpu(bg->bg_contig_free_bits);
if (!contig_bits)
contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
le16_to_cpu(bg->bg_bits), 0);
if (bits_wanted > contig_bits && contig_bits >= min_bits)
bits_wanted = contig_bits;
}
status = ac->ac_group_search(alloc_inode, group_bh,
bits_wanted, min_bits, bits_wanted, min_bits,
ac->ac_max_block, ac->ac_max_block, res);
res)) == -ENOSPC) { if (status != -ENOSPC)
break;
if (!bg->bg_next_group) if (!bg->bg_next_group)
break; break;
@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
victim = ocfs2_find_victim_chain(cl); victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim; ac->ac_chain = victim;
search:
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left); res, &bits_left);
if (!status) { if (!status) {
@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
} }
} }
/* Chains can't supply the bits_wanted contiguous space.
* We should switch to using every single bit when allocating
* from the global bitmap. */
if (i == le16_to_cpu(cl->cl_next_free_rec) &&
status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) {
ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
ac->ac_chain = victim;
goto search;
}
set_hint: set_hint:
if (status != -ENOSPC) { if (status != -ENOSPC) {
/* If the next search of this group is not likely to /* If the next search of this group is not likely to
@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
&& ac->ac_which != OCFS2_AC_USE_MAIN); && ac->ac_which != OCFS2_AC_USE_MAIN
&& ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
if (ac->ac_which == OCFS2_AC_USE_LOCAL) { if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
WARN_ON(min_clusters > 1); WARN_ON(min_clusters > 1);

View File

@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
#define OCFS2_AC_USE_MAIN 2 #define OCFS2_AC_USE_MAIN 2
#define OCFS2_AC_USE_INODE 3 #define OCFS2_AC_USE_INODE 3
#define OCFS2_AC_USE_META 4 #define OCFS2_AC_USE_META 4
#define OCFS2_AC_USE_MAIN_DISCONTIG 5
u32 ac_which; u32 ac_which;
/* these are used by the chain search */ /* these are used by the chain search */