xfs: fixes for v6.18-rc4

Signed-off-by: Carlos Maiolino <cem@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iJUEABMJAB0WIQSmtYVZ/MfVMGUq1GNcsMJ8RxYuYwUCaQXOZAAKCRBcsMJ8RxYu
 Y43FAX9gZLSS+6U9NtjQXbwCzhKg0fPWMqt2eEhLxPF2bpNO7FmgecQKg3wZi0XC
 gkhOqHYBfikmW5D/rUm0OMY7mBmyIMaSm0r77u82up08YZnc40LnMavH4+HAm15h
 lhT/MhpJKQ==
 =BJng
 -----END PGP SIGNATURE-----

Merge tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:
 "Just a single bug fix (and documentation for the issue)"

* tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: document another racy GC case in xfs_zoned_map_extent
  xfs: prevent gc from picking the same zone twice

Linus Torvalds  2025-11-01 10:04:35 -07:00
commit 9db0d7c5a5
3 changed files with 41 additions and 0 deletions


@@ -50,6 +50,12 @@ struct xfs_rtgroup {
 	uint8_t			*rtg_rsum_cache;
 
 	struct xfs_open_zone	*rtg_open_zone;
+
+	/*
+	 * Count of outstanding GC operations for zoned XFS. Any RTG with a
+	 * non-zero rtg_gccount will not be picked as new GC victim.
+	 */
+	atomic_t		rtg_gccount;
 };
 
 /*
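
The new field is the heart of the fix: a per-rtgroup count of in-flight GC work that the victim picker consults before choosing a zone. Below is a standalone sketch of that counting pattern, compiled with C11 atomics. It is illustration only, not kernel code; the struct, the helper names and the two-zone example are all made up for the sketch.

/*
 * Illustration only -- not part of the patch.  Mirrors the rtg_gccount idea:
 * bump a per-zone counter when GC I/O is issued, drop it when the remapping
 * finishes, and never pick a zone whose counter is still non-zero.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct zone {
	unsigned int	used_blocks;	/* stand-in for rtg_rmap()->i_used_blocks */
	atomic_uint	gccount;	/* stand-in for rtg_gccount */
};

static void gc_io_issued(struct zone *z)   { atomic_fetch_add(&z->gccount, 1); }
static void gc_io_finished(struct zone *z) { atomic_fetch_sub(&z->gccount, 1); }

/* Pick the least-used zone that is neither empty nor already being GCed. */
static struct zone *pick_victim(struct zone *zones, size_t nr)
{
	struct zone *victim = NULL;

	for (size_t i = 0; i < nr; i++) {
		if (!zones[i].used_blocks)
			continue;		/* empty: just waiting for a reset */
		if (atomic_load(&zones[i].gccount))
			continue;		/* GC already in flight: skip */
		if (!victim || zones[i].used_blocks < victim->used_blocks)
			victim = &zones[i];
	}
	return victim;
}

int main(void)
{
	struct zone zones[2] = { { .used_blocks = 10 }, { .used_blocks = 50 } };

	gc_io_issued(&zones[0]);
	printf("victim: zone %td\n", pick_victim(zones, 2) - zones);	/* zone 1 */
	gc_io_finished(&zones[0]);
	printf("victim: zone %td\n", pick_victim(zones, 2) - zones);	/* zone 0 */
	return 0;
}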


@@ -246,6 +246,14 @@ xfs_zoned_map_extent(
 	 * If a data write raced with this GC write, keep the existing data in
 	 * the data fork, mark our newly written GC extent as reclaimable, then
 	 * move on to the next extent.
+	 *
+	 * Note that this can also happen when racing with operations that do
+	 * not actually invalidate the data, but just move it to a different
+	 * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the
+	 * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the
+	 * data was just moved around, GC fails to free the zone, but the zone
+	 * becomes a GC candidate again as soon as all previous GC I/O has
+	 * finished and these blocks will be moved out eventually.
 	 */
 	if (old_startblock != NULLFSBLOCK &&
 	    old_startblock != data.br_startblock)
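
As a concrete illustration of the "moved, not invalidated" operations listed in the comment, the userspace sketch below (not part of the patch; the 4096-byte block size is an assumption about the filesystem) uses FALLOC_FL_COLLAPSE_RANGE to shift existing file data to a lower offset without rewriting it, which is exactly the kind of relocation that can race with a GC write:

/* Illustration only -- not part of the patch. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Assumed 4096-byte filesystem block size; COLLAPSE_RANGE needs a
	 * block-aligned offset and length. */
	const off_t blksz = 4096;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/*
	 * Remove the first block; all data after it moves down by blksz.
	 * Nothing is rewritten, so blocks a concurrent GC pass just copied
	 * may still be referenced, only from a different file offset.
	 */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, blksz) < 0)
		perror("fallocate(FALLOC_FL_COLLAPSE_RANGE)");
	close(fd);
	return 0;
}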


@@ -114,6 +114,8 @@ struct xfs_gc_bio {
 	/* Open Zone being written to */
 	struct xfs_open_zone	*oz;
 
+	struct xfs_rtgroup	*victim_rtg;
+
 	/* Bio used for reads and writes, including the bvec used by it */
 	struct bio_vec		bv;
 	struct bio		bio;	/* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
 	iter->rec_count = 0;
 	iter->rec_idx = 0;
 	iter->victim_rtg = victim_rtg;
+	atomic_inc(&victim_rtg->rtg_gccount);
 }
 
 /*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(
 	return 0;
 
 done:
+	atomic_dec(&iter->victim_rtg->rtg_gccount);
 	xfs_rtgroup_rele(iter->victim_rtg);
 	iter->victim_rtg = NULL;
 	return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
 		if (!rtg)
 			continue;
 
+		/*
+		 * If the zone is already undergoing GC, don't pick it again.
+		 *
+		 * This prevents us from picking one of the zones for which we
+		 * already submitted GC I/O, but for which the remapping hasn't
+		 * concluded yet. This won't cause data corruption, but
+		 * increases write amplification and slows down GC, so this is
+		 * a bad thing.
+		 */
+		if (atomic_read(&rtg->rtg_gccount)) {
+			xfs_rtgroup_rele(rtg);
+			continue;
+		}
+
 		/* skip zones that are just waiting for a reset */
 		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
 		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
 	chunk->scratch = &data->scratch[data->scratch_idx];
 	chunk->data = data;
 	chunk->oz = oz;
+	chunk->victim_rtg = iter->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
 
 	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
 	bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
 xfs_zone_gc_free_chunk(
 	struct xfs_gc_bio	*chunk)
 {
+	atomic_dec(&chunk->victim_rtg->rtg_gccount);
+	xfs_rtgroup_rele(chunk->victim_rtg);
 	list_del(&chunk->entry);
 	xfs_open_zone_put(chunk->oz);
 	xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
 	split_chunk->oz = chunk->oz;
 	atomic_inc(&chunk->oz->oz_ref);
 
+	split_chunk->victim_rtg = chunk->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
+
 	chunk->offset += split_len;
 	chunk->len -= split_len;
 	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);