xfs: fixes for v6.18-rc4

Signed-off-by: Carlos Maiolino <cem@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iJUEABMJAB0WIQSmtYVZ/MfVMGUq1GNcsMJ8RxYuYwUCaQXOZAAKCRBcsMJ8RxYu
 Y43FAX9gZLSS+6U9NtjQXbwCzhKg0fPWMqt2eEhLxPF2bpNO7FmgecQKg3wZi0XC
 gkhOqHYBfikmW5D/rUm0OMY7mBmyIMaSm0r77u82up08YZnc40LnMavH4+HAm15h
 lhT/MhpJKQ==
 =BJng
 -----END PGP SIGNATURE-----

Merge tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:
 "Just a single bug fix (and documentation for the issue)"

* tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: document another racy GC case in xfs_zoned_map_extent
  xfs: prevent gc from picking the same zone twice

Linus Torvalds  2025-11-01 10:04:35 -07:00
commit 9db0d7c5a5
3 changed files with 41 additions and 0 deletions


@@ -50,6 +50,12 @@ struct xfs_rtgroup {
 	uint8_t			*rtg_rsum_cache;
 
 	struct xfs_open_zone	*rtg_open_zone;
+
+	/*
+	 * Count of outstanding GC operations for zoned XFS. Any RTG with a
+	 * non-zero rtg_gccount will not be picked as new GC victim.
+	 */
+	atomic_t		rtg_gccount;
 };
 
 /*
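
The new field is the heart of the fix: a per-rtgroup count of in-flight GC work that the victim picker consults before choosing a zone. Below is a standalone sketch of that counting pattern, compiled with C11 atomics. It is illustration only, not kernel code; the struct, the helper names and the two-zone example are all made up for the sketch.

/*
 * Illustration only -- not part of the patch.  Mirrors the rtg_gccount idea:
 * bump a per-zone counter when GC I/O is issued, drop it when the remapping
 * finishes, and never pick a zone whose counter is still non-zero.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct zone {
	unsigned int	used_blocks;	/* stand-in for rtg_rmap()->i_used_blocks */
	atomic_uint	gccount;	/* stand-in for rtg_gccount */
};

static void gc_io_issued(struct zone *z)   { atomic_fetch_add(&z->gccount, 1); }
static void gc_io_finished(struct zone *z) { atomic_fetch_sub(&z->gccount, 1); }

/* Pick the least-used zone that is neither empty nor already being GCed. */
static struct zone *pick_victim(struct zone *zones, size_t nr)
{
	struct zone *victim = NULL;

	for (size_t i = 0; i < nr; i++) {
		if (!zones[i].used_blocks)
			continue;		/* empty: just waiting for a reset */
		if (atomic_load(&zones[i].gccount))
			continue;		/* GC already in flight: skip */
		if (!victim || zones[i].used_blocks < victim->used_blocks)
			victim = &zones[i];
	}
	return victim;
}

int main(void)
{
	struct zone zones[2] = { { .used_blocks = 10 }, { .used_blocks = 50 } };

	gc_io_issued(&zones[0]);
	printf("victim: zone %td\n", pick_victim(zones, 2) - zones);	/* zone 1 */
	gc_io_finished(&zones[0]);
	printf("victim: zone %td\n", pick_victim(zones, 2) - zones);	/* zone 0 */
	return 0;
}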


@@ -246,6 +246,14 @@ xfs_zoned_map_extent(
 	 * If a data write raced with this GC write, keep the existing data in
 	 * the data fork, mark our newly written GC extent as reclaimable, then
 	 * move on to the next extent.
+	 *
+	 * Note that this can also happen when racing with operations that do
+	 * not actually invalidate the data, but just move it to a different
+	 * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the
+	 * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the
+	 * data was just moved around, GC fails to free the zone, but the zone
+	 * becomes a GC candidate again as soon as all previous GC I/O has
+	 * finished and these blocks will be moved out eventually.
 	 */
 	if (old_startblock != NULLFSBLOCK &&
 	    old_startblock != data.br_startblock)
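
As a concrete illustration of the "moved, not invalidated" operations listed in the comment, the userspace sketch below (not part of the patch; the 4096-byte block size is an assumption about the filesystem) uses FALLOC_FL_COLLAPSE_RANGE to shift existing file data to a lower offset without rewriting it, which is exactly the kind of relocation that can race with a GC write:

/* Illustration only -- not part of the patch. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Assumed 4096-byte filesystem block size; COLLAPSE_RANGE needs a
	 * block-aligned offset and length. */
	const off_t blksz = 4096;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/*
	 * Remove the first block; all data after it moves down by blksz.
	 * Nothing is rewritten, so blocks a concurrent GC pass just copied
	 * may still be referenced, only from a different file offset.
	 */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, blksz) < 0)
		perror("fallocate(FALLOC_FL_COLLAPSE_RANGE)");
	close(fd);
	return 0;
}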


@@ -114,6 +114,8 @@ struct xfs_gc_bio {
 	/* Open Zone being written to */
 	struct xfs_open_zone	*oz;
 
+	struct xfs_rtgroup	*victim_rtg;
+
 	/* Bio used for reads and writes, including the bvec used by it */
 	struct bio_vec		bv;
 	struct bio		bio;	/* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
 	iter->rec_count = 0;
 	iter->rec_idx = 0;
 	iter->victim_rtg = victim_rtg;
+	atomic_inc(&victim_rtg->rtg_gccount);
 }
 
 /*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(
 	return 0;
 
 done:
+	atomic_dec(&iter->victim_rtg->rtg_gccount);
 	xfs_rtgroup_rele(iter->victim_rtg);
 	iter->victim_rtg = NULL;
 	return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
 		if (!rtg)
 			continue;
 
+		/*
+		 * If the zone is already undergoing GC, don't pick it again.
+		 *
+		 * This prevents us from picking one of the zones for which we
+		 * already submitted GC I/O, but for which the remapping hasn't
+		 * concluded yet. This won't cause data corruption, but
+		 * increases write amplification and slows down GC, so this is
+		 * a bad thing.
+		 */
+		if (atomic_read(&rtg->rtg_gccount)) {
+			xfs_rtgroup_rele(rtg);
+			continue;
+		}
+
 		/* skip zones that are just waiting for a reset */
 		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
 		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
 	chunk->scratch = &data->scratch[data->scratch_idx];
 	chunk->data = data;
 	chunk->oz = oz;
+	chunk->victim_rtg = iter->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
 
 	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
 	bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
 xfs_zone_gc_free_chunk(
 	struct xfs_gc_bio	*chunk)
 {
+	atomic_dec(&chunk->victim_rtg->rtg_gccount);
+	xfs_rtgroup_rele(chunk->victim_rtg);
 	list_del(&chunk->entry);
 	xfs_open_zone_put(chunk->oz);
 	xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
 	split_chunk->oz = chunk->oz;
 	atomic_inc(&chunk->oz->oz_ref);
 
+	split_chunk->victim_rtg = chunk->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
+
 	chunk->offset += split_len;
 	chunk->len -= split_len;
 	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);