mm/migrate: fix sleep in atomic for large folios and buffer heads

The large folio + buffer head noref migration path can block
while holding the mapping's private_lock, a spinlock, which
triggers sleep-in-atomic splats.

Because the pagecache lookup path takes the folio lock, it
serializes against the migration path, so the two can wait
for each other. For the atomic (private_lock held) case, a
new BH_Migrate buffer state flag is introduced which lets
the lookup bail out early.

This allows the critical region of the private_lock on the
migration path to be reduced to what it was before commit
ebdf4de564 ("mm: migrate: fix reference check race between
__find_get_block() and migration"), that is, covering the
count checks.

The scope is always noref migration.
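
As an illustration of the flag protocol (a minimal userspace
sketch; C11 atomics stand in for the kernel's
test_and_set_bit_lock()/test_bit_acquire()/clear_bit_unlock(),
and every name below is illustrative rather than kernel API):

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define BH_MIGRATE (1UL << 0)          /* stand-in for the new state bit */

    static _Atomic unsigned long b_state;  /* stand-in for bh->b_state */

    /* Migration side: claim the bit; acquire pairs with the release below. */
    static void migrate_begin(void)
    {
        unsigned long old = atomic_fetch_or_explicit(&b_state, BH_MIGRATE,
                                                     memory_order_acquire);
        assert(!(old & BH_MIGRATE));       /* mirrors VM_WARN_ON_ONCE() */
    }

    /* Migration side: release the bit once the buffers are detached. */
    static void migrate_end(void)
    {
        atomic_fetch_and_explicit(&b_state, ~BH_MIGRATE,
                                  memory_order_release);
    }

    /* Lookup side: bail early instead of racing against migration. */
    static bool lookup_should_bail(void)
    {
        return atomic_load_explicit(&b_state,
                                    memory_order_acquire) & BH_MIGRATE;
    }

    int main(void)
    {
        migrate_begin();
        printf("lookup bails: %d\n", lookup_should_bail());  /* prints 1 */
        migrate_end();
        printf("lookup bails: %d\n", lookup_should_bail());  /* prints 0 */
        return 0;
    }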

Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: syzbot+f3c6fda1297c748a7076@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/oe-lkp/202503101536.27099c77-lkp@intel.com
Fixes: 3c20917120 ("block/bdev: enable large folio support for large logical block sizes")
Reviewed-by: Jan Kara <jack@suse.cz>
Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Link: https://kdevops.org/ext4/v6.15-rc2.html # [0]
Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1]
Link: https://lore.kernel.org/20250418015921.132400-8-dave@stgolabs.net
Tested-by: kdevops@lists.linux.dev # [0] [1]
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
 4 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -207,6 +207,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic)
 	head = folio_buffers(folio);
 	if (!head)
 		goto out_unlock;
+	/*
+	 * Upon a noref migration, the folio lock serializes here;
+	 * otherwise bail.
+	 */
+	if (test_bit_acquire(BH_Migrate, &head->b_state)) {
+		WARN_ON(!atomic);
+		goto out_unlock;
+	}
+
 	bh = head;
 	do {
 		if (!buffer_mapped(bh))
@@ -1390,7 +1399,8 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 /*
  * Perform a pagecache lookup for the matching buffer. If it's there, refresh
  * it in the LRU and mark it as accessed. If it is not present then return
- * NULL
+ * NULL. Atomic context callers may also return NULL if the buffer is being
+ * migrated; similarly the page is not marked accessed either.
  */
 static struct buffer_head *
 find_get_block_common(struct block_device *bdev, sector_t block,

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c

@@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
 	if (!bh || !buffer_uptodate(bh))
 		/*
 		 * If the block is not in the buffer cache, then it
-		 * must have been written out.
+		 * must have been written out, or, most unlikely, is
+		 * being migrated - false failure should be OK here.
 		 */
 		goto out;
 

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h

@@ -34,6 +34,7 @@ enum bh_state_bits {
 	BH_Meta,	/* Buffer contains metadata */
 	BH_Prio,	/* Buffer should be submitted with REQ_PRIO */
 	BH_Defer_Completion, /* Defer AIO completion to workqueue */
+	BH_Migrate,	/* Buffer is being migrated (norefs) */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
 			 * for private allocation by other entities

diff --git a/mm/migrate.c b/mm/migrate.c
--- a/mm/migrate.c
+++ b/mm/migrate.c

@@ -845,9 +845,11 @@ static int __buffer_migrate_folio(struct address_space *mapping,
 		return -EAGAIN;
 
 	if (check_refs) {
-		bool busy;
+		bool busy, migrating;
 		bool invalidated = false;
 
+		migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state);
+		VM_WARN_ON_ONCE(migrating);
 recheck_buffers:
 		busy = false;
 		spin_lock(&mapping->i_private_lock);
@@ -859,12 +861,12 @@ static int __buffer_migrate_folio(struct address_space *mapping,
 			}
 			bh = bh->b_this_page;
 		} while (bh != head);
+		spin_unlock(&mapping->i_private_lock);
 		if (busy) {
 			if (invalidated) {
 				rc = -EAGAIN;
 				goto unlock_buffers;
 			}
-			spin_unlock(&mapping->i_private_lock);
 			invalidate_bh_lrus();
 			invalidated = true;
 			goto recheck_buffers;
@@ -883,7 +885,7 @@ static int __buffer_migrate_folio(struct address_space *mapping,
 
 unlock_buffers:
 	if (check_refs)
-		spin_unlock(&mapping->i_private_lock);
+		clear_bit_unlock(BH_Migrate, &head->b_state);
 	bh = head;
 	do {
 		unlock_buffer(bh);
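
To see the net effect of the mm/migrate.c hunks in one place, the
check_refs path of __buffer_migrate_folio() now has roughly the
following shape (a simplified reconstruction from the hunks above,
with the buffer ref walk and surrounding code elided; not verbatim
kernel code):

    bool busy, migrating;
    bool invalidated = false;

    /* Claim the buffers for noref migration; lookups will now bail. */
    migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state);
    VM_WARN_ON_ONCE(migrating);

    recheck_buffers:
    busy = false;
    spin_lock(&mapping->i_private_lock);
    /* ... walk buffers; set busy if any bh holds extra references ... */
    spin_unlock(&mapping->i_private_lock);  /* critical region ends here */
    if (busy) {
        if (invalidated)
            goto unlock_buffers;            /* fails with -EAGAIN */
        invalidate_bh_lrus();               /* may block: lock is dropped */
        invalidated = true;
        goto recheck_buffers;
    }
    /* ... migrate the folio; may block, but no spinlock is held ... */

    unlock_buffers:
    /* Pairs with test_and_set_bit_lock() above. */
    clear_bit_unlock(BH_Migrate, &head->b_state);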