mm/migrate: fix sleep in atomic for large folios and buffer heads
The large folio + buffer head noref migration scenarios are being naughty and blocking while holding a spinlock.

As a consequence of the pagecache lookup path taking the folio lock, this serializes against migration paths, so they can wait for each other. For the private_lock atomic case, a new BH_Migrate flag is introduced which enables the lookup to bail.

This allows the critical region of the private_lock on the migration path to be reduced to the way it was before ebdf4de564 ("mm: migrate: fix reference check race between __find_get_block() and migration"), that is, covering the count checks.

The scope is always noref migration.

Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: syzbot+f3c6fda1297c748a7076@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/oe-lkp/202503101536.27099c77-lkp@intel.com
Fixes: 3c20917120 ("block/bdev: enable large folio support for large logical block sizes")
Reviewed-by: Jan Kara <jack@suse.cz>
Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Link: https://kdevops.org/ext4/v6.15-rc2.html # [0]
Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1]
Link: https://lore.kernel.org/20250418015921.132400-8-dave@stgolabs.net
Tested-by: kdevops@lists.linux.dev # [0] [1]
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
commit 2d900efff9
parent 6e8f57fd09
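Before the diff, a minimal self-contained sketch of the coordination the commit message describes, written in plain C11 rather than kernel code: the migration side publishes a "migrating" flag (the kernel uses the new BH_Migrate bit in bh->b_state), and an atomic-context pagecache lookup checks the flag and bails instead of contending on locks the migration path may hold. The names bh_migrate_flag, migrate_noref() and lookup_atomic() are illustrative only.

    /* Illustrative model only; the kernel pairs test_and_set_bit_lock(),
     * test_bit_acquire() and clear_bit_unlock() on BH_Migrate instead. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool bh_migrate_flag;   /* models BH_Migrate on the buffer head */

    /* Migration side (noref): set the flag, keep the lock-held region short,
     * then clear the flag with release semantics once migration is done. */
    static void migrate_noref(void)
    {
            bool already = atomic_exchange_explicit(&bh_migrate_flag, true,
                                                    memory_order_acquire);
            if (already)                   /* mirrors VM_WARN_ON_ONCE(migrating) */
                    fprintf(stderr, "buffer already marked as migrating\n");

            /* ... short i_private_lock region: only the reference-count checks ... */

            atomic_store_explicit(&bh_migrate_flag, false, memory_order_release);
    }

    /* Atomic-context lookup: if the buffer is being migrated, report "not
     * found" instead of blocking; the caller simply sees NULL. */
    static bool lookup_atomic(void)
    {
            if (atomic_load_explicit(&bh_migrate_flag, memory_order_acquire))
                    return false;          /* bail, as __find_get_block_slow() now does */
            /* ... walk the buffer list as before ... */
            return true;
    }

    int main(void)
    {
            migrate_noref();
            return lookup_atomic() ? 0 : 1;
    }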
diff --git a/fs/buffer.c b/fs/buffer.c
@@ -207,6 +207,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic)
 	head = folio_buffers(folio);
 	if (!head)
 		goto out_unlock;
+	/*
+	 * Upon a noref migration, the folio lock serializes here;
+	 * otherwise bail.
+	 */
+	if (test_bit_acquire(BH_Migrate, &head->b_state)) {
+		WARN_ON(!atomic);
+		goto out_unlock;
+	}
+
 	bh = head;
 	do {
 		if (!buffer_mapped(bh))
@@ -1390,7 +1399,8 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 /*
  * Perform a pagecache lookup for the matching buffer. If it's there, refresh
  * it in the LRU and mark it as accessed. If it is not present then return
- * NULL
+ * NULL. Atomic context callers may also return NULL if the buffer is being
+ * migrated; similarly the page is not marked accessed either.
  */
 static struct buffer_head *
 find_get_block_common(struct block_device *bdev, sector_t block,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
@@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
 	if (!bh || !buffer_uptodate(bh))
 		/*
 		 * If the block is not in the buffer cache, then it
-		 * must have been written out.
+		 * must have been written out, or, most unlikely, is
+		 * being migrated - false failure should be OK here.
 		 */
 		goto out;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
@@ -34,6 +34,7 @@ enum bh_state_bits {
 	BH_Meta,	/* Buffer contains metadata */
 	BH_Prio,	/* Buffer should be submitted with REQ_PRIO */
 	BH_Defer_Completion, /* Defer AIO completion to workqueue */
+	BH_Migrate,	/* Buffer is being migrated (norefs) */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
 			 * for private allocation by other entities
diff --git a/mm/migrate.c b/mm/migrate.c
@@ -845,9 +845,11 @@ static int __buffer_migrate_folio(struct address_space *mapping,
 		return -EAGAIN;
 
 	if (check_refs) {
-		bool busy;
+		bool busy, migrating;
 		bool invalidated = false;
 
+		migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state);
+		VM_WARN_ON_ONCE(migrating);
 recheck_buffers:
 		busy = false;
 		spin_lock(&mapping->i_private_lock);
@@ -859,12 +861,12 @@ static int __buffer_migrate_folio(struct address_space *mapping,
 			}
 			bh = bh->b_this_page;
 		} while (bh != head);
+		spin_unlock(&mapping->i_private_lock);
 		if (busy) {
 			if (invalidated) {
 				rc = -EAGAIN;
 				goto unlock_buffers;
 			}
-			spin_unlock(&mapping->i_private_lock);
 			invalidate_bh_lrus();
 			invalidated = true;
 			goto recheck_buffers;
@@ -883,7 +885,7 @@ static int __buffer_migrate_folio(struct address_space *mapping,
 
 unlock_buffers:
 	if (check_refs)
-		spin_unlock(&mapping->i_private_lock);
+		clear_bit_unlock(BH_Migrate, &head->b_state);
 	bh = head;
 	do {
 		unlock_buffer(bh);
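To make the net effect of the mm/migrate.c hunks easier to follow, below is a small self-contained C11 model (not kernel code; a pthread mutex stands in for mapping->i_private_lock, and check_buffers_busy()/invalidate_lrus() are hypothetical stand-ins for the b_count walk and invalidate_bh_lrus()) of the resulting check_refs control flow: the migrating flag is set before the checks, the lock now covers only the busy check itself, and the potentially sleeping invalidation runs with the lock already dropped.

    #include <errno.h>
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t i_private_lock = PTHREAD_MUTEX_INITIALIZER; /* models i_private_lock */
    static atomic_bool bh_migrate;                                     /* models BH_Migrate */

    static bool check_buffers_busy(void) { return false; } /* stand-in for the b_count walk */
    static void invalidate_lrus(void)    { }                /* stand-in; may sleep in the kernel */

    /* Sketch of the check_refs branch of __buffer_migrate_folio() after the patch. */
    static int migrate_check_refs(void)
    {
            bool invalidated = false;
            bool busy;
            int rc = 0;

            /* Flag the buffers first; lookups in atomic context will now bail. */
            if (atomic_exchange_explicit(&bh_migrate, true, memory_order_acquire))
                    fprintf(stderr, "already migrating?\n"); /* ~VM_WARN_ON_ONCE */

    recheck_buffers:
            /* The spinlock region is back to covering only the count checks. */
            pthread_mutex_lock(&i_private_lock);
            busy = check_buffers_busy();
            pthread_mutex_unlock(&i_private_lock);

            if (busy) {
                    if (invalidated) {
                            rc = -EAGAIN;
                            goto unlock_buffers;
                    }
                    /* Sleeping work now happens with the lock already dropped. */
                    invalidate_lrus();
                    invalidated = true;
                    goto recheck_buffers;
            }

            /* ... actual folio/buffer migration would happen here ... */

    unlock_buffers:
            atomic_store_explicit(&bh_migrate, false, memory_order_release);
            return rc;
    }

    int main(void)
    {
            return migrate_check_refs() ? 1 : 0;
    }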