vfs-6.19-rc1.writeback

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaSmOZQAKCRCRxhvAZXjc
 or4UAP9FbpFsZd0DpsYnKuv7kFepl291PuR0x2dKmseJ/wcf8AEAzI8FR5wd/fey
 25ZNdExoUojAOj5wVn+jUep3u54jBws=
 =/toi
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull writeback updates from Christian Brauner:
 "Features:

   - Allow file systems to increase the minimum writeback chunk size.

     The relatively low minimal writeback size of 4MiB means that
     written back inodes on rotational media are switched a lot. Besides
     introducing additional seeks, this also can lead to extreme file
     fragmentation on zoned devices when a lot of files are cached
     relative to the available writeback bandwidth.

     This adds a superblock field that allows the file system to
     override the default size, and sets it to the zone size for zoned
     XFS.

   - Add logging for slow writeback when it exceeds
     sysctl_hung_task_timeout_secs. This helps identify tasks waiting
     for a long time and pinpoint potential issues. Recording the
     starting jiffies is also useful when debugging a crashed vmcore.

   - Wake up waiting tasks when finishing the writeback of a chunk

  Cleanups:

   - filemap_* writeback interface cleanups.

     Adding filemap_fdatawrite_wbc ended up being a mistake, as all but
     the original btrfs caller should be using better high level
     interfaces instead.

     This series removes all these low-level interfaces, switches btrfs
     to a more specific interface, and cleans up other too low-level
     interfaces. With this the writeback_control that is passed to the
     writeback code is only initialized in three places.

   - Remove __filemap_fdatawrite, __filemap_fdatawrite_range, and
     filemap_fdatawrite_wbc

   - Add filemap_flush_nr helper for btrfs

   - Push struct writeback_control into start_delalloc_inodes in btrfs

   - Rename filemap_fdatawrite_range_kick to filemap_flush_range

   - Stop opencoding filemap_fdatawrite_range in 9p, ocfs2, and mm

   - Make wbc_to_tag() inline and use it in fs"

* tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs: Make wbc_to_tag() inline and use it in fs.
  xfs: set s_min_writeback_pages for zoned file systems
  writeback: allow the file system to override MIN_WRITEBACK_PAGES
  writeback: cleanup writeback_chunk_size
  mm: rename filemap_fdatawrite_range_kick to filemap_flush_range
  mm: remove __filemap_fdatawrite_range
  mm: remove filemap_fdatawrite_wbc
  mm: remove __filemap_fdatawrite
  mm,btrfs: add a filemap_flush_nr helper
  btrfs: push struct writeback_control into start_delalloc_inodes
  btrfs: use the local tmp_inode variable in start_delalloc_inodes
  ocfs2: don't opencode filemap_fdatawrite_range in ocfs2_journal_submit_inode_data_buffers
  9p: don't opencode filemap_fdatawrite_range in v9fs_mmap_vm_close
  mm: don't opencode filemap_fdatawrite_range in filemap_invalidate_inode
  writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs)
  writeback: Wake up waiting tasks when finishing the writeback of a chunk.
This commit is contained in:
Linus Torvalds 2025-12-01 09:20:51 -08:00
commit ebaeabfa5a
19 changed files with 157 additions and 187 deletions

View File

@ -483,24 +483,15 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
static void v9fs_mmap_vm_close(struct vm_area_struct *vma) static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
{ {
struct inode *inode;
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = WB_SYNC_ALL,
.range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE,
/* absolute end, byte at end included */
.range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE +
(vma->vm_end - vma->vm_start - 1),
};
if (!(vma->vm_flags & VM_SHARED)) if (!(vma->vm_flags & VM_SHARED))
return; return;
p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma); p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
inode = file_inode(vma->vm_file); filemap_fdatawrite_range(file_inode(vma->vm_file)->i_mapping,
filemap_fdatawrite_wbc(inode->i_mapping, &wbc); (loff_t)vma->vm_pgoff * PAGE_SIZE,
(loff_t)vma->vm_pgoff * PAGE_SIZE +
(vma->vm_end - vma->vm_start - 1));
} }
static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {

View File

@ -2468,10 +2468,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
&BTRFS_I(inode)->runtime_flags)) &BTRFS_I(inode)->runtime_flags))
wbc->tagged_writepages = 1; wbc->tagged_writepages = 1;
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = wbc_to_tag(wbc);
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
retry: retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end); tag_pages_for_writeback(mapping, index, end);

View File

@ -8715,15 +8715,13 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
* some fairly slow code that needs optimization. This walks the list * some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk. * of all the inodes with pending delalloc and forces them to disk.
*/ */
static int start_delalloc_inodes(struct btrfs_root *root, static int start_delalloc_inodes(struct btrfs_root *root, long *nr_to_write,
struct writeback_control *wbc, bool snapshot, bool snapshot, bool in_reclaim_context)
bool in_reclaim_context)
{ {
struct btrfs_delalloc_work *work, *next; struct btrfs_delalloc_work *work, *next;
LIST_HEAD(works); LIST_HEAD(works);
LIST_HEAD(splice); LIST_HEAD(splice);
int ret = 0; int ret = 0;
bool full_flush = wbc->nr_to_write == LONG_MAX;
mutex_lock(&root->delalloc_mutex); mutex_lock(&root->delalloc_mutex);
spin_lock(&root->delalloc_lock); spin_lock(&root->delalloc_lock);
@ -8749,10 +8747,10 @@ static int start_delalloc_inodes(struct btrfs_root *root,
if (snapshot) if (snapshot)
set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags); set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags);
if (full_flush) { if (nr_to_write == NULL) {
work = btrfs_alloc_delalloc_work(&inode->vfs_inode); work = btrfs_alloc_delalloc_work(tmp_inode);
if (!work) { if (!work) {
iput(&inode->vfs_inode); iput(tmp_inode);
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
@ -8760,9 +8758,11 @@ static int start_delalloc_inodes(struct btrfs_root *root,
btrfs_queue_work(root->fs_info->flush_workers, btrfs_queue_work(root->fs_info->flush_workers,
&work->work); &work->work);
} else { } else {
ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc); ret = filemap_flush_nr(tmp_inode->i_mapping,
nr_to_write);
btrfs_add_delayed_iput(inode); btrfs_add_delayed_iput(inode);
if (ret || wbc->nr_to_write <= 0)
if (ret || *nr_to_write <= 0)
goto out; goto out;
} }
cond_resched(); cond_resched();
@ -8788,29 +8788,17 @@ static int start_delalloc_inodes(struct btrfs_root *root,
int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
{ {
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = WB_SYNC_NONE,
.range_start = 0,
.range_end = LLONG_MAX,
};
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
if (BTRFS_FS_ERROR(fs_info)) if (BTRFS_FS_ERROR(fs_info))
return -EROFS; return -EROFS;
return start_delalloc_inodes(root, NULL, true, in_reclaim_context);
return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
} }
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
bool in_reclaim_context) bool in_reclaim_context)
{ {
struct writeback_control wbc = { long *nr_to_write = nr == LONG_MAX ? NULL : &nr;
.nr_to_write = nr,
.sync_mode = WB_SYNC_NONE,
.range_start = 0,
.range_end = LLONG_MAX,
};
struct btrfs_root *root; struct btrfs_root *root;
LIST_HEAD(splice); LIST_HEAD(splice);
int ret; int ret;
@ -8822,13 +8810,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
spin_lock(&fs_info->delalloc_root_lock); spin_lock(&fs_info->delalloc_root_lock);
list_splice_init(&fs_info->delalloc_roots, &splice); list_splice_init(&fs_info->delalloc_roots, &splice);
while (!list_empty(&splice)) { while (!list_empty(&splice)) {
/*
* Reset nr_to_write here so we know that we're doing a full
* flush.
*/
if (nr == LONG_MAX)
wbc.nr_to_write = LONG_MAX;
root = list_first_entry(&splice, struct btrfs_root, root = list_first_entry(&splice, struct btrfs_root,
delalloc_root); delalloc_root);
root = btrfs_grab_root(root); root = btrfs_grab_root(root);
@ -8837,9 +8818,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
&fs_info->delalloc_roots); &fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock); spin_unlock(&fs_info->delalloc_root_lock);
ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); ret = start_delalloc_inodes(root, nr_to_write, false,
in_reclaim_context);
btrfs_put_root(root); btrfs_put_root(root);
if (ret < 0 || wbc.nr_to_write <= 0) if (ret < 0 || nr <= 0)
goto out; goto out;
spin_lock(&fs_info->delalloc_root_lock); spin_lock(&fs_info->delalloc_root_lock);
} }

View File

@ -1045,11 +1045,7 @@ void ceph_init_writeback_ctl(struct address_space *mapping,
ceph_wbc->index = ceph_wbc->start_index; ceph_wbc->index = ceph_wbc->start_index;
ceph_wbc->end = -1; ceph_wbc->end = -1;
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) { ceph_wbc->tag = wbc_to_tag(wbc);
ceph_wbc->tag = PAGECACHE_TAG_TOWRITE;
} else {
ceph_wbc->tag = PAGECACHE_TAG_DIRTY;
}
ceph_wbc->op_idx = -1; ceph_wbc->op_idx = -1;
ceph_wbc->num_ops = 0; ceph_wbc->num_ops = 0;

View File

@ -2618,10 +2618,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
handle_t *handle = NULL; handle_t *handle = NULL;
int bpp = ext4_journal_blocks_per_folio(mpd->inode); int bpp = ext4_journal_blocks_per_folio(mpd->inode);
if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) tag = wbc_to_tag(mpd->wbc);
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
mpd->map.m_len = 0; mpd->map.m_len = 0;
mpd->next_pos = mpd->start_pos; mpd->next_pos = mpd->start_pos;

View File

@ -2986,10 +2986,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1; range_whole = 1;
} }
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = wbc_to_tag(wbc);
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
retry: retry:
retry = 0; retry = 0;
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)

View File

@ -14,6 +14,7 @@
* Additions for address_space-based writeback * Additions for address_space-based writeback
*/ */
#include <linux/sched/sysctl.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
@ -31,11 +32,6 @@
#include <linux/memcontrol.h> #include <linux/memcontrol.h>
#include "internal.h" #include "internal.h"
/*
* 4MB minimal write chunk size
*/
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
/* /*
* Passed into wb_writeback(), essentially a subset of writeback_control * Passed into wb_writeback(), essentially a subset of writeback_control
*/ */
@ -200,6 +196,19 @@ static void wb_queue_work(struct bdi_writeback *wb,
spin_unlock_irq(&wb->work_lock); spin_unlock_irq(&wb->work_lock);
} }
static bool wb_wait_for_completion_cb(struct wb_completion *done)
{
unsigned long waited_secs = (jiffies - done->wait_start) / HZ;
done->progress_stamp = jiffies;
if (waited_secs > sysctl_hung_task_timeout_secs)
pr_info("INFO: The task %s:%d has been waiting for writeback "
"completion for more than %lu seconds.",
current->comm, current->pid, waited_secs);
return !atomic_read(&done->cnt);
}
/** /**
* wb_wait_for_completion - wait for completion of bdi_writeback_works * wb_wait_for_completion - wait for completion of bdi_writeback_works
* @done: target wb_completion * @done: target wb_completion
@ -212,8 +221,9 @@ static void wb_queue_work(struct bdi_writeback *wb,
*/ */
void wb_wait_for_completion(struct wb_completion *done) void wb_wait_for_completion(struct wb_completion *done)
{ {
done->wait_start = jiffies;
atomic_dec(&done->cnt); /* put down the initial count */ atomic_dec(&done->cnt); /* put down the initial count */
wait_event(*done->waitq, !atomic_read(&done->cnt)); wait_event(*done->waitq, wb_wait_for_completion_cb(done));
} }
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
@ -808,9 +818,9 @@ static void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
* @wbc: writeback_control of interest * @wbc: writeback_control of interest
* @inode: target inode * @inode: target inode
* *
* This function is to be used by __filemap_fdatawrite_range(), which is an * This function is to be used by filemap_writeback(), which is an alternative
* alternative entry point into writeback code, and first ensures @inode is * entry point into writeback code, and first ensures @inode is associated with
* associated with a bdi_writeback and attaches it to @wbc. * a bdi_writeback and attaches it to @wbc.
*/ */
void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
struct inode *inode) struct inode *inode)
@ -1882,8 +1892,8 @@ static int writeback_single_inode(struct inode *inode,
return ret; return ret;
} }
static long writeback_chunk_size(struct bdi_writeback *wb, static long writeback_chunk_size(struct super_block *sb,
struct wb_writeback_work *work) struct bdi_writeback *wb, struct wb_writeback_work *work)
{ {
long pages; long pages;
@ -1901,16 +1911,13 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
* (maybe slowly) sync all tagged pages * (maybe slowly) sync all tagged pages
*/ */
if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
pages = LONG_MAX; return LONG_MAX;
else {
pages = min(wb->avg_write_bandwidth / 2, pages = min(wb->avg_write_bandwidth / 2,
global_wb_domain.dirty_limit / DIRTY_SCOPE); global_wb_domain.dirty_limit / DIRTY_SCOPE);
pages = min(pages, work->nr_pages); pages = min(pages, work->nr_pages);
pages = round_down(pages + MIN_WRITEBACK_PAGES, return round_down(pages + sb->s_min_writeback_pages,
MIN_WRITEBACK_PAGES); sb->s_min_writeback_pages);
}
return pages;
} }
/* /*
@ -2012,7 +2019,7 @@ static long writeback_sb_inodes(struct super_block *sb,
inode_state_set(inode, I_SYNC); inode_state_set(inode, I_SYNC);
wbc_attach_and_unlock_inode(&wbc, inode); wbc_attach_and_unlock_inode(&wbc, inode);
write_chunk = writeback_chunk_size(wb, work); write_chunk = writeback_chunk_size(inode->i_sb, wb, work);
wbc.nr_to_write = write_chunk; wbc.nr_to_write = write_chunk;
wbc.pages_skipped = 0; wbc.pages_skipped = 0;
@ -2022,6 +2029,12 @@ static long writeback_sb_inodes(struct super_block *sb,
*/ */
__writeback_single_inode(inode, &wbc); __writeback_single_inode(inode, &wbc);
/* Report progress to inform the hung task detector of the progress. */
if (work->done && work->done->progress_stamp &&
(jiffies - work->done->progress_stamp) > HZ *
sysctl_hung_task_timeout_secs / 2)
wake_up_all(work->done->waitq);
wbc_detach_inode(&wbc); wbc_detach_inode(&wbc);
work->nr_pages -= write_chunk - wbc.nr_to_write; work->nr_pages -= write_chunk - wbc.nr_to_write;
wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;

View File

@ -311,10 +311,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
range_whole = 1; range_whole = 1;
cycled = 1; /* ignore range_cyclic tests */ cycled = 1; /* ignore range_cyclic tests */
} }
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = wbc_to_tag(wbc);
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
retry: retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)

View File

@ -902,15 +902,8 @@ int ocfs2_journal_alloc(struct ocfs2_super *osb)
static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{ {
struct address_space *mapping = jinode->i_vfs_inode->i_mapping; return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
struct writeback_control wbc = { jinode->i_dirty_start, jinode->i_dirty_end);
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
.range_start = jinode->i_dirty_start,
.range_end = jinode->i_dirty_end,
};
return filemap_fdatawrite_wbc(mapping, &wbc);
} }
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)

View File

@ -389,6 +389,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
goto fail; goto fail;
if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink)) if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink))
goto fail; goto fail;
s->s_min_writeback_pages = MIN_WRITEBACK_PAGES;
return s; return s;
fail: fail:

View File

@ -281,14 +281,12 @@ int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
} }
if (flags & SYNC_FILE_RANGE_WRITE) { if (flags & SYNC_FILE_RANGE_WRITE) {
int sync_mode = WB_SYNC_NONE;
if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) == if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
SYNC_FILE_RANGE_WRITE_AND_WAIT) SYNC_FILE_RANGE_WRITE_AND_WAIT)
sync_mode = WB_SYNC_ALL; ret = filemap_fdatawrite_range(mapping, offset,
endbyte);
ret = __filemap_fdatawrite_range(mapping, offset, endbyte, else
sync_mode); ret = filemap_flush_range(mapping, offset, endbyte);
if (ret < 0) if (ret < 0)
goto out; goto out;
} }

View File

@ -1204,6 +1204,7 @@ xfs_mount_zones(
.mp = mp, .mp = mp,
}; };
struct xfs_buftarg *bt = mp->m_rtdev_targp; struct xfs_buftarg *bt = mp->m_rtdev_targp;
xfs_extlen_t zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks;
int error; int error;
if (!bt) { if (!bt) {
@ -1234,10 +1235,33 @@ xfs_mount_zones(
return -ENOMEM; return -ENOMEM;
xfs_info(mp, "%u zones of %u blocks (%u max open zones)", xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks, mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones);
mp->m_max_open_zones);
trace_xfs_zones_mount(mp); trace_xfs_zones_mount(mp);
/*
* The writeback code switches between inodes regularly to provide
* fairness. The default lower bound is 4MiB, but for zoned file
* systems we want to increase that both to reduce seeks, but also more
* importantly so that workloads that write files in a multiple of the
* zone size do not get fragmented and require garbage collection when
* they shouldn't. Increase it to the zone size capped by the max
* extent len.
*
* Note that because s_min_writeback_pages is a superblock field, this
* value also gets applied to non-zoned files on the data device if
* there are any. On typical zoned setup all data is on the RT device
* because using the more efficient sequential write required zones
* is the reason for using the zone allocator, and either the RT device
* and the (meta)data device are on the same block device, or the
* (meta)data device is on a fast SSD while the data on the RT device
* is on a SMR HDD. In any combination of the above cases enforcing
* the higher min_writeback_pages for non-RT inodes is either a noop
* or beneficial.
*/
mp->m_super->s_min_writeback_pages =
XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >>
PAGE_SHIFT;
if (bdev_is_zoned(bt->bt_bdev)) { if (bdev_is_zoned(bt->bt_bdev)) {
error = blkdev_report_zones(bt->bt_bdev, error = blkdev_report_zones(bt->bt_bdev,
XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart), XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart),

View File

@ -63,6 +63,8 @@ enum wb_reason {
struct wb_completion { struct wb_completion {
atomic_t cnt; atomic_t cnt;
wait_queue_head_t *waitq; wait_queue_head_t *waitq;
unsigned long progress_stamp; /* The jiffies when slow progress is detected */
unsigned long wait_start; /* The jiffies when waiting for the writeback work to finish */
}; };
#define __WB_COMPLETION_INIT(_waitq) \ #define __WB_COMPLETION_INIT(_waitq) \

View File

@ -1642,6 +1642,7 @@ struct super_block {
spinlock_t s_inode_wblist_lock; spinlock_t s_inode_wblist_lock;
struct list_head s_inodes_wb; /* writeback inodes */ struct list_head s_inodes_wb; /* writeback inodes */
long s_min_writeback_pages;
} __randomize_layout; } __randomize_layout;
static inline struct user_namespace *i_user_ns(const struct inode *inode) static inline struct user_namespace *i_user_ns(const struct inode *inode)
@ -3075,7 +3076,7 @@ extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_check_and_advance_wb_err(struct file *file);
extern int __must_check file_write_and_wait_range(struct file *file, extern int __must_check file_write_and_wait_range(struct file *file,
loff_t start, loff_t end); loff_t start, loff_t end);
int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, int filemap_flush_range(struct address_space *mapping, loff_t start,
loff_t end); loff_t end);
static inline int file_write_and_wait(struct file *file) static inline int file_write_and_wait(struct file *file)
@ -3112,7 +3113,7 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
} else if (iocb->ki_flags & IOCB_DONTCACHE) { } else if (iocb->ki_flags & IOCB_DONTCACHE) {
struct address_space *mapping = iocb->ki_filp->f_mapping; struct address_space *mapping = iocb->ki_filp->f_mapping;
filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, filemap_flush_range(mapping, iocb->ki_pos - count,
iocb->ki_pos - 1); iocb->ki_pos - 1);
} }

View File

@ -38,6 +38,7 @@ int filemap_invalidate_pages(struct address_space *mapping,
int write_inode_now(struct inode *, int sync); int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *); int filemap_fdatawrite(struct address_space *);
int filemap_flush(struct address_space *); int filemap_flush(struct address_space *);
int filemap_flush_nr(struct address_space *mapping, long *nr_to_write);
int filemap_fdatawait_keep_errors(struct address_space *mapping); int filemap_fdatawait_keep_errors(struct address_space *mapping);
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
int filemap_fdatawait_range_keep_errors(struct address_space *mapping, int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
@ -53,14 +54,10 @@ static inline int filemap_fdatawait(struct address_space *mapping)
bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
int filemap_write_and_wait_range(struct address_space *mapping, int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend); loff_t lstart, loff_t lend);
int __filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end, int sync_mode);
int filemap_fdatawrite_range(struct address_space *mapping, int filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end); loff_t start, loff_t end);
int filemap_check_errors(struct address_space *mapping); int filemap_check_errors(struct address_space *mapping);
void __filemap_set_wb_err(struct address_space *mapping, int err); void __filemap_set_wb_err(struct address_space *mapping, int err);
int filemap_fdatawrite_wbc(struct address_space *mapping,
struct writeback_control *wbc);
int kiocb_write_and_wait(struct kiocb *iocb, size_t count); int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
static inline int filemap_write_and_wait(struct address_space *mapping) static inline int filemap_write_and_wait(struct address_space *mapping)

View File

@ -189,6 +189,13 @@ void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
void inode_wait_for_writeback(struct inode *inode); void inode_wait_for_writeback(struct inode *inode);
void inode_io_list_del(struct inode *inode); void inode_io_list_del(struct inode *inode);
static inline xa_mark_t wbc_to_tag(struct writeback_control *wbc)
{
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
return PAGECACHE_TAG_TOWRITE;
return PAGECACHE_TAG_DIRTY;
}
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
#include <linux/cgroup.h> #include <linux/cgroup.h>
@ -367,4 +374,9 @@ bool redirty_page_for_writepage(struct writeback_control *, struct page *);
void sb_mark_inode_writeback(struct inode *inode); void sb_mark_inode_writeback(struct inode *inode);
void sb_clear_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode);
/*
* 4MB minimal write chunk size
*/
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
#endif /* WRITEBACK_H */ #endif /* WRITEBACK_H */

View File

@ -111,8 +111,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
spin_unlock(&file->f_lock); spin_unlock(&file->f_lock);
break; break;
case POSIX_FADV_DONTNEED: case POSIX_FADV_DONTNEED:
__filemap_fdatawrite_range(mapping, offset, endbyte, filemap_flush_range(mapping, offset, endbyte);
WB_SYNC_NONE);
/* /*
* First and last FULL page! Partial pages are deliberately * First and last FULL page! Partial pages are deliberately

View File

@ -366,83 +366,60 @@ static int filemap_check_and_keep_errors(struct address_space *mapping)
return 0; return 0;
} }
/** static int filemap_writeback(struct address_space *mapping, loff_t start,
* filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range loff_t end, enum writeback_sync_modes sync_mode,
* @mapping: address space structure to write long *nr_to_write)
* @wbc: the writeback_control controlling the writeout
*
* Call writepages on the mapping using the provided wbc to control the
* writeout.
*
* Return: %0 on success, negative error code otherwise.
*/
int filemap_fdatawrite_wbc(struct address_space *mapping,
struct writeback_control *wbc)
{ {
struct writeback_control wbc = {
.sync_mode = sync_mode,
.nr_to_write = nr_to_write ? *nr_to_write : LONG_MAX,
.range_start = start,
.range_end = end,
};
int ret; int ret;
if (!mapping_can_writeback(mapping) || if (!mapping_can_writeback(mapping) ||
!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0; return 0;
wbc_attach_fdatawrite_inode(wbc, mapping->host); wbc_attach_fdatawrite_inode(&wbc, mapping->host);
ret = do_writepages(mapping, wbc); ret = do_writepages(mapping, &wbc);
wbc_detach_inode(wbc); wbc_detach_inode(&wbc);
if (!ret && nr_to_write)
*nr_to_write = wbc.nr_to_write;
return ret; return ret;
} }
EXPORT_SYMBOL(filemap_fdatawrite_wbc);
/** /**
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range * filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* @mapping: address space structure to write * @mapping: address space structure to write
* @start: offset in bytes where the range starts * @start: offset in bytes where the range starts
* @end: offset in bytes where the range ends (inclusive) * @end: offset in bytes where the range ends (inclusive)
* @sync_mode: enable synchronous operation
* *
* Start writeback against all of a mapping's dirty pages that lie * Start writeback against all of a mapping's dirty pages that lie
* within the byte offsets <start, end> inclusive. * within the byte offsets <start, end> inclusive.
* *
* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as * This is a data integrity operation that waits upon dirty or in writeback
* opposed to a regular memory cleansing writeback. The difference between * pages.
* these two operations is that if a dirty page/buffer is encountered, it must
* be waited upon, and not just skipped over.
* *
* Return: %0 on success, negative error code otherwise. * Return: %0 on success, negative error code otherwise.
*/ */
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
loff_t end, int sync_mode)
{
struct writeback_control wbc = {
.sync_mode = sync_mode,
.nr_to_write = LONG_MAX,
.range_start = start,
.range_end = end,
};
return filemap_fdatawrite_wbc(mapping, &wbc);
}
static inline int __filemap_fdatawrite(struct address_space *mapping,
int sync_mode)
{
return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
}
int filemap_fdatawrite(struct address_space *mapping)
{
return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite);
int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
loff_t end) loff_t end)
{ {
return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL); return filemap_writeback(mapping, start, end, WB_SYNC_ALL, NULL);
} }
EXPORT_SYMBOL(filemap_fdatawrite_range); EXPORT_SYMBOL(filemap_fdatawrite_range);
int filemap_fdatawrite(struct address_space *mapping)
{
return filemap_fdatawrite_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL(filemap_fdatawrite);
/** /**
* filemap_fdatawrite_range_kick - start writeback on a range * filemap_flush_range - start writeback on a range
* @mapping: target address_space * @mapping: target address_space
* @start: index to start writeback on * @start: index to start writeback on
* @end: last (inclusive) index for writeback * @end: last (inclusive) index for writeback
@ -452,12 +429,12 @@ EXPORT_SYMBOL(filemap_fdatawrite_range);
* *
* Return: %0 on success, negative error code otherwise. * Return: %0 on success, negative error code otherwise.
*/ */
int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, int filemap_flush_range(struct address_space *mapping, loff_t start,
loff_t end) loff_t end)
{ {
return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE); return filemap_writeback(mapping, start, end, WB_SYNC_NONE, NULL);
} }
EXPORT_SYMBOL_GPL(filemap_fdatawrite_range_kick); EXPORT_SYMBOL_GPL(filemap_flush_range);
/** /**
* filemap_flush - mostly a non-blocking flush * filemap_flush - mostly a non-blocking flush
@ -470,10 +447,22 @@ EXPORT_SYMBOL_GPL(filemap_fdatawrite_range_kick);
*/ */
int filemap_flush(struct address_space *mapping) int filemap_flush(struct address_space *mapping)
{ {
return __filemap_fdatawrite(mapping, WB_SYNC_NONE); return filemap_flush_range(mapping, 0, LLONG_MAX);
} }
EXPORT_SYMBOL(filemap_flush); EXPORT_SYMBOL(filemap_flush);
/*
* Start writeback on @nr_to_write pages from @mapping. No one but the existing
* btrfs caller should be using this. Talk to linux-mm if you think adding a
* new caller is a good idea.
*/
int filemap_flush_nr(struct address_space *mapping, long *nr_to_write)
{
return filemap_writeback(mapping, 0, LLONG_MAX, WB_SYNC_NONE,
nr_to_write);
}
EXPORT_SYMBOL_FOR_MODULES(filemap_flush_nr, "btrfs");
/** /**
* filemap_range_has_page - check if a page exists in range. * filemap_range_has_page - check if a page exists in range.
* @mapping: address space within which to check * @mapping: address space within which to check
@ -691,8 +680,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
return 0; return 0;
if (mapping_needs_writeback(mapping)) { if (mapping_needs_writeback(mapping)) {
err = __filemap_fdatawrite_range(mapping, lstart, lend, err = filemap_fdatawrite_range(mapping, lstart, lend);
WB_SYNC_ALL);
/* /*
* Even if the above returned error, the pages may be * Even if the above returned error, the pages may be
* written partially (e.g. -ENOSPC), so we wait for it. * written partially (e.g. -ENOSPC), so we wait for it.
@ -794,8 +782,7 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
return 0; return 0;
if (mapping_needs_writeback(mapping)) { if (mapping_needs_writeback(mapping)) {
err = __filemap_fdatawrite_range(mapping, lstart, lend, err = filemap_fdatawrite_range(mapping, lstart, lend);
WB_SYNC_ALL);
/* See comment of filemap_write_and_wait() */ /* See comment of filemap_write_and_wait() */
if (err != -EIO) if (err != -EIO)
__filemap_fdatawait_range(mapping, lstart, lend); __filemap_fdatawait_range(mapping, lstart, lend);
@ -4528,16 +4515,8 @@ int filemap_invalidate_inode(struct inode *inode, bool flush,
unmap_mapping_pages(mapping, first, nr, false); unmap_mapping_pages(mapping, first, nr, false);
/* Write back the data if we're asked to. */ /* Write back the data if we're asked to. */
if (flush) { if (flush)
struct writeback_control wbc = { filemap_fdatawrite_range(mapping, start, end);
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = start,
.range_end = end,
};
filemap_fdatawrite_wbc(mapping, &wbc);
}
/* Wait for writeback to complete on all folios and discard. */ /* Wait for writeback to complete on all folios and discard. */
invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE); invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE);

View File

@ -2434,12 +2434,6 @@ static bool folio_prepare_writeback(struct address_space *mapping,
return true; return true;
} }
static xa_mark_t wbc_to_tag(struct writeback_control *wbc)
{
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
return PAGECACHE_TAG_TOWRITE;
return PAGECACHE_TAG_DIRTY;
}
static pgoff_t wbc_end(struct writeback_control *wbc) static pgoff_t wbc_end(struct writeback_control *wbc)
{ {