mirror of https://github.com/torvalds/linux.git
vfs-6.19-rc1.writeback
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaSmOZQAKCRCRxhvAZXjc
or4UAP9FbpFsZd0DpsYnKuv7kFepl291PuR0x2dKmseJ/wcf8AEAzI8FR5wd/fey
25ZNdExoUojAOj5wVn+jUep3u54jBws=
=/toi
-----END PGP SIGNATURE-----
Merge tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull writeback updates from Christian Brauner:
"Features:
- Allow file systems to increase the minimum writeback chunk size.
The relatively low minimal writeback size of 4MiB means that
written back inodes on rotational media are switched a lot. Besides
introducing additional seeks, this also can lead to extreme file
fragmentation on zoned devices when a lot of files are cached
relative to the available writeback bandwidth.
This adds a superblock field that allows the file system to
override the default size, and sets it to the zone size for zoned
XFS.
- Add logging for slow writeback when it exceeds
sysctl_hung_task_timeout_secs. This helps identify tasks waiting
for a long time and pinpoint potential issues. Recording the
starting jiffies is also useful when debugging a crashed vmcore.
- Wake up waiting tasks when finishing the writeback of a chunk.
Cleanups:
- filemap_* writeback interface cleanups.
Adding filemap_fdatawrite_wbc ended up being a mistake, as all but
the original btrfs caller should be using better high level
interfaces instead.
This series removes all these low-level interfaces, switches btrfs
to a more specific interface, and cleans up other too low-level
interfaces. With this the writeback_control that is passed to the
writeback code is only initialized in three places.
- Remove __filemap_fdatawrite, __filemap_fdatawrite_range, and
filemap_fdatawrite_wbc
- Add filemap_flush_nr helper for btrfs
- Push struct writeback_control into start_delalloc_inodes in btrfs
- Rename filemap_fdatawrite_range_kick to filemap_flush_range
- Stop opencoding filemap_fdatawrite_range in 9p, ocfs2, and mm
- Make wbc_to_tag() inline and use it in fs"
* tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs: Make wbc_to_tag() inline and use it in fs.
xfs: set s_min_writeback_pages for zoned file systems
writeback: allow the file system to override MIN_WRITEBACK_PAGES
writeback: cleanup writeback_chunk_size
mm: rename filemap_fdatawrite_range_kick to filemap_flush_range
mm: remove __filemap_fdatawrite_range
mm: remove filemap_fdatawrite_wbc
mm: remove __filemap_fdatawrite
mm,btrfs: add a filemap_flush_nr helper
btrfs: push struct writeback_control into start_delalloc_inodes
btrfs: use the local tmp_inode variable in start_delalloc_inodes
ocfs2: don't opencode filemap_fdatawrite_range in ocfs2_journal_submit_inode_data_buffers
9p: don't opencode filemap_fdatawrite_range in v9fs_mmap_vm_close
mm: don't opencode filemap_fdatawrite_range in filemap_invalidate_inode
writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs)
writeback: Wake up waiting tasks when finishing the writeback of a chunk.
This commit is contained in:
commit
ebaeabfa5a
|
|
@ -483,24 +483,15 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
|
|||
|
||||
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct inode *inode;
|
||||
|
||||
struct writeback_control wbc = {
|
||||
.nr_to_write = LONG_MAX,
|
||||
.sync_mode = WB_SYNC_ALL,
|
||||
.range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE,
|
||||
/* absolute end, byte at end included */
|
||||
.range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE +
|
||||
(vma->vm_end - vma->vm_start - 1),
|
||||
};
|
||||
|
||||
if (!(vma->vm_flags & VM_SHARED))
|
||||
return;
|
||||
|
||||
p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
|
||||
|
||||
inode = file_inode(vma->vm_file);
|
||||
filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
|
||||
filemap_fdatawrite_range(file_inode(vma->vm_file)->i_mapping,
|
||||
(loff_t)vma->vm_pgoff * PAGE_SIZE,
|
||||
(loff_t)vma->vm_pgoff * PAGE_SIZE +
|
||||
(vma->vm_end - vma->vm_start - 1));
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
|
||||
|
|
|
|||
|
|
@ -2468,10 +2468,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
|
|||
&BTRFS_I(inode)->runtime_flags))
|
||||
wbc->tagged_writepages = 1;
|
||||
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
tag = PAGECACHE_TAG_TOWRITE;
|
||||
else
|
||||
tag = PAGECACHE_TAG_DIRTY;
|
||||
tag = wbc_to_tag(wbc);
|
||||
retry:
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
tag_pages_for_writeback(mapping, index, end);
|
||||
|
|
|
|||
|
|
@ -8715,15 +8715,13 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
|
|||
* some fairly slow code that needs optimization. This walks the list
|
||||
* of all the inodes with pending delalloc and forces them to disk.
|
||||
*/
|
||||
static int start_delalloc_inodes(struct btrfs_root *root,
|
||||
struct writeback_control *wbc, bool snapshot,
|
||||
bool in_reclaim_context)
|
||||
static int start_delalloc_inodes(struct btrfs_root *root, long *nr_to_write,
|
||||
bool snapshot, bool in_reclaim_context)
|
||||
{
|
||||
struct btrfs_delalloc_work *work, *next;
|
||||
LIST_HEAD(works);
|
||||
LIST_HEAD(splice);
|
||||
int ret = 0;
|
||||
bool full_flush = wbc->nr_to_write == LONG_MAX;
|
||||
|
||||
mutex_lock(&root->delalloc_mutex);
|
||||
spin_lock(&root->delalloc_lock);
|
||||
|
|
@ -8749,10 +8747,10 @@ static int start_delalloc_inodes(struct btrfs_root *root,
|
|||
|
||||
if (snapshot)
|
||||
set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags);
|
||||
if (full_flush) {
|
||||
work = btrfs_alloc_delalloc_work(&inode->vfs_inode);
|
||||
if (nr_to_write == NULL) {
|
||||
work = btrfs_alloc_delalloc_work(tmp_inode);
|
||||
if (!work) {
|
||||
iput(&inode->vfs_inode);
|
||||
iput(tmp_inode);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
|
@ -8760,9 +8758,11 @@ static int start_delalloc_inodes(struct btrfs_root *root,
|
|||
btrfs_queue_work(root->fs_info->flush_workers,
|
||||
&work->work);
|
||||
} else {
|
||||
ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc);
|
||||
ret = filemap_flush_nr(tmp_inode->i_mapping,
|
||||
nr_to_write);
|
||||
btrfs_add_delayed_iput(inode);
|
||||
if (ret || wbc->nr_to_write <= 0)
|
||||
|
||||
if (ret || *nr_to_write <= 0)
|
||||
goto out;
|
||||
}
|
||||
cond_resched();
|
||||
|
|
@ -8788,29 +8788,17 @@ static int start_delalloc_inodes(struct btrfs_root *root,
|
|||
|
||||
int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
|
||||
{
|
||||
struct writeback_control wbc = {
|
||||
.nr_to_write = LONG_MAX,
|
||||
.sync_mode = WB_SYNC_NONE,
|
||||
.range_start = 0,
|
||||
.range_end = LLONG_MAX,
|
||||
};
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
|
||||
if (BTRFS_FS_ERROR(fs_info))
|
||||
return -EROFS;
|
||||
|
||||
return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
|
||||
return start_delalloc_inodes(root, NULL, true, in_reclaim_context);
|
||||
}
|
||||
|
||||
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
|
||||
bool in_reclaim_context)
|
||||
{
|
||||
struct writeback_control wbc = {
|
||||
.nr_to_write = nr,
|
||||
.sync_mode = WB_SYNC_NONE,
|
||||
.range_start = 0,
|
||||
.range_end = LLONG_MAX,
|
||||
};
|
||||
long *nr_to_write = nr == LONG_MAX ? NULL : &nr;
|
||||
struct btrfs_root *root;
|
||||
LIST_HEAD(splice);
|
||||
int ret;
|
||||
|
|
@ -8822,13 +8810,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
|
|||
spin_lock(&fs_info->delalloc_root_lock);
|
||||
list_splice_init(&fs_info->delalloc_roots, &splice);
|
||||
while (!list_empty(&splice)) {
|
||||
/*
|
||||
* Reset nr_to_write here so we know that we're doing a full
|
||||
* flush.
|
||||
*/
|
||||
if (nr == LONG_MAX)
|
||||
wbc.nr_to_write = LONG_MAX;
|
||||
|
||||
root = list_first_entry(&splice, struct btrfs_root,
|
||||
delalloc_root);
|
||||
root = btrfs_grab_root(root);
|
||||
|
|
@ -8837,9 +8818,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
|
|||
&fs_info->delalloc_roots);
|
||||
spin_unlock(&fs_info->delalloc_root_lock);
|
||||
|
||||
ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
|
||||
ret = start_delalloc_inodes(root, nr_to_write, false,
|
||||
in_reclaim_context);
|
||||
btrfs_put_root(root);
|
||||
if (ret < 0 || wbc.nr_to_write <= 0)
|
||||
if (ret < 0 || nr <= 0)
|
||||
goto out;
|
||||
spin_lock(&fs_info->delalloc_root_lock);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1045,11 +1045,7 @@ void ceph_init_writeback_ctl(struct address_space *mapping,
|
|||
ceph_wbc->index = ceph_wbc->start_index;
|
||||
ceph_wbc->end = -1;
|
||||
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
|
||||
ceph_wbc->tag = PAGECACHE_TAG_TOWRITE;
|
||||
} else {
|
||||
ceph_wbc->tag = PAGECACHE_TAG_DIRTY;
|
||||
}
|
||||
ceph_wbc->tag = wbc_to_tag(wbc);
|
||||
|
||||
ceph_wbc->op_idx = -1;
|
||||
ceph_wbc->num_ops = 0;
|
||||
|
|
|
|||
|
|
@ -2618,10 +2618,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
|
|||
handle_t *handle = NULL;
|
||||
int bpp = ext4_journal_blocks_per_folio(mpd->inode);
|
||||
|
||||
if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
|
||||
tag = PAGECACHE_TAG_TOWRITE;
|
||||
else
|
||||
tag = PAGECACHE_TAG_DIRTY;
|
||||
tag = wbc_to_tag(mpd->wbc);
|
||||
|
||||
mpd->map.m_len = 0;
|
||||
mpd->next_pos = mpd->start_pos;
|
||||
|
|
|
|||
|
|
@ -2986,10 +2986,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
|
|||
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
|
||||
range_whole = 1;
|
||||
}
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
tag = PAGECACHE_TAG_TOWRITE;
|
||||
else
|
||||
tag = PAGECACHE_TAG_DIRTY;
|
||||
tag = wbc_to_tag(wbc);
|
||||
retry:
|
||||
retry = 0;
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
* Additions for address_space-based writeback
|
||||
*/
|
||||
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
|
@ -31,11 +32,6 @@
|
|||
#include <linux/memcontrol.h>
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* 4MB minimal write chunk size
|
||||
*/
|
||||
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
|
||||
|
||||
/*
|
||||
* Passed into wb_writeback(), essentially a subset of writeback_control
|
||||
*/
|
||||
|
|
@ -200,6 +196,19 @@ static void wb_queue_work(struct bdi_writeback *wb,
|
|||
spin_unlock_irq(&wb->work_lock);
|
||||
}
|
||||
|
||||
static bool wb_wait_for_completion_cb(struct wb_completion *done)
|
||||
{
|
||||
unsigned long waited_secs = (jiffies - done->wait_start) / HZ;
|
||||
|
||||
done->progress_stamp = jiffies;
|
||||
if (waited_secs > sysctl_hung_task_timeout_secs)
|
||||
pr_info("INFO: The task %s:%d has been waiting for writeback "
|
||||
"completion for more than %lu seconds.",
|
||||
current->comm, current->pid, waited_secs);
|
||||
|
||||
return !atomic_read(&done->cnt);
|
||||
}
|
||||
|
||||
/**
|
||||
* wb_wait_for_completion - wait for completion of bdi_writeback_works
|
||||
* @done: target wb_completion
|
||||
|
|
@ -212,8 +221,9 @@ static void wb_queue_work(struct bdi_writeback *wb,
|
|||
*/
|
||||
void wb_wait_for_completion(struct wb_completion *done)
|
||||
{
|
||||
done->wait_start = jiffies;
|
||||
atomic_dec(&done->cnt); /* put down the initial count */
|
||||
wait_event(*done->waitq, !atomic_read(&done->cnt));
|
||||
wait_event(*done->waitq, wb_wait_for_completion_cb(done));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
|
@ -808,9 +818,9 @@ static void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
|
|||
* @wbc: writeback_control of interest
|
||||
* @inode: target inode
|
||||
*
|
||||
* This function is to be used by __filemap_fdatawrite_range(), which is an
|
||||
* alternative entry point into writeback code, and first ensures @inode is
|
||||
* associated with a bdi_writeback and attaches it to @wbc.
|
||||
* This function is to be used by filemap_writeback(), which is an alternative
|
||||
* entry point into writeback code, and first ensures @inode is associated with
|
||||
* a bdi_writeback and attaches it to @wbc.
|
||||
*/
|
||||
void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
|
||||
struct inode *inode)
|
||||
|
|
@ -1882,8 +1892,8 @@ static int writeback_single_inode(struct inode *inode,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static long writeback_chunk_size(struct bdi_writeback *wb,
|
||||
struct wb_writeback_work *work)
|
||||
static long writeback_chunk_size(struct super_block *sb,
|
||||
struct bdi_writeback *wb, struct wb_writeback_work *work)
|
||||
{
|
||||
long pages;
|
||||
|
||||
|
|
@ -1901,16 +1911,13 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
|
|||
* (maybe slowly) sync all tagged pages
|
||||
*/
|
||||
if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
|
||||
pages = LONG_MAX;
|
||||
else {
|
||||
pages = min(wb->avg_write_bandwidth / 2,
|
||||
global_wb_domain.dirty_limit / DIRTY_SCOPE);
|
||||
pages = min(pages, work->nr_pages);
|
||||
pages = round_down(pages + MIN_WRITEBACK_PAGES,
|
||||
MIN_WRITEBACK_PAGES);
|
||||
}
|
||||
return LONG_MAX;
|
||||
|
||||
return pages;
|
||||
pages = min(wb->avg_write_bandwidth / 2,
|
||||
global_wb_domain.dirty_limit / DIRTY_SCOPE);
|
||||
pages = min(pages, work->nr_pages);
|
||||
return round_down(pages + sb->s_min_writeback_pages,
|
||||
sb->s_min_writeback_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -2012,7 +2019,7 @@ static long writeback_sb_inodes(struct super_block *sb,
|
|||
inode_state_set(inode, I_SYNC);
|
||||
wbc_attach_and_unlock_inode(&wbc, inode);
|
||||
|
||||
write_chunk = writeback_chunk_size(wb, work);
|
||||
write_chunk = writeback_chunk_size(inode->i_sb, wb, work);
|
||||
wbc.nr_to_write = write_chunk;
|
||||
wbc.pages_skipped = 0;
|
||||
|
||||
|
|
@ -2022,6 +2029,12 @@ static long writeback_sb_inodes(struct super_block *sb,
|
|||
*/
|
||||
__writeback_single_inode(inode, &wbc);
|
||||
|
||||
/* Report progress to inform the hung task detector of the progress. */
|
||||
if (work->done && work->done->progress_stamp &&
|
||||
(jiffies - work->done->progress_stamp) > HZ *
|
||||
sysctl_hung_task_timeout_secs / 2)
|
||||
wake_up_all(work->done->waitq);
|
||||
|
||||
wbc_detach_inode(&wbc);
|
||||
work->nr_pages -= write_chunk - wbc.nr_to_write;
|
||||
wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;
|
||||
|
|
|
|||
|
|
@ -311,10 +311,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
|
|||
range_whole = 1;
|
||||
cycled = 1; /* ignore range_cyclic tests */
|
||||
}
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
tag = PAGECACHE_TAG_TOWRITE;
|
||||
else
|
||||
tag = PAGECACHE_TAG_DIRTY;
|
||||
tag = wbc_to_tag(wbc);
|
||||
|
||||
retry:
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
|
|
|
|||
|
|
@ -902,15 +902,8 @@ int ocfs2_journal_alloc(struct ocfs2_super *osb)
|
|||
|
||||
static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
|
||||
{
|
||||
struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = WB_SYNC_ALL,
|
||||
.nr_to_write = mapping->nrpages * 2,
|
||||
.range_start = jinode->i_dirty_start,
|
||||
.range_end = jinode->i_dirty_end,
|
||||
};
|
||||
|
||||
return filemap_fdatawrite_wbc(mapping, &wbc);
|
||||
return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
|
||||
jinode->i_dirty_start, jinode->i_dirty_end);
|
||||
}
|
||||
|
||||
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
|
||||
|
|
|
|||
|
|
@ -389,6 +389,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
|
|||
goto fail;
|
||||
if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink))
|
||||
goto fail;
|
||||
s->s_min_writeback_pages = MIN_WRITEBACK_PAGES;
|
||||
return s;
|
||||
|
||||
fail:
|
||||
|
|
|
|||
10
fs/sync.c
10
fs/sync.c
|
|
@ -281,14 +281,12 @@ int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
|
|||
}
|
||||
|
||||
if (flags & SYNC_FILE_RANGE_WRITE) {
|
||||
int sync_mode = WB_SYNC_NONE;
|
||||
|
||||
if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
|
||||
SYNC_FILE_RANGE_WRITE_AND_WAIT)
|
||||
sync_mode = WB_SYNC_ALL;
|
||||
|
||||
ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
|
||||
sync_mode);
|
||||
ret = filemap_fdatawrite_range(mapping, offset,
|
||||
endbyte);
|
||||
else
|
||||
ret = filemap_flush_range(mapping, offset, endbyte);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1204,6 +1204,7 @@ xfs_mount_zones(
|
|||
.mp = mp,
|
||||
};
|
||||
struct xfs_buftarg *bt = mp->m_rtdev_targp;
|
||||
xfs_extlen_t zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks;
|
||||
int error;
|
||||
|
||||
if (!bt) {
|
||||
|
|
@ -1234,10 +1235,33 @@ xfs_mount_zones(
|
|||
return -ENOMEM;
|
||||
|
||||
xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
|
||||
mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks,
|
||||
mp->m_max_open_zones);
|
||||
mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones);
|
||||
trace_xfs_zones_mount(mp);
|
||||
|
||||
/*
|
||||
* The writeback code switches between inodes regularly to provide
|
||||
* fairness. The default lower bound is 4MiB, but for zoned file
|
||||
* systems we want to increase that both to reduce seeks, but also more
|
||||
* importantly so that workloads that write files in a multiple of the
|
||||
* zone size do not get fragmented and require garbage collection when
|
||||
* they shouldn't. Increase it to the zone size capped by the max
|
||||
* extent len.
|
||||
*
|
||||
* Note that because s_min_writeback_pages is a superblock field, this
|
||||
* value also gets applied to non-zoned files on the data device if
|
||||
* there are any. On a typical zoned setup all data is on the RT device
|
||||
* because using the more efficient sequential write required zones
|
||||
* is the reason for using the zone allocator, and either the RT device
|
||||
* and the (meta)data device are on the same block device, or the
|
||||
* (meta)data device is on a fast SSD while the data on the RT device
|
||||
* is on a SMR HDD. In any combination of the above cases enforcing
|
||||
* the higher min_writeback_pages for non-RT inodes is either a noop
|
||||
* or beneficial.
|
||||
*/
|
||||
mp->m_super->s_min_writeback_pages =
|
||||
XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >>
|
||||
PAGE_SHIFT;
|
||||
|
||||
if (bdev_is_zoned(bt->bt_bdev)) {
|
||||
error = blkdev_report_zones(bt->bt_bdev,
|
||||
XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart),
|
||||
|
|
|
|||
|
|
@ -63,6 +63,8 @@ enum wb_reason {
|
|||
struct wb_completion {
|
||||
atomic_t cnt;
|
||||
wait_queue_head_t *waitq;
|
||||
unsigned long progress_stamp; /* The jiffies when slow progress is detected */
|
||||
unsigned long wait_start; /* The jiffies when waiting for the writeback work to finish */
|
||||
};
|
||||
|
||||
#define __WB_COMPLETION_INIT(_waitq) \
|
||||
|
|
|
|||
|
|
@ -1642,6 +1642,7 @@ struct super_block {
|
|||
|
||||
spinlock_t s_inode_wblist_lock;
|
||||
struct list_head s_inodes_wb; /* writeback inodes */
|
||||
long s_min_writeback_pages;
|
||||
} __randomize_layout;
|
||||
|
||||
static inline struct user_namespace *i_user_ns(const struct inode *inode)
|
||||
|
|
@ -3075,7 +3076,7 @@ extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
|
|||
extern int __must_check file_check_and_advance_wb_err(struct file *file);
|
||||
extern int __must_check file_write_and_wait_range(struct file *file,
|
||||
loff_t start, loff_t end);
|
||||
int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start,
|
||||
int filemap_flush_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end);
|
||||
|
||||
static inline int file_write_and_wait(struct file *file)
|
||||
|
|
@ -3112,8 +3113,8 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
|
|||
} else if (iocb->ki_flags & IOCB_DONTCACHE) {
|
||||
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||
|
||||
filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count,
|
||||
iocb->ki_pos - 1);
|
||||
filemap_flush_range(mapping, iocb->ki_pos - count,
|
||||
iocb->ki_pos - 1);
|
||||
}
|
||||
|
||||
return count;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ int filemap_invalidate_pages(struct address_space *mapping,
|
|||
int write_inode_now(struct inode *, int sync);
|
||||
int filemap_fdatawrite(struct address_space *);
|
||||
int filemap_flush(struct address_space *);
|
||||
int filemap_flush_nr(struct address_space *mapping, long *nr_to_write);
|
||||
int filemap_fdatawait_keep_errors(struct address_space *mapping);
|
||||
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
|
||||
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
|
||||
|
|
@ -53,14 +54,10 @@ static inline int filemap_fdatawait(struct address_space *mapping)
|
|||
bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
|
||||
int filemap_write_and_wait_range(struct address_space *mapping,
|
||||
loff_t lstart, loff_t lend);
|
||||
int __filemap_fdatawrite_range(struct address_space *mapping,
|
||||
loff_t start, loff_t end, int sync_mode);
|
||||
int filemap_fdatawrite_range(struct address_space *mapping,
|
||||
loff_t start, loff_t end);
|
||||
int filemap_check_errors(struct address_space *mapping);
|
||||
void __filemap_set_wb_err(struct address_space *mapping, int err);
|
||||
int filemap_fdatawrite_wbc(struct address_space *mapping,
|
||||
struct writeback_control *wbc);
|
||||
int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
|
||||
|
||||
static inline int filemap_write_and_wait(struct address_space *mapping)
|
||||
|
|
|
|||
|
|
@ -189,6 +189,13 @@ void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
|
|||
void inode_wait_for_writeback(struct inode *inode);
|
||||
void inode_io_list_del(struct inode *inode);
|
||||
|
||||
static inline xa_mark_t wbc_to_tag(struct writeback_control *wbc)
|
||||
{
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
return PAGECACHE_TAG_TOWRITE;
|
||||
return PAGECACHE_TAG_DIRTY;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
||||
#include <linux/cgroup.h>
|
||||
|
|
@ -367,4 +374,9 @@ bool redirty_page_for_writepage(struct writeback_control *, struct page *);
|
|||
void sb_mark_inode_writeback(struct inode *inode);
|
||||
void sb_clear_inode_writeback(struct inode *inode);
|
||||
|
||||
/*
|
||||
* 4MB minimal write chunk size
|
||||
*/
|
||||
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
|
||||
|
||||
#endif /* WRITEBACK_H */
|
||||
|
|
|
|||
|
|
@ -111,8 +111,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
|
|||
spin_unlock(&file->f_lock);
|
||||
break;
|
||||
case POSIX_FADV_DONTNEED:
|
||||
__filemap_fdatawrite_range(mapping, offset, endbyte,
|
||||
WB_SYNC_NONE);
|
||||
filemap_flush_range(mapping, offset, endbyte);
|
||||
|
||||
/*
|
||||
* First and last FULL page! Partial pages are deliberately
|
||||
|
|
|
|||
115
mm/filemap.c
115
mm/filemap.c
|
|
@ -366,83 +366,60 @@ static int filemap_check_and_keep_errors(struct address_space *mapping)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range
|
||||
* @mapping: address space structure to write
|
||||
* @wbc: the writeback_control controlling the writeout
|
||||
*
|
||||
* Call writepages on the mapping using the provided wbc to control the
|
||||
* writeout.
|
||||
*
|
||||
* Return: %0 on success, negative error code otherwise.
|
||||
*/
|
||||
int filemap_fdatawrite_wbc(struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
static int filemap_writeback(struct address_space *mapping, loff_t start,
|
||||
loff_t end, enum writeback_sync_modes sync_mode,
|
||||
long *nr_to_write)
|
||||
{
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = sync_mode,
|
||||
.nr_to_write = nr_to_write ? *nr_to_write : LONG_MAX,
|
||||
.range_start = start,
|
||||
.range_end = end,
|
||||
};
|
||||
int ret;
|
||||
|
||||
if (!mapping_can_writeback(mapping) ||
|
||||
!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
|
||||
return 0;
|
||||
|
||||
wbc_attach_fdatawrite_inode(wbc, mapping->host);
|
||||
ret = do_writepages(mapping, wbc);
|
||||
wbc_detach_inode(wbc);
|
||||
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
|
||||
ret = do_writepages(mapping, &wbc);
|
||||
wbc_detach_inode(&wbc);
|
||||
|
||||
if (!ret && nr_to_write)
|
||||
*nr_to_write = wbc.nr_to_write;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_fdatawrite_wbc);
|
||||
|
||||
/**
|
||||
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
|
||||
* filemap_fdatawrite_range - start writeback on mapping dirty pages in range
|
||||
* @mapping: address space structure to write
|
||||
* @start: offset in bytes where the range starts
|
||||
* @end: offset in bytes where the range ends (inclusive)
|
||||
* @sync_mode: enable synchronous operation
|
||||
*
|
||||
* Start writeback against all of a mapping's dirty pages that lie
|
||||
* within the byte offsets <start, end> inclusive.
|
||||
*
|
||||
* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
|
||||
* opposed to a regular memory cleansing writeback. The difference between
|
||||
* these two operations is that if a dirty page/buffer is encountered, it must
|
||||
* be waited upon, and not just skipped over.
|
||||
* This is a data integrity operation that waits upon dirty or in writeback
|
||||
* pages.
|
||||
*
|
||||
* Return: %0 on success, negative error code otherwise.
|
||||
*/
|
||||
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end, int sync_mode)
|
||||
{
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = sync_mode,
|
||||
.nr_to_write = LONG_MAX,
|
||||
.range_start = start,
|
||||
.range_end = end,
|
||||
};
|
||||
|
||||
return filemap_fdatawrite_wbc(mapping, &wbc);
|
||||
}
|
||||
|
||||
static inline int __filemap_fdatawrite(struct address_space *mapping,
|
||||
int sync_mode)
|
||||
{
|
||||
return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
|
||||
}
|
||||
|
||||
int filemap_fdatawrite(struct address_space *mapping)
|
||||
{
|
||||
return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_fdatawrite);
|
||||
|
||||
int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end)
|
||||
loff_t end)
|
||||
{
|
||||
return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
|
||||
return filemap_writeback(mapping, start, end, WB_SYNC_ALL, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_fdatawrite_range);
|
||||
|
||||
int filemap_fdatawrite(struct address_space *mapping)
|
||||
{
|
||||
return filemap_fdatawrite_range(mapping, 0, LLONG_MAX);
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_fdatawrite);
|
||||
|
||||
/**
|
||||
* filemap_fdatawrite_range_kick - start writeback on a range
|
||||
* filemap_flush_range - start writeback on a range
|
||||
* @mapping: target address_space
|
||||
* @start: index to start writeback on
|
||||
* @end: last (inclusive) index for writeback
|
||||
|
|
@ -452,12 +429,12 @@ EXPORT_SYMBOL(filemap_fdatawrite_range);
|
|||
*
|
||||
* Return: %0 on success, negative error code otherwise.
|
||||
*/
|
||||
int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start,
|
||||
int filemap_flush_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end)
|
||||
{
|
||||
return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
|
||||
return filemap_writeback(mapping, start, end, WB_SYNC_NONE, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(filemap_fdatawrite_range_kick);
|
||||
EXPORT_SYMBOL_GPL(filemap_flush_range);
|
||||
|
||||
/**
|
||||
* filemap_flush - mostly a non-blocking flush
|
||||
|
|
@ -470,10 +447,22 @@ EXPORT_SYMBOL_GPL(filemap_fdatawrite_range_kick);
|
|||
*/
|
||||
int filemap_flush(struct address_space *mapping)
|
||||
{
|
||||
return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
|
||||
return filemap_flush_range(mapping, 0, LLONG_MAX);
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_flush);
|
||||
|
||||
/*
|
||||
* Start writeback on @nr_to_write pages from @mapping. No one but the existing
|
||||
* btrfs caller should be using this. Talk to linux-mm if you think adding a
|
||||
* new caller is a good idea.
|
||||
*/
|
||||
int filemap_flush_nr(struct address_space *mapping, long *nr_to_write)
|
||||
{
|
||||
return filemap_writeback(mapping, 0, LLONG_MAX, WB_SYNC_NONE,
|
||||
nr_to_write);
|
||||
}
|
||||
EXPORT_SYMBOL_FOR_MODULES(filemap_flush_nr, "btrfs");
|
||||
|
||||
/**
|
||||
* filemap_range_has_page - check if a page exists in range.
|
||||
* @mapping: address space within which to check
|
||||
|
|
@ -691,8 +680,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
|
|||
return 0;
|
||||
|
||||
if (mapping_needs_writeback(mapping)) {
|
||||
err = __filemap_fdatawrite_range(mapping, lstart, lend,
|
||||
WB_SYNC_ALL);
|
||||
err = filemap_fdatawrite_range(mapping, lstart, lend);
|
||||
/*
|
||||
* Even if the above returned error, the pages may be
|
||||
* written partially (e.g. -ENOSPC), so we wait for it.
|
||||
|
|
@ -794,8 +782,7 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
|
|||
return 0;
|
||||
|
||||
if (mapping_needs_writeback(mapping)) {
|
||||
err = __filemap_fdatawrite_range(mapping, lstart, lend,
|
||||
WB_SYNC_ALL);
|
||||
err = filemap_fdatawrite_range(mapping, lstart, lend);
|
||||
/* See comment of filemap_write_and_wait() */
|
||||
if (err != -EIO)
|
||||
__filemap_fdatawait_range(mapping, lstart, lend);
|
||||
|
|
@ -4528,16 +4515,8 @@ int filemap_invalidate_inode(struct inode *inode, bool flush,
|
|||
unmap_mapping_pages(mapping, first, nr, false);
|
||||
|
||||
/* Write back the data if we're asked to. */
|
||||
if (flush) {
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = WB_SYNC_ALL,
|
||||
.nr_to_write = LONG_MAX,
|
||||
.range_start = start,
|
||||
.range_end = end,
|
||||
};
|
||||
|
||||
filemap_fdatawrite_wbc(mapping, &wbc);
|
||||
}
|
||||
if (flush)
|
||||
filemap_fdatawrite_range(mapping, start, end);
|
||||
|
||||
/* Wait for writeback to complete on all folios and discard. */
|
||||
invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE);
|
||||
|
|
|
|||
|
|
@ -2434,12 +2434,6 @@ static bool folio_prepare_writeback(struct address_space *mapping,
|
|||
return true;
|
||||
}
|
||||
|
||||
static xa_mark_t wbc_to_tag(struct writeback_control *wbc)
|
||||
{
|
||||
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
|
||||
return PAGECACHE_TAG_TOWRITE;
|
||||
return PAGECACHE_TAG_DIRTY;
|
||||
}
|
||||
|
||||
static pgoff_t wbc_end(struct writeback_control *wbc)
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in New Issue