mirror of https://github.com/torvalds/linux.git
vfs-6.19-rc1.iomap
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaSmOZAAKCRCRxhvAZXjc
ooCXAQCwzX2GS/55QHV6JXBBoNxguuSQ5dCj91ZmTfHzij0xNAEAhKEBw7iMGX72
c2/x+xYf+Pc6mAfxdus5RLMggqBFPAk=
=jInB
-----END PGP SIGNATURE-----
Merge tag 'vfs-6.19-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull iomap updates from Christian Brauner:
"FUSE iomap Support for Buffered Reads:
This adds iomap support for FUSE buffered reads and readahead. This
enables granular uptodate tracking with large folios so only
non-uptodate portions need to be read. Also fixes a race condition
with large folios + writeback cache that could cause data corruption
on partial writes followed by reads.
- Refactored iomap read/readahead bio logic into helpers
- Added caller-provided callbacks for read operations
- Moved buffered IO bio logic into new file
- FUSE now uses iomap for read_folio and readahead
Zero Range Folio Batch Support:
Add folio batch support for iomap_zero_range() to handle dirty
folios over unwritten mappings. Fix raciness issues where dirty data
could be lost during zero range operations.
- filemap_get_folios_tag_range() helper for dirty folio lookup
- Optional zero range dirty folio processing
- XFS fills dirty folios on zero range of unwritten mappings
- Removed old partial EOF zeroing optimization
DIO Write Completions from Interrupt Context:
Restore pre-iomap behavior where pure overwrite completions run
inline rather than being deferred to workqueue. Reduces context
switches for high-performance workloads like ScyllaDB.
- Removed unused IOCB_DIO_CALLER_COMP code
- Error completions always run in user context (fixes zonefs)
- Reworked REQ_FUA selection logic
- Inverted IOMAP_DIO_INLINE_COMP to IOMAP_DIO_OFFLOAD_COMP
Buffered IO Cleanups:
Some performance and code clarity improvements:
- Replace manual bitmap scanning with find_next_bit()
- Simplify read skip logic for writes
- Optimize pending async writeback accounting
- Better variable naming
- Documentation for iomap_finish_folio_write() requirements
Misaligned Vectors for Zoned XFS:
Enables sub-block aligned vectors in XFS always-COW mode for zoned
devices via new IOMAP_DIO_FSBLOCK_ALIGNED flag.
Bug Fixes:
- Allocate s_dio_done_wq for async reads (fixes syzbot report after
error completion changes)
- Fix iomap_read_end() for already uptodate folios (regression fix)"
* tag 'vfs-6.19-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (40 commits)
iomap: allocate s_dio_done_wq for async reads as well
iomap: fix iomap_read_end() for already uptodate folios
iomap: invert the polarity of IOMAP_DIO_INLINE_COMP
iomap: support write completions from interrupt context
iomap: rework REQ_FUA selection
iomap: always run error completions in user context
fs, iomap: remove IOCB_DIO_CALLER_COMP
iomap: use find_next_bit() for uptodate bitmap scanning
iomap: use find_next_bit() for dirty bitmap scanning
iomap: simplify when reads can be skipped for writes
iomap: simplify ->read_folio_range() error handling for reads
iomap: optimize pending async writeback accounting
docs: document iomap writeback's iomap_finish_folio_write() requirement
iomap: account for unaligned end offsets when truncating read range
iomap: rename bytes_pending/bytes_accounted to bytes_submitted/bytes_not_submitted
xfs: support sub-block aligned vectors in always COW mode
iomap: add IOMAP_DIO_FSBLOCK_ALIGNED flag
xfs: error tag to force zeroing on debug kernels
iomap: remove old partial eof zeroing optimization
xfs: fill dirty folios on zero range of unwritten mappings
...
This commit is contained in:
commit
1885cdbfbb
|
|
@ -135,6 +135,27 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap:
|
||||||
|
|
||||||
* ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``.
|
* ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``.
|
||||||
|
|
||||||
|
``struct iomap_read_ops``
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
struct iomap_read_ops {
|
||||||
|
int (*read_folio_range)(const struct iomap_iter *iter,
|
||||||
|
struct iomap_read_folio_ctx *ctx, size_t len);
|
||||||
|
void (*submit_read)(struct iomap_read_folio_ctx *ctx);
|
||||||
|
};
|
||||||
|
|
||||||
|
iomap calls these functions:
|
||||||
|
|
||||||
|
- ``read_folio_range``: Called to read in the range. This must be provided
|
||||||
|
by the caller. If this succeeds, iomap_finish_folio_read() must be called
|
||||||
|
after the range is read in, regardless of whether the read succeeded or
|
||||||
|
failed.
|
||||||
|
|
||||||
|
- ``submit_read``: Submit any pending read requests. This function is
|
||||||
|
optional.
|
||||||
|
|
||||||
Internal per-Folio State
|
Internal per-Folio State
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
|
|
@ -182,6 +203,28 @@ The ``flags`` argument to ``->iomap_begin`` will be set to zero.
|
||||||
The pagecache takes whatever locks it needs before calling the
|
The pagecache takes whatever locks it needs before calling the
|
||||||
filesystem.
|
filesystem.
|
||||||
|
|
||||||
|
Both ``iomap_readahead`` and ``iomap_read_folio`` pass in a ``struct
|
||||||
|
iomap_read_folio_ctx``:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
struct iomap_read_folio_ctx {
|
||||||
|
const struct iomap_read_ops *ops;
|
||||||
|
struct folio *cur_folio;
|
||||||
|
struct readahead_control *rac;
|
||||||
|
void *read_ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
``iomap_readahead`` must set:
|
||||||
|
* ``ops->read_folio_range()`` and ``rac``
|
||||||
|
|
||||||
|
``iomap_read_folio`` must set:
|
||||||
|
* ``ops->read_folio_range()`` and ``cur_folio``
|
||||||
|
|
||||||
|
``ops->submit_read()`` and ``read_ctx`` are optional. ``read_ctx`` is used to
|
||||||
|
pass in any custom data the caller needs accessible in the ops callbacks for
|
||||||
|
fulfilling reads.
|
||||||
|
|
||||||
Buffered Writes
|
Buffered Writes
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
@ -317,6 +360,9 @@ The fields are as follows:
|
||||||
delalloc reservations to avoid having delalloc reservations for
|
delalloc reservations to avoid having delalloc reservations for
|
||||||
clean pagecache.
|
clean pagecache.
|
||||||
This function must be supplied by the filesystem.
|
This function must be supplied by the filesystem.
|
||||||
|
If this succeeds, iomap_finish_folio_write() must be called once writeback
|
||||||
|
completes for the range, regardless of whether the writeback succeeded or
|
||||||
|
failed.
|
||||||
|
|
||||||
- ``writeback_submit``: Submit the previous built writeback context.
|
- ``writeback_submit``: Submit the previous built writeback context.
|
||||||
Block based file systems should use the iomap_ioend_writeback_submit
|
Block based file systems should use the iomap_ioend_writeback_submit
|
||||||
|
|
@ -444,10 +490,6 @@ These ``struct kiocb`` flags are significant for direct I/O with iomap:
|
||||||
Only meaningful for asynchronous I/O, and only if the entire I/O can
|
Only meaningful for asynchronous I/O, and only if the entire I/O can
|
||||||
be issued as a single ``struct bio``.
|
be issued as a single ``struct bio``.
|
||||||
|
|
||||||
* ``IOCB_DIO_CALLER_COMP``: Try to run I/O completion from the caller's
|
|
||||||
process context.
|
|
||||||
See ``linux/fs.h`` for more details.
|
|
||||||
|
|
||||||
Filesystems should call ``iomap_dio_rw`` from ``->read_iter`` and
|
Filesystems should call ``iomap_dio_rw`` from ``->read_iter`` and
|
||||||
``->write_iter``, and set ``FMODE_CAN_ODIRECT`` in the ``->open``
|
``->write_iter``, and set ``FMODE_CAN_ODIRECT`` in the ``->open``
|
||||||
function for the file.
|
function for the file.
|
||||||
|
|
|
||||||
|
|
@ -540,12 +540,13 @@ const struct address_space_operations def_blk_aops = {
|
||||||
#else /* CONFIG_BUFFER_HEAD */
|
#else /* CONFIG_BUFFER_HEAD */
|
||||||
static int blkdev_read_folio(struct file *file, struct folio *folio)
|
static int blkdev_read_folio(struct file *file, struct folio *folio)
|
||||||
{
|
{
|
||||||
return iomap_read_folio(folio, &blkdev_iomap_ops);
|
iomap_bio_read_folio(folio, &blkdev_iomap_ops);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void blkdev_readahead(struct readahead_control *rac)
|
static void blkdev_readahead(struct readahead_control *rac)
|
||||||
{
|
{
|
||||||
iomap_readahead(rac, &blkdev_iomap_ops);
|
iomap_bio_readahead(rac, &blkdev_iomap_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
|
static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
|
||||||
|
|
|
||||||
|
|
@ -227,12 +227,6 @@ ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
|
||||||
!(file->f_mode & FMODE_CAN_ODIRECT))
|
!(file->f_mode & FMODE_CAN_ODIRECT))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/*
|
|
||||||
* Stacked filesystems don't support deferred completions, don't copy
|
|
||||||
* this property in case it is set by the issuer.
|
|
||||||
*/
|
|
||||||
flags &= ~IOCB_DIO_CALLER_COMP;
|
|
||||||
|
|
||||||
old_cred = override_creds(ctx->cred);
|
old_cred = override_creds(ctx->cred);
|
||||||
if (is_sync_kiocb(iocb)) {
|
if (is_sync_kiocb(iocb)) {
|
||||||
rwf_t rwf = iocb_to_rw_flags(flags);
|
rwf_t rwf = iocb_to_rw_flags(flags);
|
||||||
|
|
|
||||||
30
fs/dax.c
30
fs/dax.c
|
|
@ -1507,7 +1507,7 @@ static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||||
|
|
||||||
/* already zeroed? we're done. */
|
/* already zeroed? we're done. */
|
||||||
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* invalidate the pages whose sharing state is to be changed
|
* invalidate the pages whose sharing state is to be changed
|
||||||
|
|
@ -1536,10 +1536,10 @@ static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
ret = iomap_iter_advance(iter, &length);
|
ret = iomap_iter_advance(iter, length);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
} while (length > 0);
|
} while ((length = iomap_length(iter)) > 0);
|
||||||
|
|
||||||
if (did_zero)
|
if (did_zero)
|
||||||
*did_zero = true;
|
*did_zero = true;
|
||||||
|
|
@ -1597,7 +1597,7 @@ static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
|
||||||
|
|
||||||
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
|
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
|
||||||
done = iov_iter_zero(min(length, end - pos), iter);
|
done = iov_iter_zero(min(length, end - pos), iter);
|
||||||
return iomap_iter_advance(iomi, &done);
|
return iomap_iter_advance(iomi, done);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1681,12 +1681,12 @@ static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
|
||||||
xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
|
xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
|
||||||
map_len, iter);
|
map_len, iter);
|
||||||
|
|
||||||
length = xfer;
|
ret = iomap_iter_advance(iomi, xfer);
|
||||||
ret = iomap_iter_advance(iomi, &length);
|
|
||||||
if (!ret && xfer == 0)
|
if (!ret && xfer == 0)
|
||||||
ret = -EFAULT;
|
ret = -EFAULT;
|
||||||
if (xfer < map_len)
|
if (xfer < map_len)
|
||||||
break;
|
break;
|
||||||
|
length = iomap_length(iomi);
|
||||||
}
|
}
|
||||||
dax_read_unlock(id);
|
dax_read_unlock(id);
|
||||||
|
|
||||||
|
|
@ -1919,10 +1919,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, unsigned long *pfnp,
|
||||||
ret |= VM_FAULT_MAJOR;
|
ret |= VM_FAULT_MAJOR;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(ret & VM_FAULT_ERROR)) {
|
if (!(ret & VM_FAULT_ERROR))
|
||||||
u64 length = PAGE_SIZE;
|
iter.status = iomap_iter_advance(&iter, PAGE_SIZE);
|
||||||
iter.status = iomap_iter_advance(&iter, &length);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iomap_errp)
|
if (iomap_errp)
|
||||||
|
|
@ -2034,10 +2032,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp,
|
||||||
continue; /* actually breaks out of the loop */
|
continue; /* actually breaks out of the loop */
|
||||||
|
|
||||||
ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
|
ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
|
||||||
if (ret != VM_FAULT_FALLBACK) {
|
if (ret != VM_FAULT_FALLBACK)
|
||||||
u64 length = PMD_SIZE;
|
iter.status = iomap_iter_advance(&iter, PMD_SIZE);
|
||||||
iter.status = iomap_iter_advance(&iter, &length);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unlock_entry:
|
unlock_entry:
|
||||||
|
|
@ -2163,7 +2159,6 @@ static int dax_range_compare_iter(struct iomap_iter *it_src,
|
||||||
const struct iomap *smap = &it_src->iomap;
|
const struct iomap *smap = &it_src->iomap;
|
||||||
const struct iomap *dmap = &it_dest->iomap;
|
const struct iomap *dmap = &it_dest->iomap;
|
||||||
loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
|
loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
|
||||||
u64 dest_len;
|
|
||||||
void *saddr, *daddr;
|
void *saddr, *daddr;
|
||||||
int id, ret;
|
int id, ret;
|
||||||
|
|
||||||
|
|
@ -2196,10 +2191,9 @@ static int dax_range_compare_iter(struct iomap_iter *it_src,
|
||||||
dax_read_unlock(id);
|
dax_read_unlock(id);
|
||||||
|
|
||||||
advance:
|
advance:
|
||||||
dest_len = len;
|
ret = iomap_iter_advance(it_src, len);
|
||||||
ret = iomap_iter_advance(it_src, &len);
|
|
||||||
if (!ret)
|
if (!ret)
|
||||||
ret = iomap_iter_advance(it_dest, &dest_len);
|
ret = iomap_iter_advance(it_dest, len);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
|
|
|
||||||
|
|
@ -371,7 +371,8 @@ static int erofs_read_folio(struct file *file, struct folio *folio)
|
||||||
{
|
{
|
||||||
trace_erofs_read_folio(folio, true);
|
trace_erofs_read_folio(folio, true);
|
||||||
|
|
||||||
return iomap_read_folio(folio, &erofs_iomap_ops);
|
iomap_bio_read_folio(folio, &erofs_iomap_ops);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void erofs_readahead(struct readahead_control *rac)
|
static void erofs_readahead(struct readahead_control *rac)
|
||||||
|
|
@ -379,7 +380,7 @@ static void erofs_readahead(struct readahead_control *rac)
|
||||||
trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
|
trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
|
||||||
readahead_count(rac), true);
|
readahead_count(rac), true);
|
||||||
|
|
||||||
return iomap_readahead(rac, &erofs_iomap_ops);
|
iomap_bio_readahead(rac, &erofs_iomap_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
|
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
|
||||||
|
|
|
||||||
|
|
@ -1192,7 +1192,7 @@ static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
|
||||||
if (attr->blksize != 0)
|
if (attr->blksize != 0)
|
||||||
blkbits = ilog2(attr->blksize);
|
blkbits = ilog2(attr->blksize);
|
||||||
else
|
else
|
||||||
blkbits = fc->blkbits;
|
blkbits = inode->i_sb->s_blocksize_bits;
|
||||||
|
|
||||||
stat->blksize = 1 << blkbits;
|
stat->blksize = 1 << blkbits;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
286
fs/fuse/file.c
286
fs/fuse/file.c
|
|
@ -834,23 +834,142 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||||
|
unsigned int flags, struct iomap *iomap,
|
||||||
|
struct iomap *srcmap)
|
||||||
|
{
|
||||||
|
iomap->type = IOMAP_MAPPED;
|
||||||
|
iomap->length = length;
|
||||||
|
iomap->offset = offset;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct iomap_ops fuse_iomap_ops = {
|
||||||
|
.iomap_begin = fuse_iomap_begin,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct fuse_fill_read_data {
|
||||||
|
struct file *file;
|
||||||
|
|
||||||
|
/* Fields below are used if sending the read request asynchronously */
|
||||||
|
struct fuse_conn *fc;
|
||||||
|
struct fuse_io_args *ia;
|
||||||
|
unsigned int nr_bytes;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* forward declarations */
|
||||||
|
static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
|
||||||
|
unsigned len, struct fuse_args_pages *ap,
|
||||||
|
unsigned cur_bytes, bool write);
|
||||||
|
static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
|
||||||
|
unsigned int count, bool async);
|
||||||
|
|
||||||
|
static int fuse_handle_readahead(struct folio *folio,
|
||||||
|
struct readahead_control *rac,
|
||||||
|
struct fuse_fill_read_data *data, loff_t pos,
|
||||||
|
size_t len)
|
||||||
|
{
|
||||||
|
struct fuse_io_args *ia = data->ia;
|
||||||
|
size_t off = offset_in_folio(folio, pos);
|
||||||
|
struct fuse_conn *fc = data->fc;
|
||||||
|
struct fuse_args_pages *ap;
|
||||||
|
unsigned int nr_pages;
|
||||||
|
|
||||||
|
if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes,
|
||||||
|
false)) {
|
||||||
|
fuse_send_readpages(ia, data->file, data->nr_bytes,
|
||||||
|
fc->async_read);
|
||||||
|
data->nr_bytes = 0;
|
||||||
|
data->ia = NULL;
|
||||||
|
ia = NULL;
|
||||||
|
}
|
||||||
|
if (!ia) {
|
||||||
|
if (fc->num_background >= fc->congestion_threshold &&
|
||||||
|
rac->ra->async_size >= readahead_count(rac))
|
||||||
|
/*
|
||||||
|
* Congested and only async pages left, so skip the
|
||||||
|
* rest.
|
||||||
|
*/
|
||||||
|
return -EAGAIN;
|
||||||
|
|
||||||
|
nr_pages = min(fc->max_pages, readahead_count(rac));
|
||||||
|
data->ia = fuse_io_alloc(NULL, nr_pages);
|
||||||
|
if (!data->ia)
|
||||||
|
return -ENOMEM;
|
||||||
|
ia = data->ia;
|
||||||
|
}
|
||||||
|
folio_get(folio);
|
||||||
|
ap = &ia->ap;
|
||||||
|
ap->folios[ap->num_folios] = folio;
|
||||||
|
ap->descs[ap->num_folios].offset = off;
|
||||||
|
ap->descs[ap->num_folios].length = len;
|
||||||
|
data->nr_bytes += len;
|
||||||
|
ap->num_folios++;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
|
||||||
|
struct iomap_read_folio_ctx *ctx,
|
||||||
|
size_t len)
|
||||||
|
{
|
||||||
|
struct fuse_fill_read_data *data = ctx->read_ctx;
|
||||||
|
struct folio *folio = ctx->cur_folio;
|
||||||
|
loff_t pos = iter->pos;
|
||||||
|
size_t off = offset_in_folio(folio, pos);
|
||||||
|
struct file *file = data->file;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (ctx->rac) {
|
||||||
|
ret = fuse_handle_readahead(folio, ctx->rac, data, pos, len);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* for non-readahead read requests, do reads synchronously
|
||||||
|
* since it's not guaranteed that the server can handle
|
||||||
|
* out-of-order reads
|
||||||
|
*/
|
||||||
|
ret = fuse_do_readfolio(file, folio, off, len);
|
||||||
|
if (!ret)
|
||||||
|
iomap_finish_folio_read(folio, off, len, ret);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
|
||||||
|
{
|
||||||
|
struct fuse_fill_read_data *data = ctx->read_ctx;
|
||||||
|
|
||||||
|
if (data->ia)
|
||||||
|
fuse_send_readpages(data->ia, data->file, data->nr_bytes,
|
||||||
|
data->fc->async_read);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct iomap_read_ops fuse_iomap_read_ops = {
|
||||||
|
.read_folio_range = fuse_iomap_read_folio_range_async,
|
||||||
|
.submit_read = fuse_iomap_read_submit,
|
||||||
|
};
|
||||||
|
|
||||||
static int fuse_read_folio(struct file *file, struct folio *folio)
|
static int fuse_read_folio(struct file *file, struct folio *folio)
|
||||||
{
|
{
|
||||||
struct inode *inode = folio->mapping->host;
|
struct inode *inode = folio->mapping->host;
|
||||||
int err;
|
struct fuse_fill_read_data data = {
|
||||||
|
.file = file,
|
||||||
|
};
|
||||||
|
struct iomap_read_folio_ctx ctx = {
|
||||||
|
.cur_folio = folio,
|
||||||
|
.ops = &fuse_iomap_read_ops,
|
||||||
|
.read_ctx = &data,
|
||||||
|
|
||||||
err = -EIO;
|
};
|
||||||
if (fuse_is_bad(inode))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
err = fuse_do_readfolio(file, folio, 0, folio_size(folio));
|
if (fuse_is_bad(inode)) {
|
||||||
if (!err)
|
folio_unlock(folio);
|
||||||
folio_mark_uptodate(folio);
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
iomap_read_folio(&fuse_iomap_ops, &ctx);
|
||||||
fuse_invalidate_atime(inode);
|
fuse_invalidate_atime(inode);
|
||||||
out:
|
return 0;
|
||||||
folio_unlock(folio);
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
|
static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
|
||||||
|
|
@ -887,7 +1006,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
|
||||||
fuse_invalidate_atime(inode);
|
fuse_invalidate_atime(inode);
|
||||||
|
|
||||||
for (i = 0; i < ap->num_folios; i++) {
|
for (i = 0; i < ap->num_folios; i++) {
|
||||||
folio_end_read(ap->folios[i], !err);
|
iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset,
|
||||||
|
ap->descs[i].length, err);
|
||||||
folio_put(ap->folios[i]);
|
folio_put(ap->folios[i]);
|
||||||
}
|
}
|
||||||
if (ia->ff)
|
if (ia->ff)
|
||||||
|
|
@ -897,7 +1017,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
|
static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
|
||||||
unsigned int count)
|
unsigned int count, bool async)
|
||||||
{
|
{
|
||||||
struct fuse_file *ff = file->private_data;
|
struct fuse_file *ff = file->private_data;
|
||||||
struct fuse_mount *fm = ff->fm;
|
struct fuse_mount *fm = ff->fm;
|
||||||
|
|
@ -919,7 +1039,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
|
||||||
|
|
||||||
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
|
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
|
||||||
ia->read.attr_ver = fuse_get_attr_version(fm->fc);
|
ia->read.attr_ver = fuse_get_attr_version(fm->fc);
|
||||||
if (fm->fc->async_read) {
|
if (async) {
|
||||||
ia->ff = fuse_file_get(ff);
|
ia->ff = fuse_file_get(ff);
|
||||||
ap->args.end = fuse_readpages_end;
|
ap->args.end = fuse_readpages_end;
|
||||||
err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
|
err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
|
||||||
|
|
@ -936,81 +1056,20 @@ static void fuse_readahead(struct readahead_control *rac)
|
||||||
{
|
{
|
||||||
struct inode *inode = rac->mapping->host;
|
struct inode *inode = rac->mapping->host;
|
||||||
struct fuse_conn *fc = get_fuse_conn(inode);
|
struct fuse_conn *fc = get_fuse_conn(inode);
|
||||||
unsigned int max_pages, nr_pages;
|
struct fuse_fill_read_data data = {
|
||||||
struct folio *folio = NULL;
|
.file = rac->file,
|
||||||
|
.fc = fc,
|
||||||
|
};
|
||||||
|
struct iomap_read_folio_ctx ctx = {
|
||||||
|
.ops = &fuse_iomap_read_ops,
|
||||||
|
.rac = rac,
|
||||||
|
.read_ctx = &data
|
||||||
|
};
|
||||||
|
|
||||||
if (fuse_is_bad(inode))
|
if (fuse_is_bad(inode))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
max_pages = min_t(unsigned int, fc->max_pages,
|
iomap_readahead(&fuse_iomap_ops, &ctx);
|
||||||
fc->max_read / PAGE_SIZE);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is only accurate the first time through, since readahead_folio()
|
|
||||||
* doesn't update readahead_count() from the previous folio until the
|
|
||||||
* next call. Grab nr_pages here so we know how many pages we're going
|
|
||||||
* to have to process. This means that we will exit here with
|
|
||||||
* readahead_count() == folio_nr_pages(last_folio), but we will have
|
|
||||||
* consumed all of the folios, and read_pages() will call
|
|
||||||
* readahead_folio() again which will clean up the rac.
|
|
||||||
*/
|
|
||||||
nr_pages = readahead_count(rac);
|
|
||||||
|
|
||||||
while (nr_pages) {
|
|
||||||
struct fuse_io_args *ia;
|
|
||||||
struct fuse_args_pages *ap;
|
|
||||||
unsigned cur_pages = min(max_pages, nr_pages);
|
|
||||||
unsigned int pages = 0;
|
|
||||||
|
|
||||||
if (fc->num_background >= fc->congestion_threshold &&
|
|
||||||
rac->ra->async_size >= readahead_count(rac))
|
|
||||||
/*
|
|
||||||
* Congested and only async pages left, so skip the
|
|
||||||
* rest.
|
|
||||||
*/
|
|
||||||
break;
|
|
||||||
|
|
||||||
ia = fuse_io_alloc(NULL, cur_pages);
|
|
||||||
if (!ia)
|
|
||||||
break;
|
|
||||||
ap = &ia->ap;
|
|
||||||
|
|
||||||
while (pages < cur_pages) {
|
|
||||||
unsigned int folio_pages;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This returns a folio with a ref held on it.
|
|
||||||
* The ref needs to be held until the request is
|
|
||||||
* completed, since the splice case (see
|
|
||||||
* fuse_try_move_page()) drops the ref after it's
|
|
||||||
* replaced in the page cache.
|
|
||||||
*/
|
|
||||||
if (!folio)
|
|
||||||
folio = __readahead_folio(rac);
|
|
||||||
|
|
||||||
folio_pages = folio_nr_pages(folio);
|
|
||||||
if (folio_pages > cur_pages - pages) {
|
|
||||||
/*
|
|
||||||
* Large folios belonging to fuse will never
|
|
||||||
* have more pages than max_pages.
|
|
||||||
*/
|
|
||||||
WARN_ON(!pages);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
ap->folios[ap->num_folios] = folio;
|
|
||||||
ap->descs[ap->num_folios].length = folio_size(folio);
|
|
||||||
ap->num_folios++;
|
|
||||||
pages += folio_pages;
|
|
||||||
folio = NULL;
|
|
||||||
}
|
|
||||||
fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
|
|
||||||
nr_pages -= pages;
|
|
||||||
}
|
|
||||||
if (folio) {
|
|
||||||
folio_end_read(folio, false);
|
|
||||||
folio_put(folio);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||||
|
|
@ -1397,20 +1456,6 @@ static const struct iomap_write_ops fuse_iomap_write_ops = {
|
||||||
.read_folio_range = fuse_iomap_read_folio_range,
|
.read_folio_range = fuse_iomap_read_folio_range,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
|
||||||
unsigned int flags, struct iomap *iomap,
|
|
||||||
struct iomap *srcmap)
|
|
||||||
{
|
|
||||||
iomap->type = IOMAP_MAPPED;
|
|
||||||
iomap->length = length;
|
|
||||||
iomap->offset = offset;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const struct iomap_ops fuse_iomap_ops = {
|
|
||||||
.iomap_begin = fuse_iomap_begin,
|
|
||||||
};
|
|
||||||
|
|
||||||
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
{
|
{
|
||||||
struct file *file = iocb->ki_filp;
|
struct file *file = iocb->ki_filp;
|
||||||
|
|
@ -1834,7 +1879,8 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
|
||||||
* scope of the fi->lock alleviates xarray lock
|
* scope of the fi->lock alleviates xarray lock
|
||||||
* contention and noticeably improves performance.
|
* contention and noticeably improves performance.
|
||||||
*/
|
*/
|
||||||
iomap_finish_folio_write(inode, ap->folios[i], 1);
|
iomap_finish_folio_write(inode, ap->folios[i],
|
||||||
|
ap->descs[i].length);
|
||||||
|
|
||||||
wake_up(&fi->page_waitq);
|
wake_up(&fi->page_waitq);
|
||||||
}
|
}
|
||||||
|
|
@ -2047,7 +2093,7 @@ struct fuse_fill_wb_data {
|
||||||
struct fuse_file *ff;
|
struct fuse_file *ff;
|
||||||
unsigned int max_folios;
|
unsigned int max_folios;
|
||||||
/*
|
/*
|
||||||
* nr_bytes won't overflow since fuse_writepage_need_send() caps
|
* nr_bytes won't overflow since fuse_folios_need_send() caps
|
||||||
* wb requests to never exceed fc->max_pages (which has an upper bound
|
* wb requests to never exceed fc->max_pages (which has an upper bound
|
||||||
* of U16_MAX).
|
* of U16_MAX).
|
||||||
*/
|
*/
|
||||||
|
|
@ -2092,14 +2138,15 @@ static void fuse_writepages_send(struct inode *inode,
|
||||||
spin_unlock(&fi->lock);
|
spin_unlock(&fi->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
|
static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
|
||||||
unsigned len, struct fuse_args_pages *ap,
|
unsigned len, struct fuse_args_pages *ap,
|
||||||
struct fuse_fill_wb_data *data)
|
unsigned cur_bytes, bool write)
|
||||||
{
|
{
|
||||||
struct folio *prev_folio;
|
struct folio *prev_folio;
|
||||||
struct fuse_folio_desc prev_desc;
|
struct fuse_folio_desc prev_desc;
|
||||||
unsigned bytes = data->nr_bytes + len;
|
unsigned bytes = cur_bytes + len;
|
||||||
loff_t prev_pos;
|
loff_t prev_pos;
|
||||||
|
size_t max_bytes = write ? fc->max_write : fc->max_read;
|
||||||
|
|
||||||
WARN_ON(!ap->num_folios);
|
WARN_ON(!ap->num_folios);
|
||||||
|
|
||||||
|
|
@ -2107,8 +2154,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
|
||||||
if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
|
if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* Reached max write bytes */
|
if (bytes > max_bytes)
|
||||||
if (bytes > fc->max_write)
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* Discontinuity */
|
/* Discontinuity */
|
||||||
|
|
@ -2118,11 +2164,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
|
||||||
if (prev_pos != pos)
|
if (prev_pos != pos)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* Need to grow the pages array? If so, did the expansion fail? */
|
|
||||||
if (ap->num_folios == data->max_folios &&
|
|
||||||
!fuse_pages_realloc(data, fc->max_pages))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2146,10 +2187,24 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
|
if (wpa) {
|
||||||
fuse_writepages_send(inode, data);
|
bool send = fuse_folios_need_send(fc, pos, len, ap,
|
||||||
data->wpa = NULL;
|
data->nr_bytes, true);
|
||||||
data->nr_bytes = 0;
|
|
||||||
|
if (!send) {
|
||||||
|
/*
|
||||||
|
* Need to grow the pages array? If so, did the
|
||||||
|
* expansion fail?
|
||||||
|
*/
|
||||||
|
send = (ap->num_folios == data->max_folios) &&
|
||||||
|
!fuse_pages_realloc(data, fc->max_pages);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (send) {
|
||||||
|
fuse_writepages_send(inode, data);
|
||||||
|
data->wpa = NULL;
|
||||||
|
data->nr_bytes = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data->wpa == NULL) {
|
if (data->wpa == NULL) {
|
||||||
|
|
@ -2161,7 +2216,6 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
|
||||||
ap = &wpa->ia.ap;
|
ap = &wpa->ia.ap;
|
||||||
}
|
}
|
||||||
|
|
||||||
iomap_start_folio_write(inode, folio, 1);
|
|
||||||
fuse_writepage_args_page_fill(wpa, folio, ap->num_folios,
|
fuse_writepage_args_page_fill(wpa, folio, ap->num_folios,
|
||||||
offset, len);
|
offset, len);
|
||||||
data->nr_bytes += len;
|
data->nr_bytes += len;
|
||||||
|
|
|
||||||
|
|
@ -981,14 +981,6 @@ struct fuse_conn {
|
||||||
/* Request timeout (in jiffies). 0 = no timeout */
|
/* Request timeout (in jiffies). 0 = no timeout */
|
||||||
unsigned int req_timeout;
|
unsigned int req_timeout;
|
||||||
} timeout;
|
} timeout;
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a workaround until fuse uses iomap for reads.
|
|
||||||
* For fuseblk servers, this represents the blocksize passed in at
|
|
||||||
* mount time and for regular fuse servers, this is equivalent to
|
|
||||||
* inode->i_blkbits.
|
|
||||||
*/
|
|
||||||
u8 blkbits;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -291,7 +291,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
|
||||||
if (attr->blksize)
|
if (attr->blksize)
|
||||||
fi->cached_i_blkbits = ilog2(attr->blksize);
|
fi->cached_i_blkbits = ilog2(attr->blksize);
|
||||||
else
|
else
|
||||||
fi->cached_i_blkbits = fc->blkbits;
|
fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't set the sticky bit in i_mode, unless we want the VFS
|
* Don't set the sticky bit in i_mode, unless we want the VFS
|
||||||
|
|
@ -1838,22 +1838,11 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
if (!sb_set_blocksize(sb, ctx->blksize))
|
if (!sb_set_blocksize(sb, ctx->blksize))
|
||||||
goto err;
|
goto err;
|
||||||
/*
|
|
||||||
* This is a workaround until fuse hooks into iomap for reads.
|
|
||||||
* Use PAGE_SIZE for the blocksize else if the writeback cache
|
|
||||||
* is enabled, buffered writes go through iomap and a read may
|
|
||||||
* overwrite partially written data if blocksize < PAGE_SIZE
|
|
||||||
*/
|
|
||||||
fc->blkbits = sb->s_blocksize_bits;
|
|
||||||
if (ctx->blksize != PAGE_SIZE &&
|
|
||||||
!sb_set_blocksize(sb, PAGE_SIZE))
|
|
||||||
goto err;
|
|
||||||
#endif
|
#endif
|
||||||
fc->sync_fs = 1;
|
fc->sync_fs = 1;
|
||||||
} else {
|
} else {
|
||||||
sb->s_blocksize = PAGE_SIZE;
|
sb->s_blocksize = PAGE_SIZE;
|
||||||
sb->s_blocksize_bits = PAGE_SHIFT;
|
sb->s_blocksize_bits = PAGE_SHIFT;
|
||||||
fc->blkbits = sb->s_blocksize_bits;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sb->s_subtype = ctx->subtype;
|
sb->s_subtype = ctx->subtype;
|
||||||
|
|
|
||||||
|
|
@ -424,11 +424,11 @@ static int gfs2_read_folio(struct file *file, struct folio *folio)
|
||||||
struct inode *inode = folio->mapping->host;
|
struct inode *inode = folio->mapping->host;
|
||||||
struct gfs2_inode *ip = GFS2_I(inode);
|
struct gfs2_inode *ip = GFS2_I(inode);
|
||||||
struct gfs2_sbd *sdp = GFS2_SB(inode);
|
struct gfs2_sbd *sdp = GFS2_SB(inode);
|
||||||
int error;
|
int error = 0;
|
||||||
|
|
||||||
if (!gfs2_is_jdata(ip) ||
|
if (!gfs2_is_jdata(ip) ||
|
||||||
(i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) {
|
(i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) {
|
||||||
error = iomap_read_folio(folio, &gfs2_iomap_ops);
|
iomap_bio_read_folio(folio, &gfs2_iomap_ops);
|
||||||
} else if (gfs2_is_stuffed(ip)) {
|
} else if (gfs2_is_stuffed(ip)) {
|
||||||
error = stuffed_read_folio(ip, folio);
|
error = stuffed_read_folio(ip, folio);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -503,7 +503,7 @@ static void gfs2_readahead(struct readahead_control *rac)
|
||||||
else if (gfs2_is_jdata(ip))
|
else if (gfs2_is_jdata(ip))
|
||||||
mpage_readahead(rac, gfs2_block_map);
|
mpage_readahead(rac, gfs2_block_map);
|
||||||
else
|
else
|
||||||
iomap_readahead(rac, &gfs2_iomap_ops);
|
iomap_bio_readahead(rac, &gfs2_iomap_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -14,5 +14,6 @@ iomap-y += trace.o \
|
||||||
iomap-$(CONFIG_BLOCK) += direct-io.o \
|
iomap-$(CONFIG_BLOCK) += direct-io.o \
|
||||||
ioend.o \
|
ioend.o \
|
||||||
fiemap.o \
|
fiemap.o \
|
||||||
seek.o
|
seek.o \
|
||||||
|
bio.o
|
||||||
iomap-$(CONFIG_SWAP) += swapfile.o
|
iomap-$(CONFIG_SWAP) += swapfile.o
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2010 Red Hat, Inc.
|
||||||
|
* Copyright (C) 2016-2023 Christoph Hellwig.
|
||||||
|
*/
|
||||||
|
#include <linux/iomap.h>
|
||||||
|
#include <linux/pagemap.h>
|
||||||
|
#include "internal.h"
|
||||||
|
#include "trace.h"
|
||||||
|
|
||||||
|
static void iomap_read_end_io(struct bio *bio)
|
||||||
|
{
|
||||||
|
int error = blk_status_to_errno(bio->bi_status);
|
||||||
|
struct folio_iter fi;
|
||||||
|
|
||||||
|
bio_for_each_folio_all(fi, bio)
|
||||||
|
iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error);
|
||||||
|
bio_put(bio);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
|
||||||
|
{
|
||||||
|
struct bio *bio = ctx->read_ctx;
|
||||||
|
|
||||||
|
if (bio)
|
||||||
|
submit_bio(bio);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int iomap_bio_read_folio_range(const struct iomap_iter *iter,
|
||||||
|
struct iomap_read_folio_ctx *ctx, size_t plen)
|
||||||
|
{
|
||||||
|
struct folio *folio = ctx->cur_folio;
|
||||||
|
const struct iomap *iomap = &iter->iomap;
|
||||||
|
loff_t pos = iter->pos;
|
||||||
|
size_t poff = offset_in_folio(folio, pos);
|
||||||
|
loff_t length = iomap_length(iter);
|
||||||
|
sector_t sector;
|
||||||
|
struct bio *bio = ctx->read_ctx;
|
||||||
|
|
||||||
|
sector = iomap_sector(iomap, pos);
|
||||||
|
if (!bio || bio_end_sector(bio) != sector ||
|
||||||
|
!bio_add_folio(bio, folio, plen, poff)) {
|
||||||
|
gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
|
||||||
|
gfp_t orig_gfp = gfp;
|
||||||
|
unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
|
||||||
|
|
||||||
|
if (bio)
|
||||||
|
submit_bio(bio);
|
||||||
|
|
||||||
|
if (ctx->rac) /* same as readahead_gfp_mask */
|
||||||
|
gfp |= __GFP_NORETRY | __GFP_NOWARN;
|
||||||
|
bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ,
|
||||||
|
gfp);
|
||||||
|
/*
|
||||||
|
* If the bio_alloc fails, try it again for a single page to
|
||||||
|
* avoid having to deal with partial page reads. This emulates
|
||||||
|
* what do_mpage_read_folio does.
|
||||||
|
*/
|
||||||
|
if (!bio)
|
||||||
|
bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp);
|
||||||
|
if (ctx->rac)
|
||||||
|
bio->bi_opf |= REQ_RAHEAD;
|
||||||
|
bio->bi_iter.bi_sector = sector;
|
||||||
|
bio->bi_end_io = iomap_read_end_io;
|
||||||
|
bio_add_folio_nofail(bio, folio, plen, poff);
|
||||||
|
ctx->read_ctx = bio;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct iomap_read_ops iomap_bio_read_ops = {
|
||||||
|
.read_folio_range = iomap_bio_read_folio_range,
|
||||||
|
.submit_read = iomap_bio_submit_read,
|
||||||
|
};
|
||||||
|
EXPORT_SYMBOL_GPL(iomap_bio_read_ops);
|
||||||
|
|
||||||
|
int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter,
|
||||||
|
struct folio *folio, loff_t pos, size_t len)
|
||||||
|
{
|
||||||
|
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
||||||
|
struct bio_vec bvec;
|
||||||
|
struct bio bio;
|
||||||
|
|
||||||
|
bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ);
|
||||||
|
bio.bi_iter.bi_sector = iomap_sector(srcmap, pos);
|
||||||
|
bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos));
|
||||||
|
return submit_bio_wait(&bio);
|
||||||
|
}
|
||||||
|
|
@ -8,6 +8,7 @@
|
||||||
#include <linux/writeback.h>
|
#include <linux/writeback.h>
|
||||||
#include <linux/swap.h>
|
#include <linux/swap.h>
|
||||||
#include <linux/migrate.h>
|
#include <linux/migrate.h>
|
||||||
|
#include "internal.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
|
||||||
#include "../internal.h"
|
#include "../internal.h"
|
||||||
|
|
@ -37,10 +38,28 @@ static inline bool ifs_is_fully_uptodate(struct folio *folio,
|
||||||
return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio));
|
return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
|
/*
|
||||||
unsigned int block)
|
* Find the next uptodate block in the folio. end_blk is inclusive.
|
||||||
|
* If no uptodate block is found, this will return end_blk + 1.
|
||||||
|
*/
|
||||||
|
static unsigned ifs_next_uptodate_block(struct folio *folio,
|
||||||
|
unsigned start_blk, unsigned end_blk)
|
||||||
{
|
{
|
||||||
return test_bit(block, ifs->state);
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
|
|
||||||
|
return find_next_bit(ifs->state, end_blk + 1, start_blk);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the next non-uptodate block in the folio. end_blk is inclusive.
|
||||||
|
* If no non-uptodate block is found, this will return end_blk + 1.
|
||||||
|
*/
|
||||||
|
static unsigned ifs_next_nonuptodate_block(struct folio *folio,
|
||||||
|
unsigned start_blk, unsigned end_blk)
|
||||||
|
{
|
||||||
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
|
|
||||||
|
return find_next_zero_bit(ifs->state, end_blk + 1, start_blk);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ifs_set_range_uptodate(struct folio *folio,
|
static bool ifs_set_range_uptodate(struct folio *folio,
|
||||||
|
|
@ -75,13 +94,34 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
|
||||||
folio_mark_uptodate(folio);
|
folio_mark_uptodate(folio);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ifs_block_is_dirty(struct folio *folio,
|
/*
|
||||||
struct iomap_folio_state *ifs, int block)
|
* Find the next dirty block in the folio. end_blk is inclusive.
|
||||||
|
* If no dirty block is found, this will return end_blk + 1.
|
||||||
|
*/
|
||||||
|
static unsigned ifs_next_dirty_block(struct folio *folio,
|
||||||
|
unsigned start_blk, unsigned end_blk)
|
||||||
{
|
{
|
||||||
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
struct inode *inode = folio->mapping->host;
|
struct inode *inode = folio->mapping->host;
|
||||||
unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
|
unsigned int blks = i_blocks_per_folio(inode, folio);
|
||||||
|
|
||||||
return test_bit(block + blks_per_folio, ifs->state);
|
return find_next_bit(ifs->state, blks + end_blk + 1,
|
||||||
|
blks + start_blk) - blks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the next clean block in the folio. end_blk is inclusive.
|
||||||
|
* If no clean block is found, this will return end_blk + 1.
|
||||||
|
*/
|
||||||
|
static unsigned ifs_next_clean_block(struct folio *folio,
|
||||||
|
unsigned start_blk, unsigned end_blk)
|
||||||
|
{
|
||||||
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
|
struct inode *inode = folio->mapping->host;
|
||||||
|
unsigned int blks = i_blocks_per_folio(inode, folio);
|
||||||
|
|
||||||
|
return find_next_zero_bit(ifs->state, blks + end_blk + 1,
|
||||||
|
blks + start_blk) - blks;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned ifs_find_dirty_range(struct folio *folio,
|
static unsigned ifs_find_dirty_range(struct folio *folio,
|
||||||
|
|
@ -92,18 +132,17 @@ static unsigned ifs_find_dirty_range(struct folio *folio,
|
||||||
offset_in_folio(folio, *range_start) >> inode->i_blkbits;
|
offset_in_folio(folio, *range_start) >> inode->i_blkbits;
|
||||||
unsigned end_blk = min_not_zero(
|
unsigned end_blk = min_not_zero(
|
||||||
offset_in_folio(folio, range_end) >> inode->i_blkbits,
|
offset_in_folio(folio, range_end) >> inode->i_blkbits,
|
||||||
i_blocks_per_folio(inode, folio));
|
i_blocks_per_folio(inode, folio)) - 1;
|
||||||
unsigned nblks = 1;
|
unsigned nblks;
|
||||||
|
|
||||||
while (!ifs_block_is_dirty(folio, ifs, start_blk))
|
start_blk = ifs_next_dirty_block(folio, start_blk, end_blk);
|
||||||
if (++start_blk == end_blk)
|
if (start_blk > end_blk)
|
||||||
return 0;
|
return 0;
|
||||||
|
if (start_blk == end_blk)
|
||||||
while (start_blk + nblks < end_blk) {
|
nblks = 1;
|
||||||
if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks))
|
else
|
||||||
break;
|
nblks = ifs_next_clean_block(folio, start_blk + 1, end_blk) -
|
||||||
nblks++;
|
start_blk;
|
||||||
}
|
|
||||||
|
|
||||||
*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
|
*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
|
||||||
return nblks << inode->i_blkbits;
|
return nblks << inode->i_blkbits;
|
||||||
|
|
@ -217,6 +256,22 @@ static void ifs_free(struct folio *folio)
|
||||||
kfree(ifs);
|
kfree(ifs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calculate how many bytes to truncate based off the number of blocks to
|
||||||
|
* truncate and the end position to start truncating from.
|
||||||
|
*/
|
||||||
|
static size_t iomap_bytes_to_truncate(loff_t end_pos, unsigned block_bits,
|
||||||
|
unsigned blocks_truncated)
|
||||||
|
{
|
||||||
|
unsigned block_size = 1 << block_bits;
|
||||||
|
unsigned block_offset = end_pos & (block_size - 1);
|
||||||
|
|
||||||
|
if (!block_offset)
|
||||||
|
return blocks_truncated << block_bits;
|
||||||
|
|
||||||
|
return ((blocks_truncated - 1) << block_bits) + block_offset;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Calculate the range inside the folio that we actually need to read.
|
* Calculate the range inside the folio that we actually need to read.
|
||||||
*/
|
*/
|
||||||
|
|
@ -240,24 +295,29 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
|
||||||
* to avoid reading in already uptodate ranges.
|
* to avoid reading in already uptodate ranges.
|
||||||
*/
|
*/
|
||||||
if (ifs) {
|
if (ifs) {
|
||||||
unsigned int i;
|
unsigned int next, blocks_skipped;
|
||||||
|
|
||||||
/* move forward for each leading block marked uptodate */
|
next = ifs_next_nonuptodate_block(folio, first, last);
|
||||||
for (i = first; i <= last; i++) {
|
blocks_skipped = next - first;
|
||||||
if (!ifs_block_is_uptodate(ifs, i))
|
|
||||||
break;
|
if (blocks_skipped) {
|
||||||
*pos += block_size;
|
unsigned long block_offset = *pos & (block_size - 1);
|
||||||
poff += block_size;
|
unsigned bytes_skipped =
|
||||||
plen -= block_size;
|
(blocks_skipped << block_bits) - block_offset;
|
||||||
first++;
|
|
||||||
|
*pos += bytes_skipped;
|
||||||
|
poff += bytes_skipped;
|
||||||
|
plen -= bytes_skipped;
|
||||||
}
|
}
|
||||||
|
first = next;
|
||||||
|
|
||||||
/* truncate len if we find any trailing uptodate block(s) */
|
/* truncate len if we find any trailing uptodate block(s) */
|
||||||
while (++i <= last) {
|
if (++next <= last) {
|
||||||
if (ifs_block_is_uptodate(ifs, i)) {
|
next = ifs_next_uptodate_block(folio, next, last);
|
||||||
plen -= (last - i + 1) * block_size;
|
if (next <= last) {
|
||||||
last = i - 1;
|
plen -= iomap_bytes_to_truncate(*pos + plen,
|
||||||
break;
|
block_bits, last - next + 1);
|
||||||
|
last = next - 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -271,7 +331,8 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
|
||||||
unsigned end = offset_in_folio(folio, isize - 1) >> block_bits;
|
unsigned end = offset_in_folio(folio, isize - 1) >> block_bits;
|
||||||
|
|
||||||
if (first <= end && last > end)
|
if (first <= end && last > end)
|
||||||
plen -= (last - end) * block_size;
|
plen -= iomap_bytes_to_truncate(*pos + plen, block_bits,
|
||||||
|
last - end);
|
||||||
}
|
}
|
||||||
|
|
||||||
*offp = poff;
|
*offp = poff;
|
||||||
|
|
@ -320,9 +381,8 @@ static int iomap_read_inline_data(const struct iomap_iter *iter,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_BLOCK
|
void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
|
||||||
static void iomap_finish_folio_read(struct folio *folio, size_t off,
|
int error)
|
||||||
size_t len, int error)
|
|
||||||
{
|
{
|
||||||
struct iomap_folio_state *ifs = folio->private;
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
bool uptodate = !error;
|
bool uptodate = !error;
|
||||||
|
|
@ -342,169 +402,201 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off,
|
||||||
if (finished)
|
if (finished)
|
||||||
folio_end_read(folio, uptodate);
|
folio_end_read(folio, uptodate);
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(iomap_finish_folio_read);
|
||||||
|
|
||||||
static void iomap_read_end_io(struct bio *bio)
|
static void iomap_read_init(struct folio *folio)
|
||||||
{
|
{
|
||||||
int error = blk_status_to_errno(bio->bi_status);
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
struct folio_iter fi;
|
|
||||||
|
|
||||||
bio_for_each_folio_all(fi, bio)
|
if (ifs) {
|
||||||
iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error);
|
size_t len = folio_size(folio);
|
||||||
bio_put(bio);
|
|
||||||
|
/*
|
||||||
|
* ifs->read_bytes_pending is used to track how many bytes are
|
||||||
|
* read in asynchronously by the IO helper. We need to track
|
||||||
|
* this so that we can know when the IO helper has finished
|
||||||
|
* reading in all the necessary ranges of the folio and can end
|
||||||
|
* the read.
|
||||||
|
*
|
||||||
|
* Increase ->read_bytes_pending by the folio size to start, and
|
||||||
|
* add a +1 bias. We'll subtract the bias and any uptodate /
|
||||||
|
* zeroed ranges that did not require IO in iomap_read_end()
|
||||||
|
* after we're done processing the folio.
|
||||||
|
*
|
||||||
|
* We do this because otherwise, we would have to increment
|
||||||
|
* ifs->read_bytes_pending every time a range in the folio needs
|
||||||
|
* to be read in, which can get expensive since the spinlock
|
||||||
|
* needs to be held whenever modifying ifs->read_bytes_pending.
|
||||||
|
*
|
||||||
|
* We add the bias to ensure the read has not been ended on the
|
||||||
|
* folio when iomap_read_end() is called, even if the IO helper
|
||||||
|
* has already finished reading in the entire folio.
|
||||||
|
*/
|
||||||
|
spin_lock_irq(&ifs->state_lock);
|
||||||
|
WARN_ON_ONCE(ifs->read_bytes_pending != 0);
|
||||||
|
ifs->read_bytes_pending = len + 1;
|
||||||
|
spin_unlock_irq(&ifs->state_lock);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct iomap_readpage_ctx {
|
/*
|
||||||
struct folio *cur_folio;
|
* This ends IO if no bytes were submitted to an IO helper.
|
||||||
bool cur_folio_in_bio;
|
*
|
||||||
struct bio *bio;
|
* Otherwise, this calibrates ifs->read_bytes_pending to represent only the
|
||||||
struct readahead_control *rac;
|
* submitted bytes (see comment in iomap_read_init()). If all bytes submitted
|
||||||
};
|
* have already been completed by the IO helper, then this will end the read.
|
||||||
|
* Else the IO helper will end the read after all submitted ranges have been
|
||||||
|
* read.
|
||||||
|
*/
|
||||||
|
static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
|
||||||
|
{
|
||||||
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
|
|
||||||
static int iomap_readpage_iter(struct iomap_iter *iter,
|
if (ifs) {
|
||||||
struct iomap_readpage_ctx *ctx)
|
bool end_read, uptodate;
|
||||||
|
|
||||||
|
spin_lock_irq(&ifs->state_lock);
|
||||||
|
if (!ifs->read_bytes_pending) {
|
||||||
|
WARN_ON_ONCE(bytes_submitted);
|
||||||
|
spin_unlock_irq(&ifs->state_lock);
|
||||||
|
folio_unlock(folio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Subtract any bytes that were initially accounted to
|
||||||
|
* read_bytes_pending but skipped for IO. The +1 accounts for
|
||||||
|
* the bias we added in iomap_read_init().
|
||||||
|
*/
|
||||||
|
ifs->read_bytes_pending -=
|
||||||
|
(folio_size(folio) + 1 - bytes_submitted);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If !ifs->read_bytes_pending, this means all pending reads by
|
||||||
|
* the IO helper have already completed, which means we need to
|
||||||
|
* end the folio read here. If ifs->read_bytes_pending != 0,
|
||||||
|
* the IO helper will end the folio read.
|
||||||
|
*/
|
||||||
|
end_read = !ifs->read_bytes_pending;
|
||||||
|
if (end_read)
|
||||||
|
uptodate = ifs_is_fully_uptodate(folio, ifs);
|
||||||
|
spin_unlock_irq(&ifs->state_lock);
|
||||||
|
if (end_read)
|
||||||
|
folio_end_read(folio, uptodate);
|
||||||
|
} else if (!bytes_submitted) {
|
||||||
|
/*
|
||||||
|
* If there were no bytes submitted, this means we are
|
||||||
|
* responsible for unlocking the folio here, since no IO helper
|
||||||
|
* has taken ownership of it. If there were bytes submitted,
|
||||||
|
* then the IO helper will end the read via
|
||||||
|
* iomap_finish_folio_read().
|
||||||
|
*/
|
||||||
|
folio_unlock(folio);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int iomap_read_folio_iter(struct iomap_iter *iter,
|
||||||
|
struct iomap_read_folio_ctx *ctx, size_t *bytes_submitted)
|
||||||
{
|
{
|
||||||
const struct iomap *iomap = &iter->iomap;
|
const struct iomap *iomap = &iter->iomap;
|
||||||
loff_t pos = iter->pos;
|
loff_t pos = iter->pos;
|
||||||
loff_t length = iomap_length(iter);
|
loff_t length = iomap_length(iter);
|
||||||
struct folio *folio = ctx->cur_folio;
|
struct folio *folio = ctx->cur_folio;
|
||||||
struct iomap_folio_state *ifs;
|
|
||||||
size_t poff, plen;
|
size_t poff, plen;
|
||||||
sector_t sector;
|
loff_t pos_diff;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (iomap->type == IOMAP_INLINE) {
|
if (iomap->type == IOMAP_INLINE) {
|
||||||
ret = iomap_read_inline_data(iter, folio);
|
ret = iomap_read_inline_data(iter, folio);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* zero post-eof blocks as the page may be mapped */
|
ifs_alloc(iter->inode, folio, iter->flags);
|
||||||
ifs = ifs_alloc(iter->inode, folio, iter->flags);
|
|
||||||
iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen);
|
|
||||||
if (plen == 0)
|
|
||||||
goto done;
|
|
||||||
|
|
||||||
if (iomap_block_needs_zeroing(iter, pos)) {
|
length = min_t(loff_t, length,
|
||||||
folio_zero_range(folio, poff, plen);
|
folio_size(folio) - offset_in_folio(folio, pos));
|
||||||
iomap_set_range_uptodate(folio, poff, plen);
|
while (length) {
|
||||||
goto done;
|
iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff,
|
||||||
}
|
&plen);
|
||||||
|
|
||||||
ctx->cur_folio_in_bio = true;
|
pos_diff = pos - iter->pos;
|
||||||
if (ifs) {
|
if (WARN_ON_ONCE(pos_diff + plen > length))
|
||||||
spin_lock_irq(&ifs->state_lock);
|
return -EIO;
|
||||||
ifs->read_bytes_pending += plen;
|
|
||||||
spin_unlock_irq(&ifs->state_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
sector = iomap_sector(iomap, pos);
|
ret = iomap_iter_advance(iter, pos_diff);
|
||||||
if (!ctx->bio ||
|
|
||||||
bio_end_sector(ctx->bio) != sector ||
|
|
||||||
!bio_add_folio(ctx->bio, folio, plen, poff)) {
|
|
||||||
gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
|
|
||||||
gfp_t orig_gfp = gfp;
|
|
||||||
unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
|
|
||||||
|
|
||||||
if (ctx->bio)
|
|
||||||
submit_bio(ctx->bio);
|
|
||||||
|
|
||||||
if (ctx->rac) /* same as readahead_gfp_mask */
|
|
||||||
gfp |= __GFP_NORETRY | __GFP_NOWARN;
|
|
||||||
ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs),
|
|
||||||
REQ_OP_READ, gfp);
|
|
||||||
/*
|
|
||||||
* If the bio_alloc fails, try it again for a single page to
|
|
||||||
* avoid having to deal with partial page reads. This emulates
|
|
||||||
* what do_mpage_read_folio does.
|
|
||||||
*/
|
|
||||||
if (!ctx->bio) {
|
|
||||||
ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ,
|
|
||||||
orig_gfp);
|
|
||||||
}
|
|
||||||
if (ctx->rac)
|
|
||||||
ctx->bio->bi_opf |= REQ_RAHEAD;
|
|
||||||
ctx->bio->bi_iter.bi_sector = sector;
|
|
||||||
ctx->bio->bi_end_io = iomap_read_end_io;
|
|
||||||
bio_add_folio_nofail(ctx->bio, folio, plen, poff);
|
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
|
||||||
/*
|
|
||||||
* Move the caller beyond our range so that it keeps making progress.
|
|
||||||
* For that, we have to include any leading non-uptodate ranges, but
|
|
||||||
* we can skip trailing ones as they will be handled in the next
|
|
||||||
* iteration.
|
|
||||||
*/
|
|
||||||
length = pos - iter->pos + plen;
|
|
||||||
return iomap_iter_advance(iter, &length);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int iomap_read_folio_iter(struct iomap_iter *iter,
|
|
||||||
struct iomap_readpage_ctx *ctx)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
while (iomap_length(iter)) {
|
|
||||||
ret = iomap_readpage_iter(iter, ctx);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
|
||||||
|
|
||||||
|
if (plen == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* zero post-eof blocks as the page may be mapped */
|
||||||
|
if (iomap_block_needs_zeroing(iter, pos)) {
|
||||||
|
folio_zero_range(folio, poff, plen);
|
||||||
|
iomap_set_range_uptodate(folio, poff, plen);
|
||||||
|
} else {
|
||||||
|
if (!*bytes_submitted)
|
||||||
|
iomap_read_init(folio);
|
||||||
|
ret = ctx->ops->read_folio_range(iter, ctx, plen);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
*bytes_submitted += plen;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = iomap_iter_advance(iter, plen);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
length -= pos_diff + plen;
|
||||||
|
pos = iter->pos;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
|
void iomap_read_folio(const struct iomap_ops *ops,
|
||||||
|
struct iomap_read_folio_ctx *ctx)
|
||||||
{
|
{
|
||||||
|
struct folio *folio = ctx->cur_folio;
|
||||||
struct iomap_iter iter = {
|
struct iomap_iter iter = {
|
||||||
.inode = folio->mapping->host,
|
.inode = folio->mapping->host,
|
||||||
.pos = folio_pos(folio),
|
.pos = folio_pos(folio),
|
||||||
.len = folio_size(folio),
|
.len = folio_size(folio),
|
||||||
};
|
};
|
||||||
struct iomap_readpage_ctx ctx = {
|
size_t bytes_submitted = 0;
|
||||||
.cur_folio = folio,
|
|
||||||
};
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
trace_iomap_readpage(iter.inode, 1);
|
trace_iomap_readpage(iter.inode, 1);
|
||||||
|
|
||||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||||
iter.status = iomap_read_folio_iter(&iter, &ctx);
|
iter.status = iomap_read_folio_iter(&iter, ctx,
|
||||||
|
&bytes_submitted);
|
||||||
|
|
||||||
if (ctx.bio) {
|
if (ctx->ops->submit_read)
|
||||||
submit_bio(ctx.bio);
|
ctx->ops->submit_read(ctx);
|
||||||
WARN_ON_ONCE(!ctx.cur_folio_in_bio);
|
|
||||||
} else {
|
|
||||||
WARN_ON_ONCE(ctx.cur_folio_in_bio);
|
|
||||||
folio_unlock(folio);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
iomap_read_end(folio, bytes_submitted);
|
||||||
* Just like mpage_readahead and block_read_full_folio, we always
|
|
||||||
* return 0 and just set the folio error flag on errors. This
|
|
||||||
* should be cleaned up throughout the stack eventually.
|
|
||||||
*/
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_read_folio);
|
EXPORT_SYMBOL_GPL(iomap_read_folio);
|
||||||
|
|
||||||
static int iomap_readahead_iter(struct iomap_iter *iter,
|
static int iomap_readahead_iter(struct iomap_iter *iter,
|
||||||
struct iomap_readpage_ctx *ctx)
|
struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_submitted)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
while (iomap_length(iter)) {
|
while (iomap_length(iter)) {
|
||||||
if (ctx->cur_folio &&
|
if (ctx->cur_folio &&
|
||||||
offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
|
offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
|
||||||
if (!ctx->cur_folio_in_bio)
|
iomap_read_end(ctx->cur_folio, *cur_bytes_submitted);
|
||||||
folio_unlock(ctx->cur_folio);
|
|
||||||
ctx->cur_folio = NULL;
|
ctx->cur_folio = NULL;
|
||||||
}
|
}
|
||||||
if (!ctx->cur_folio) {
|
if (!ctx->cur_folio) {
|
||||||
ctx->cur_folio = readahead_folio(ctx->rac);
|
ctx->cur_folio = readahead_folio(ctx->rac);
|
||||||
ctx->cur_folio_in_bio = false;
|
if (WARN_ON_ONCE(!ctx->cur_folio))
|
||||||
|
return -EINVAL;
|
||||||
|
*cur_bytes_submitted = 0;
|
||||||
}
|
}
|
||||||
ret = iomap_readpage_iter(iter, ctx);
|
ret = iomap_read_folio_iter(iter, ctx, cur_bytes_submitted);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
@ -514,8 +606,8 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* iomap_readahead - Attempt to read pages from a file.
|
* iomap_readahead - Attempt to read pages from a file.
|
||||||
* @rac: Describes the pages to be read.
|
|
||||||
* @ops: The operations vector for the filesystem.
|
* @ops: The operations vector for the filesystem.
|
||||||
|
* @ctx: The ctx used for issuing readahead.
|
||||||
*
|
*
|
||||||
* This function is for filesystems to call to implement their readahead
|
* This function is for filesystems to call to implement their readahead
|
||||||
* address_space operation.
|
* address_space operation.
|
||||||
|
|
@ -527,52 +619,31 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
|
||||||
* function is called with memalloc_nofs set, so allocations will not cause
|
* function is called with memalloc_nofs set, so allocations will not cause
|
||||||
* the filesystem to be reentered.
|
* the filesystem to be reentered.
|
||||||
*/
|
*/
|
||||||
void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
|
void iomap_readahead(const struct iomap_ops *ops,
|
||||||
|
struct iomap_read_folio_ctx *ctx)
|
||||||
{
|
{
|
||||||
|
struct readahead_control *rac = ctx->rac;
|
||||||
struct iomap_iter iter = {
|
struct iomap_iter iter = {
|
||||||
.inode = rac->mapping->host,
|
.inode = rac->mapping->host,
|
||||||
.pos = readahead_pos(rac),
|
.pos = readahead_pos(rac),
|
||||||
.len = readahead_length(rac),
|
.len = readahead_length(rac),
|
||||||
};
|
};
|
||||||
struct iomap_readpage_ctx ctx = {
|
size_t cur_bytes_submitted;
|
||||||
.rac = rac,
|
|
||||||
};
|
|
||||||
|
|
||||||
trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
|
trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
|
||||||
|
|
||||||
while (iomap_iter(&iter, ops) > 0)
|
while (iomap_iter(&iter, ops) > 0)
|
||||||
iter.status = iomap_readahead_iter(&iter, &ctx);
|
iter.status = iomap_readahead_iter(&iter, ctx,
|
||||||
|
&cur_bytes_submitted);
|
||||||
|
|
||||||
if (ctx.bio)
|
if (ctx->ops->submit_read)
|
||||||
submit_bio(ctx.bio);
|
ctx->ops->submit_read(ctx);
|
||||||
if (ctx.cur_folio) {
|
|
||||||
if (!ctx.cur_folio_in_bio)
|
if (ctx->cur_folio)
|
||||||
folio_unlock(ctx.cur_folio);
|
iomap_read_end(ctx->cur_folio, cur_bytes_submitted);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_readahead);
|
EXPORT_SYMBOL_GPL(iomap_readahead);
|
||||||
|
|
||||||
static int iomap_read_folio_range(const struct iomap_iter *iter,
|
|
||||||
struct folio *folio, loff_t pos, size_t len)
|
|
||||||
{
|
|
||||||
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
|
||||||
struct bio_vec bvec;
|
|
||||||
struct bio bio;
|
|
||||||
|
|
||||||
bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ);
|
|
||||||
bio.bi_iter.bi_sector = iomap_sector(srcmap, pos);
|
|
||||||
bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos));
|
|
||||||
return submit_bio_wait(&bio);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static int iomap_read_folio_range(const struct iomap_iter *iter,
|
|
||||||
struct folio *folio, loff_t pos, size_t len)
|
|
||||||
{
|
|
||||||
WARN_ON_ONCE(1);
|
|
||||||
return -EIO;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_BLOCK */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* iomap_is_partially_uptodate checks whether blocks within a folio are
|
* iomap_is_partially_uptodate checks whether blocks within a folio are
|
||||||
* uptodate or not.
|
* uptodate or not.
|
||||||
|
|
@ -584,7 +655,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
|
||||||
{
|
{
|
||||||
struct iomap_folio_state *ifs = folio->private;
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
struct inode *inode = folio->mapping->host;
|
struct inode *inode = folio->mapping->host;
|
||||||
unsigned first, last, i;
|
unsigned first, last;
|
||||||
|
|
||||||
if (!ifs)
|
if (!ifs)
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -596,10 +667,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
|
||||||
first = from >> inode->i_blkbits;
|
first = from >> inode->i_blkbits;
|
||||||
last = (from + count - 1) >> inode->i_blkbits;
|
last = (from + count - 1) >> inode->i_blkbits;
|
||||||
|
|
||||||
for (i = first; i <= last; i++)
|
return ifs_next_nonuptodate_block(folio, first, last) > last;
|
||||||
if (!ifs_block_is_uptodate(ifs, i))
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
|
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
|
||||||
|
|
||||||
|
|
@ -723,9 +791,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter,
|
||||||
if (plen == 0)
|
if (plen == 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (!(iter->flags & IOMAP_UNSHARE) &&
|
/*
|
||||||
(from <= poff || from >= poff + plen) &&
|
* If the read range will be entirely overwritten by the write,
|
||||||
(to <= poff || to >= poff + plen))
|
* we can skip having to zero/read it in.
|
||||||
|
*/
|
||||||
|
if (!(iter->flags & IOMAP_UNSHARE) && from <= poff &&
|
||||||
|
to >= poff + plen)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (iomap_block_needs_zeroing(iter, block_start)) {
|
if (iomap_block_needs_zeroing(iter, block_start)) {
|
||||||
|
|
@ -742,7 +813,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter,
|
||||||
status = write_ops->read_folio_range(iter,
|
status = write_ops->read_folio_range(iter,
|
||||||
folio, block_start, plen);
|
folio, block_start, plen);
|
||||||
else
|
else
|
||||||
status = iomap_read_folio_range(iter,
|
status = iomap_bio_read_folio_range_sync(iter,
|
||||||
folio, block_start, plen);
|
folio, block_start, plen);
|
||||||
if (status)
|
if (status)
|
||||||
return status;
|
return status;
|
||||||
|
|
@ -761,6 +832,28 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter,
|
||||||
if (!mapping_large_folio_support(iter->inode->i_mapping))
|
if (!mapping_large_folio_support(iter->inode->i_mapping))
|
||||||
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
|
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
|
||||||
|
|
||||||
|
if (iter->fbatch) {
|
||||||
|
struct folio *folio = folio_batch_next(iter->fbatch);
|
||||||
|
|
||||||
|
if (!folio)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The folio mapping generally shouldn't have changed based on
|
||||||
|
* fs locks, but be consistent with filemap lookup and retry
|
||||||
|
* the iter if it does.
|
||||||
|
*/
|
||||||
|
folio_lock(folio);
|
||||||
|
if (unlikely(folio->mapping != iter->inode->i_mapping)) {
|
||||||
|
iter->iomap.flags |= IOMAP_F_STALE;
|
||||||
|
folio_unlock(folio);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
folio_get(folio);
|
||||||
|
return folio;
|
||||||
|
}
|
||||||
|
|
||||||
if (write_ops && write_ops->get_folio)
|
if (write_ops && write_ops->get_folio)
|
||||||
return write_ops->get_folio(iter, pos, len);
|
return write_ops->get_folio(iter, pos, len);
|
||||||
return iomap_get_folio(iter, pos, len);
|
return iomap_get_folio(iter, pos, len);
|
||||||
|
|
@ -815,15 +908,14 @@ static int iomap_write_begin(struct iomap_iter *iter,
|
||||||
size_t *poffset, u64 *plen)
|
size_t *poffset, u64 *plen)
|
||||||
{
|
{
|
||||||
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
||||||
loff_t pos = iter->pos;
|
loff_t pos;
|
||||||
u64 len = min_t(u64, SIZE_MAX, iomap_length(iter));
|
u64 len = min_t(u64, SIZE_MAX, iomap_length(iter));
|
||||||
struct folio *folio;
|
struct folio *folio;
|
||||||
int status = 0;
|
int status = 0;
|
||||||
|
|
||||||
len = min_not_zero(len, *plen);
|
len = min_not_zero(len, *plen);
|
||||||
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
|
*foliop = NULL;
|
||||||
if (srcmap != &iter->iomap)
|
*plen = 0;
|
||||||
BUG_ON(pos + len > srcmap->offset + srcmap->length);
|
|
||||||
|
|
||||||
if (fatal_signal_pending(current))
|
if (fatal_signal_pending(current))
|
||||||
return -EINTR;
|
return -EINTR;
|
||||||
|
|
@ -832,6 +924,15 @@ static int iomap_write_begin(struct iomap_iter *iter,
|
||||||
if (IS_ERR(folio))
|
if (IS_ERR(folio))
|
||||||
return PTR_ERR(folio);
|
return PTR_ERR(folio);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* No folio means we're done with a batch. We still have range to
|
||||||
|
* process so return and let the caller iterate and refill the batch.
|
||||||
|
*/
|
||||||
|
if (!folio) {
|
||||||
|
WARN_ON_ONCE(!iter->fbatch);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we have a locked folio, before we do anything with it we need to
|
* Now we have a locked folio, before we do anything with it we need to
|
||||||
* check that the iomap we have cached is not stale. The inode extent
|
* check that the iomap we have cached is not stale. The inode extent
|
||||||
|
|
@ -852,6 +953,22 @@ static int iomap_write_begin(struct iomap_iter *iter,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The folios in a batch may not be contiguous. If we've skipped
|
||||||
|
* forward, advance the iter to the pos of the current folio. If the
|
||||||
|
* folio starts beyond the end of the mapping, it may have been trimmed
|
||||||
|
* since the lookup for whatever reason. Return a NULL folio to
|
||||||
|
* terminate the op.
|
||||||
|
*/
|
||||||
|
if (folio_pos(folio) > iter->pos) {
|
||||||
|
len = min_t(u64, folio_pos(folio) - iter->pos,
|
||||||
|
iomap_length(iter));
|
||||||
|
status = iomap_iter_advance(iter, len);
|
||||||
|
len = iomap_length(iter);
|
||||||
|
if (status || !len)
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
pos = iomap_trim_folio_range(iter, folio, poffset, &len);
|
pos = iomap_trim_folio_range(iter, folio, poffset, &len);
|
||||||
|
|
||||||
if (srcmap->type == IOMAP_INLINE)
|
if (srcmap->type == IOMAP_INLINE)
|
||||||
|
|
@ -1041,7 +1158,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
total_written += written;
|
total_written += written;
|
||||||
iomap_iter_advance(iter, &written);
|
iomap_iter_advance(iter, written);
|
||||||
}
|
}
|
||||||
} while (iov_iter_count(i) && iomap_length(iter));
|
} while (iov_iter_count(i) && iomap_length(iter));
|
||||||
|
|
||||||
|
|
@ -1082,7 +1199,7 @@ static void iomap_write_delalloc_ifs_punch(struct inode *inode,
|
||||||
struct folio *folio, loff_t start_byte, loff_t end_byte,
|
struct folio *folio, loff_t start_byte, loff_t end_byte,
|
||||||
struct iomap *iomap, iomap_punch_t punch)
|
struct iomap *iomap, iomap_punch_t punch)
|
||||||
{
|
{
|
||||||
unsigned int first_blk, last_blk, i;
|
unsigned int first_blk, last_blk;
|
||||||
loff_t last_byte;
|
loff_t last_byte;
|
||||||
u8 blkbits = inode->i_blkbits;
|
u8 blkbits = inode->i_blkbits;
|
||||||
struct iomap_folio_state *ifs;
|
struct iomap_folio_state *ifs;
|
||||||
|
|
@ -1101,10 +1218,11 @@ static void iomap_write_delalloc_ifs_punch(struct inode *inode,
|
||||||
folio_pos(folio) + folio_size(folio) - 1);
|
folio_pos(folio) + folio_size(folio) - 1);
|
||||||
first_blk = offset_in_folio(folio, start_byte) >> blkbits;
|
first_blk = offset_in_folio(folio, start_byte) >> blkbits;
|
||||||
last_blk = offset_in_folio(folio, last_byte) >> blkbits;
|
last_blk = offset_in_folio(folio, last_byte) >> blkbits;
|
||||||
for (i = first_blk; i <= last_blk; i++) {
|
while ((first_blk = ifs_next_clean_block(folio, first_blk, last_blk))
|
||||||
if (!ifs_block_is_dirty(folio, ifs, i))
|
<= last_blk) {
|
||||||
punch(inode, folio_pos(folio) + (i << blkbits),
|
punch(inode, folio_pos(folio) + (first_blk << blkbits),
|
||||||
1 << blkbits, iomap);
|
1 << blkbits, iomap);
|
||||||
|
first_blk++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1310,7 +1428,7 @@ static int iomap_unshare_iter(struct iomap_iter *iter,
|
||||||
int status;
|
int status;
|
||||||
|
|
||||||
if (!iomap_want_unshare_iter(iter))
|
if (!iomap_want_unshare_iter(iter))
|
||||||
return iomap_iter_advance(iter, &bytes);
|
return iomap_iter_advance(iter, bytes);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
struct folio *folio;
|
struct folio *folio;
|
||||||
|
|
@ -1334,10 +1452,10 @@ static int iomap_unshare_iter(struct iomap_iter *iter,
|
||||||
|
|
||||||
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
|
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
|
||||||
|
|
||||||
status = iomap_iter_advance(iter, &bytes);
|
status = iomap_iter_advance(iter, bytes);
|
||||||
if (status)
|
if (status)
|
||||||
break;
|
break;
|
||||||
} while (bytes > 0);
|
} while ((bytes = iomap_length(iter)) > 0);
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
@ -1398,6 +1516,12 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
|
||||||
if (iter->iomap.flags & IOMAP_F_STALE)
|
if (iter->iomap.flags & IOMAP_F_STALE)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* a NULL folio means we're done with a folio batch */
|
||||||
|
if (!folio) {
|
||||||
|
status = iomap_iter_advance_full(iter);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/* warn about zeroing folios beyond eof that won't write back */
|
/* warn about zeroing folios beyond eof that won't write back */
|
||||||
WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
|
WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
|
||||||
|
|
||||||
|
|
@ -1412,16 +1536,36 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
|
||||||
if (WARN_ON_ONCE(!ret))
|
if (WARN_ON_ONCE(!ret))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
|
||||||
status = iomap_iter_advance(iter, &bytes);
|
status = iomap_iter_advance(iter, bytes);
|
||||||
if (status)
|
if (status)
|
||||||
break;
|
break;
|
||||||
} while (bytes > 0);
|
} while ((bytes = iomap_length(iter)) > 0);
|
||||||
|
|
||||||
if (did_zero)
|
if (did_zero)
|
||||||
*did_zero = true;
|
*did_zero = true;
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
loff_t
|
||||||
|
iomap_fill_dirty_folios(
|
||||||
|
struct iomap_iter *iter,
|
||||||
|
loff_t offset,
|
||||||
|
loff_t length)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = iter->inode->i_mapping;
|
||||||
|
pgoff_t start = offset >> PAGE_SHIFT;
|
||||||
|
pgoff_t end = (offset + length - 1) >> PAGE_SHIFT;
|
||||||
|
|
||||||
|
iter->fbatch = kmalloc(sizeof(struct folio_batch), GFP_KERNEL);
|
||||||
|
if (!iter->fbatch)
|
||||||
|
return offset + length;
|
||||||
|
folio_batch_init(iter->fbatch);
|
||||||
|
|
||||||
|
filemap_get_folios_dirty(mapping, &start, end, iter->fbatch);
|
||||||
|
return (start << PAGE_SHIFT);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios);
|
||||||
|
|
||||||
int
|
int
|
||||||
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||||
const struct iomap_ops *ops,
|
const struct iomap_ops *ops,
|
||||||
|
|
@ -1435,46 +1579,26 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||||
.private = private,
|
.private = private,
|
||||||
};
|
};
|
||||||
struct address_space *mapping = inode->i_mapping;
|
struct address_space *mapping = inode->i_mapping;
|
||||||
unsigned int blocksize = i_blocksize(inode);
|
|
||||||
unsigned int off = pos & (blocksize - 1);
|
|
||||||
loff_t plen = min_t(loff_t, len, blocksize - off);
|
|
||||||
int ret;
|
int ret;
|
||||||
bool range_dirty;
|
bool range_dirty;
|
||||||
|
|
||||||
/*
|
|
||||||
* Zero range can skip mappings that are zero on disk so long as
|
|
||||||
* pagecache is clean. If pagecache was dirty prior to zero range, the
|
|
||||||
* mapping converts on writeback completion and so must be zeroed.
|
|
||||||
*
|
|
||||||
* The simplest way to deal with this across a range is to flush
|
|
||||||
* pagecache and process the updated mappings. To avoid excessive
|
|
||||||
* flushing on partial eof zeroing, special case it to zero the
|
|
||||||
* unaligned start portion if already dirty in pagecache.
|
|
||||||
*/
|
|
||||||
if (off &&
|
|
||||||
filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
|
|
||||||
iter.len = plen;
|
|
||||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
|
||||||
iter.status = iomap_zero_iter(&iter, did_zero,
|
|
||||||
write_ops);
|
|
||||||
|
|
||||||
iter.len = len - (iter.pos - pos);
|
|
||||||
if (ret || !iter.len)
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* To avoid an unconditional flush, check pagecache state and only flush
|
* To avoid an unconditional flush, check pagecache state and only flush
|
||||||
* if dirty and the fs returns a mapping that might convert on
|
* if dirty and the fs returns a mapping that might convert on
|
||||||
* writeback.
|
* writeback.
|
||||||
*/
|
*/
|
||||||
range_dirty = filemap_range_needs_writeback(inode->i_mapping,
|
range_dirty = filemap_range_needs_writeback(mapping, iter.pos,
|
||||||
iter.pos, iter.pos + iter.len - 1);
|
iter.pos + iter.len - 1);
|
||||||
while ((ret = iomap_iter(&iter, ops)) > 0) {
|
while ((ret = iomap_iter(&iter, ops)) > 0) {
|
||||||
const struct iomap *srcmap = iomap_iter_srcmap(&iter);
|
const struct iomap *srcmap = iomap_iter_srcmap(&iter);
|
||||||
|
|
||||||
if (srcmap->type == IOMAP_HOLE ||
|
if (WARN_ON_ONCE(iter.fbatch &&
|
||||||
srcmap->type == IOMAP_UNWRITTEN) {
|
srcmap->type != IOMAP_UNWRITTEN))
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
if (!iter.fbatch &&
|
||||||
|
(srcmap->type == IOMAP_HOLE ||
|
||||||
|
srcmap->type == IOMAP_UNWRITTEN)) {
|
||||||
s64 status;
|
s64 status;
|
||||||
|
|
||||||
if (range_dirty) {
|
if (range_dirty) {
|
||||||
|
|
@ -1526,7 +1650,7 @@ static int iomap_folio_mkwrite_iter(struct iomap_iter *iter,
|
||||||
folio_mark_dirty(folio);
|
folio_mark_dirty(folio);
|
||||||
}
|
}
|
||||||
|
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
|
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
|
||||||
|
|
@ -1559,16 +1683,25 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
|
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
|
||||||
|
|
||||||
void iomap_start_folio_write(struct inode *inode, struct folio *folio,
|
static void iomap_writeback_init(struct inode *inode, struct folio *folio)
|
||||||
size_t len)
|
|
||||||
{
|
{
|
||||||
struct iomap_folio_state *ifs = folio->private;
|
struct iomap_folio_state *ifs = folio->private;
|
||||||
|
|
||||||
WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
|
WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
|
||||||
if (ifs)
|
if (ifs) {
|
||||||
atomic_add(len, &ifs->write_bytes_pending);
|
WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
|
||||||
|
/*
|
||||||
|
* Set this to the folio size. After processing the folio for
|
||||||
|
* writeback in iomap_writeback_folio(), we'll subtract any
|
||||||
|
* ranges not written back.
|
||||||
|
*
|
||||||
|
* We do this because otherwise, we would have to atomically
|
||||||
|
* increment ifs->write_bytes_pending every time a range in the
|
||||||
|
* folio needs to be written back.
|
||||||
|
*/
|
||||||
|
atomic_set(&ifs->write_bytes_pending, folio_size(folio));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_start_folio_write);
|
|
||||||
|
|
||||||
void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
|
void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
|
||||||
size_t len)
|
size_t len)
|
||||||
|
|
@ -1585,7 +1718,7 @@ EXPORT_SYMBOL_GPL(iomap_finish_folio_write);
|
||||||
|
|
||||||
static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
|
static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
|
||||||
struct folio *folio, u64 pos, u32 rlen, u64 end_pos,
|
struct folio *folio, u64 pos, u32 rlen, u64 end_pos,
|
||||||
bool *wb_pending)
|
size_t *bytes_submitted)
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
|
|
@ -1599,11 +1732,11 @@ static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
|
||||||
pos += ret;
|
pos += ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Holes are not be written back by ->writeback_range, so track
|
* Holes are not written back by ->writeback_range, so track
|
||||||
* if we did handle anything that is not a hole here.
|
* if we did handle anything that is not a hole here.
|
||||||
*/
|
*/
|
||||||
if (wpc->iomap.type != IOMAP_HOLE)
|
if (wpc->iomap.type != IOMAP_HOLE)
|
||||||
*wb_pending = true;
|
*bytes_submitted += ret;
|
||||||
} while (rlen);
|
} while (rlen);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -1674,7 +1807,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
|
||||||
u64 pos = folio_pos(folio);
|
u64 pos = folio_pos(folio);
|
||||||
u64 end_pos = pos + folio_size(folio);
|
u64 end_pos = pos + folio_size(folio);
|
||||||
u64 end_aligned = 0;
|
u64 end_aligned = 0;
|
||||||
bool wb_pending = false;
|
size_t bytes_submitted = 0;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
u32 rlen;
|
u32 rlen;
|
||||||
|
|
||||||
|
|
@ -1694,14 +1827,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
|
||||||
iomap_set_range_dirty(folio, 0, end_pos - pos);
|
iomap_set_range_dirty(folio, 0, end_pos - pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
iomap_writeback_init(inode, folio);
|
||||||
* Keep the I/O completion handler from clearing the writeback
|
|
||||||
* bit until we have submitted all blocks by adding a bias to
|
|
||||||
* ifs->write_bytes_pending, which is dropped after submitting
|
|
||||||
* all blocks.
|
|
||||||
*/
|
|
||||||
WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
|
|
||||||
iomap_start_folio_write(inode, folio, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1716,13 +1842,13 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
|
||||||
end_aligned = round_up(end_pos, i_blocksize(inode));
|
end_aligned = round_up(end_pos, i_blocksize(inode));
|
||||||
while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
|
while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
|
||||||
error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos,
|
error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos,
|
||||||
&wb_pending);
|
&bytes_submitted);
|
||||||
if (error)
|
if (error)
|
||||||
break;
|
break;
|
||||||
pos += rlen;
|
pos += rlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wb_pending)
|
if (bytes_submitted)
|
||||||
wpc->nr_folios++;
|
wpc->nr_folios++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1740,12 +1866,20 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
|
||||||
* bit ourselves right after unlocking the page.
|
* bit ourselves right after unlocking the page.
|
||||||
*/
|
*/
|
||||||
if (ifs) {
|
if (ifs) {
|
||||||
if (atomic_dec_and_test(&ifs->write_bytes_pending))
|
/*
|
||||||
folio_end_writeback(folio);
|
* Subtract any bytes that were initially accounted to
|
||||||
} else {
|
* write_bytes_pending but skipped for writeback.
|
||||||
if (!wb_pending)
|
*/
|
||||||
folio_end_writeback(folio);
|
size_t bytes_not_submitted = folio_size(folio) -
|
||||||
|
bytes_submitted;
|
||||||
|
|
||||||
|
if (bytes_not_submitted)
|
||||||
|
iomap_finish_folio_write(inode, folio,
|
||||||
|
bytes_not_submitted);
|
||||||
|
} else if (!bytes_submitted) {
|
||||||
|
folio_end_writeback(folio);
|
||||||
}
|
}
|
||||||
|
|
||||||
mapping_set_error(inode->i_mapping, error);
|
mapping_set_error(inode->i_mapping, error);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,8 @@
|
||||||
* Private flags for iomap_dio, must not overlap with the public ones in
|
* Private flags for iomap_dio, must not overlap with the public ones in
|
||||||
* iomap.h:
|
* iomap.h:
|
||||||
*/
|
*/
|
||||||
#define IOMAP_DIO_NO_INVALIDATE (1U << 25)
|
#define IOMAP_DIO_NO_INVALIDATE (1U << 26)
|
||||||
#define IOMAP_DIO_CALLER_COMP (1U << 26)
|
#define IOMAP_DIO_COMP_WORK (1U << 27)
|
||||||
#define IOMAP_DIO_INLINE_COMP (1U << 27)
|
|
||||||
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
|
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
|
||||||
#define IOMAP_DIO_NEED_SYNC (1U << 29)
|
#define IOMAP_DIO_NEED_SYNC (1U << 29)
|
||||||
#define IOMAP_DIO_WRITE (1U << 30)
|
#define IOMAP_DIO_WRITE (1U << 30)
|
||||||
|
|
@ -140,11 +139,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_dio_complete);
|
EXPORT_SYMBOL_GPL(iomap_dio_complete);
|
||||||
|
|
||||||
static ssize_t iomap_dio_deferred_complete(void *data)
|
|
||||||
{
|
|
||||||
return iomap_dio_complete(data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void iomap_dio_complete_work(struct work_struct *work)
|
static void iomap_dio_complete_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
|
struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
|
||||||
|
|
@ -179,33 +173,33 @@ static void iomap_dio_done(struct iomap_dio *dio)
|
||||||
|
|
||||||
WRITE_ONCE(dio->submit.waiter, NULL);
|
WRITE_ONCE(dio->submit.waiter, NULL);
|
||||||
blk_wake_io_task(waiter);
|
blk_wake_io_task(waiter);
|
||||||
} else if (dio->flags & IOMAP_DIO_INLINE_COMP) {
|
return;
|
||||||
WRITE_ONCE(iocb->private, NULL);
|
}
|
||||||
iomap_dio_complete_work(&dio->aio.work);
|
|
||||||
} else if (dio->flags & IOMAP_DIO_CALLER_COMP) {
|
|
||||||
/*
|
|
||||||
* If this dio is flagged with IOMAP_DIO_CALLER_COMP, then
|
|
||||||
* schedule our completion that way to avoid an async punt to a
|
|
||||||
* workqueue.
|
|
||||||
*/
|
|
||||||
/* only polled IO cares about private cleared */
|
|
||||||
iocb->private = dio;
|
|
||||||
iocb->dio_complete = iomap_dio_deferred_complete;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Invoke ->ki_complete() directly. We've assigned our
|
* Always run error completions in user context. These are not
|
||||||
* dio_complete callback handler, and since the issuer set
|
* performance critical and some code relies on taking sleeping locks
|
||||||
* IOCB_DIO_CALLER_COMP, we know their ki_complete handler will
|
* for error handling.
|
||||||
* notice ->dio_complete being set and will defer calling that
|
*/
|
||||||
* handler until it can be done from a safe task context.
|
if (dio->error)
|
||||||
*
|
dio->flags |= IOMAP_DIO_COMP_WORK;
|
||||||
* Note that the 'res' being passed in here is not important
|
|
||||||
* for this case. The actual completion value of the request
|
/*
|
||||||
* will be gotten from dio_complete when that is run by the
|
* Never invalidate pages from this context to avoid deadlocks with
|
||||||
* issuer.
|
* buffered I/O completions when called from the ioend workqueue,
|
||||||
*/
|
* or avoid sleeping when called directly from ->bi_end_io.
|
||||||
iocb->ki_complete(iocb, 0);
|
* Tough luck if you hit the tiny race with someone dirtying the range
|
||||||
} else {
|
* right between this check and the actual completion.
|
||||||
|
*/
|
||||||
|
if ((dio->flags & IOMAP_DIO_WRITE) &&
|
||||||
|
!(dio->flags & IOMAP_DIO_COMP_WORK)) {
|
||||||
|
if (dio->iocb->ki_filp->f_mapping->nrpages)
|
||||||
|
dio->flags |= IOMAP_DIO_COMP_WORK;
|
||||||
|
else
|
||||||
|
dio->flags |= IOMAP_DIO_NO_INVALIDATE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dio->flags & IOMAP_DIO_COMP_WORK) {
|
||||||
struct inode *inode = file_inode(iocb->ki_filp);
|
struct inode *inode = file_inode(iocb->ki_filp);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -216,7 +210,11 @@ static void iomap_dio_done(struct iomap_dio *dio)
|
||||||
*/
|
*/
|
||||||
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
||||||
queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
|
queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
WRITE_ONCE(iocb->private, NULL);
|
||||||
|
iomap_dio_complete_work(&dio->aio.work);
|
||||||
}
|
}
|
||||||
|
|
||||||
void iomap_dio_bio_end_io(struct bio *bio)
|
void iomap_dio_bio_end_io(struct bio *bio)
|
||||||
|
|
@ -252,16 +250,9 @@ u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend)
|
||||||
/*
|
/*
|
||||||
* Try to avoid another context switch for the completion given
|
* Try to avoid another context switch for the completion given
|
||||||
* that we are already called from the ioend completion
|
* that we are already called from the ioend completion
|
||||||
* workqueue, but never invalidate pages from this thread to
|
* workqueue.
|
||||||
* avoid deadlocks with buffered I/O completions. Tough luck if
|
|
||||||
* you hit the tiny race with someone dirtying the range now
|
|
||||||
* between this check and the actual completion.
|
|
||||||
*/
|
*/
|
||||||
if (!dio->iocb->ki_filp->f_mapping->nrpages) {
|
dio->flags &= ~IOMAP_DIO_COMP_WORK;
|
||||||
dio->flags |= IOMAP_DIO_INLINE_COMP;
|
|
||||||
dio->flags |= IOMAP_DIO_NO_INVALIDATE;
|
|
||||||
}
|
|
||||||
dio->flags &= ~IOMAP_DIO_CALLER_COMP;
|
|
||||||
iomap_dio_done(dio);
|
iomap_dio_done(dio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -306,23 +297,6 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Use a FUA write if we need datasync semantics and this is a pure data I/O
|
|
||||||
* that doesn't require any metadata updates (including after I/O completion
|
|
||||||
* such as unwritten extent conversion) and the underlying device either
|
|
||||||
* doesn't have a volatile write cache or supports FUA.
|
|
||||||
* This allows us to avoid cache flushes on I/O completion.
|
|
||||||
*/
|
|
||||||
static inline bool iomap_dio_can_use_fua(const struct iomap *iomap,
|
|
||||||
struct iomap_dio *dio)
|
|
||||||
{
|
|
||||||
if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_DIRTY))
|
|
||||||
return false;
|
|
||||||
if (!(dio->flags & IOMAP_DIO_WRITE_THROUGH))
|
|
||||||
return false;
|
|
||||||
return !bdev_write_cache(iomap->bdev) || bdev_fua(iomap->bdev);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
{
|
{
|
||||||
const struct iomap *iomap = &iter->iomap;
|
const struct iomap *iomap = &iter->iomap;
|
||||||
|
|
@ -336,12 +310,39 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
int nr_pages, ret = 0;
|
int nr_pages, ret = 0;
|
||||||
u64 copied = 0;
|
u64 copied = 0;
|
||||||
size_t orig_count;
|
size_t orig_count;
|
||||||
|
unsigned int alignment;
|
||||||
|
|
||||||
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1))
|
/*
|
||||||
|
* File systems that write out of place and always allocate new blocks
|
||||||
|
* need each bio to be block aligned as that's the unit of allocation.
|
||||||
|
*/
|
||||||
|
if (dio->flags & IOMAP_DIO_FSBLOCK_ALIGNED)
|
||||||
|
alignment = fs_block_size;
|
||||||
|
else
|
||||||
|
alignment = bdev_logical_block_size(iomap->bdev);
|
||||||
|
|
||||||
|
if ((pos | length) & (alignment - 1))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (dio->flags & IOMAP_DIO_WRITE) {
|
if (dio->flags & IOMAP_DIO_WRITE) {
|
||||||
bio_opf |= REQ_OP_WRITE;
|
bool need_completion_work = true;
|
||||||
|
|
||||||
|
switch (iomap->type) {
|
||||||
|
case IOMAP_MAPPED:
|
||||||
|
/*
|
||||||
|
* Directly mapped I/O does not inherently need to do
|
||||||
|
* work at I/O completion time. But there are various
|
||||||
|
* cases below where this will get set again.
|
||||||
|
*/
|
||||||
|
need_completion_work = false;
|
||||||
|
break;
|
||||||
|
case IOMAP_UNWRITTEN:
|
||||||
|
dio->flags |= IOMAP_DIO_UNWRITTEN;
|
||||||
|
need_zeroout = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (iomap->flags & IOMAP_F_ATOMIC_BIO) {
|
if (iomap->flags & IOMAP_F_ATOMIC_BIO) {
|
||||||
/*
|
/*
|
||||||
|
|
@ -354,35 +355,54 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
bio_opf |= REQ_ATOMIC;
|
bio_opf |= REQ_ATOMIC;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iomap->type == IOMAP_UNWRITTEN) {
|
if (iomap->flags & IOMAP_F_SHARED) {
|
||||||
dio->flags |= IOMAP_DIO_UNWRITTEN;
|
/*
|
||||||
|
* Unsharing of needs to update metadata at I/O
|
||||||
|
* completion time.
|
||||||
|
*/
|
||||||
|
need_completion_work = true;
|
||||||
|
dio->flags |= IOMAP_DIO_COW;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iomap->flags & IOMAP_F_NEW) {
|
||||||
|
/*
|
||||||
|
* Newly allocated blocks might need recording in
|
||||||
|
* metadata at I/O completion time.
|
||||||
|
*/
|
||||||
|
need_completion_work = true;
|
||||||
need_zeroout = true;
|
need_zeroout = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iomap->flags & IOMAP_F_SHARED)
|
/*
|
||||||
dio->flags |= IOMAP_DIO_COW;
|
* Use a FUA write if we need datasync semantics and this is a
|
||||||
|
* pure overwrite that doesn't require any metadata updates.
|
||||||
if (iomap->flags & IOMAP_F_NEW)
|
*
|
||||||
need_zeroout = true;
|
* This allows us to avoid cache flushes on I/O completion.
|
||||||
else if (iomap->type == IOMAP_MAPPED &&
|
*/
|
||||||
iomap_dio_can_use_fua(iomap, dio))
|
if (dio->flags & IOMAP_DIO_WRITE_THROUGH) {
|
||||||
bio_opf |= REQ_FUA;
|
if (!need_completion_work &&
|
||||||
|
!(iomap->flags & IOMAP_F_DIRTY) &&
|
||||||
if (!(bio_opf & REQ_FUA))
|
(!bdev_write_cache(iomap->bdev) ||
|
||||||
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
|
bdev_fua(iomap->bdev)))
|
||||||
|
bio_opf |= REQ_FUA;
|
||||||
|
else
|
||||||
|
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We can only do deferred completion for pure overwrites that
|
* We can only do inline completion for pure overwrites that
|
||||||
* don't require additional I/O at completion time.
|
* don't require additional I/O at completion time.
|
||||||
*
|
*
|
||||||
* This rules out writes that need zeroing or extent conversion,
|
* This rules out writes that need zeroing or metdata updates to
|
||||||
* extend the file size, or issue metadata I/O or cache flushes
|
* convert unwritten or shared extents.
|
||||||
* during completion processing.
|
*
|
||||||
|
* Writes that extend i_size are also not supported, but this is
|
||||||
|
* handled in __iomap_dio_rw().
|
||||||
*/
|
*/
|
||||||
if (need_zeroout || (pos >= i_size_read(inode)) ||
|
if (need_completion_work)
|
||||||
((dio->flags & IOMAP_DIO_NEED_SYNC) &&
|
dio->flags |= IOMAP_DIO_COMP_WORK;
|
||||||
!(bio_opf & REQ_FUA)))
|
|
||||||
dio->flags &= ~IOMAP_DIO_CALLER_COMP;
|
bio_opf |= REQ_OP_WRITE;
|
||||||
} else {
|
} else {
|
||||||
bio_opf |= REQ_OP_READ;
|
bio_opf |= REQ_OP_READ;
|
||||||
}
|
}
|
||||||
|
|
@ -403,7 +423,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
* ones we set for inline and deferred completions. If none of those
|
* ones we set for inline and deferred completions. If none of those
|
||||||
* are available for this IO, clear the polled flag.
|
* are available for this IO, clear the polled flag.
|
||||||
*/
|
*/
|
||||||
if (!(dio->flags & (IOMAP_DIO_INLINE_COMP|IOMAP_DIO_CALLER_COMP)))
|
if (dio->flags & IOMAP_DIO_COMP_WORK)
|
||||||
dio->iocb->ki_flags &= ~IOCB_HIPRI;
|
dio->iocb->ki_flags &= ~IOCB_HIPRI;
|
||||||
|
|
||||||
if (need_zeroout) {
|
if (need_zeroout) {
|
||||||
|
|
@ -434,7 +454,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
bio->bi_end_io = iomap_dio_bio_end_io;
|
bio->bi_end_io = iomap_dio_bio_end_io;
|
||||||
|
|
||||||
ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
|
ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
|
||||||
bdev_logical_block_size(iomap->bdev) - 1);
|
alignment - 1);
|
||||||
if (unlikely(ret)) {
|
if (unlikely(ret)) {
|
||||||
/*
|
/*
|
||||||
* We have to stop part way through an IO. We must fall
|
* We have to stop part way through an IO. We must fall
|
||||||
|
|
@ -496,7 +516,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
/* Undo iter limitation to current extent */
|
/* Undo iter limitation to current extent */
|
||||||
iov_iter_reexpand(dio->submit.iter, orig_count - copied);
|
iov_iter_reexpand(dio->submit.iter, orig_count - copied);
|
||||||
if (copied)
|
if (copied)
|
||||||
return iomap_iter_advance(iter, &copied);
|
return iomap_iter_advance(iter, copied);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -507,7 +527,7 @@ static int iomap_dio_hole_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
dio->size += length;
|
dio->size += length;
|
||||||
if (!length)
|
if (!length)
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int iomap_dio_inline_iter(struct iomap_iter *iomi, struct iomap_dio *dio)
|
static int iomap_dio_inline_iter(struct iomap_iter *iomi, struct iomap_dio *dio)
|
||||||
|
|
@ -542,7 +562,7 @@ static int iomap_dio_inline_iter(struct iomap_iter *iomi, struct iomap_dio *dio)
|
||||||
dio->size += copied;
|
dio->size += copied;
|
||||||
if (!copied)
|
if (!copied)
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
return iomap_iter_advance(iomi, &copied);
|
return iomap_iter_advance(iomi, copied);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int iomap_dio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
static int iomap_dio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||||
|
|
@ -639,10 +659,10 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||||
iomi.flags |= IOMAP_NOWAIT;
|
iomi.flags |= IOMAP_NOWAIT;
|
||||||
|
|
||||||
if (iov_iter_rw(iter) == READ) {
|
if (dio_flags & IOMAP_DIO_FSBLOCK_ALIGNED)
|
||||||
/* reads can always complete inline */
|
dio->flags |= IOMAP_DIO_FSBLOCK_ALIGNED;
|
||||||
dio->flags |= IOMAP_DIO_INLINE_COMP;
|
|
||||||
|
|
||||||
|
if (iov_iter_rw(iter) == READ) {
|
||||||
if (iomi.pos >= dio->i_size)
|
if (iomi.pos >= dio->i_size)
|
||||||
goto out_free_dio;
|
goto out_free_dio;
|
||||||
|
|
||||||
|
|
@ -656,15 +676,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
iomi.flags |= IOMAP_WRITE;
|
iomi.flags |= IOMAP_WRITE;
|
||||||
dio->flags |= IOMAP_DIO_WRITE;
|
dio->flags |= IOMAP_DIO_WRITE;
|
||||||
|
|
||||||
/*
|
|
||||||
* Flag as supporting deferred completions, if the issuer
|
|
||||||
* groks it. This can avoid a workqueue punt for writes.
|
|
||||||
* We may later clear this flag if we need to do other IO
|
|
||||||
* as part of this IO completion.
|
|
||||||
*/
|
|
||||||
if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
|
|
||||||
dio->flags |= IOMAP_DIO_CALLER_COMP;
|
|
||||||
|
|
||||||
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
|
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
|
||||||
ret = -EAGAIN;
|
ret = -EAGAIN;
|
||||||
if (iomi.pos >= dio->i_size ||
|
if (iomi.pos >= dio->i_size ||
|
||||||
|
|
@ -693,6 +704,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
dio->flags |= IOMAP_DIO_WRITE_THROUGH;
|
dio->flags |= IOMAP_DIO_WRITE_THROUGH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* i_size updates must to happen from process context.
|
||||||
|
*/
|
||||||
|
if (iomi.pos + iomi.len > dio->i_size)
|
||||||
|
dio->flags |= IOMAP_DIO_COMP_WORK;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try to invalidate cache pages for the range we are writing.
|
* Try to invalidate cache pages for the range we are writing.
|
||||||
* If this invalidation fails, let the caller fall back to
|
* If this invalidation fails, let the caller fall back to
|
||||||
|
|
@ -717,12 +734,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
}
|
}
|
||||||
goto out_free_dio;
|
goto out_free_dio;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!wait_for_completion && !inode->i_sb->s_dio_done_wq) {
|
if (!wait_for_completion && !inode->i_sb->s_dio_done_wq) {
|
||||||
ret = sb_init_dio_done_wq(inode->i_sb);
|
ret = sb_init_dio_done_wq(inode->i_sb);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out_free_dio;
|
goto out_free_dio;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inode_dio_begin(inode);
|
inode_dio_begin(inode);
|
||||||
|
|
@ -765,9 +782,14 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
* If all the writes we issued were already written through to the
|
* If all the writes we issued were already written through to the
|
||||||
* media, we don't need to flush the cache on IO completion. Clear the
|
* media, we don't need to flush the cache on IO completion. Clear the
|
||||||
* sync flag for this case.
|
* sync flag for this case.
|
||||||
|
*
|
||||||
|
* Otherwise clear the inline completion flag if any sync work is
|
||||||
|
* needed, as that needs to be performed from process context.
|
||||||
*/
|
*/
|
||||||
if (dio->flags & IOMAP_DIO_WRITE_THROUGH)
|
if (dio->flags & IOMAP_DIO_WRITE_THROUGH)
|
||||||
dio->flags &= ~IOMAP_DIO_NEED_SYNC;
|
dio->flags &= ~IOMAP_DIO_NEED_SYNC;
|
||||||
|
else if (dio->flags & IOMAP_DIO_NEED_SYNC)
|
||||||
|
dio->flags |= IOMAP_DIO_COMP_WORK;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are about to drop our additional submission reference, which
|
* We are about to drop our additional submission reference, which
|
||||||
|
|
|
||||||
|
|
@ -6,4 +6,16 @@
|
||||||
|
|
||||||
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
|
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
|
||||||
|
|
||||||
|
#ifdef CONFIG_BLOCK
|
||||||
|
int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter,
|
||||||
|
struct folio *folio, loff_t pos, size_t len);
|
||||||
|
#else
|
||||||
|
static inline int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter,
|
||||||
|
struct folio *folio, loff_t pos, size_t len)
|
||||||
|
{
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_BLOCK */
|
||||||
|
|
||||||
#endif /* _IOMAP_INTERNAL_H */
|
#endif /* _IOMAP_INTERNAL_H */
|
||||||
|
|
|
||||||
|
|
@ -194,8 +194,6 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
|
||||||
if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff))
|
if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff))
|
||||||
goto new_ioend;
|
goto new_ioend;
|
||||||
|
|
||||||
iomap_start_folio_write(wpc->inode, folio, map_len);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clamp io_offset and io_size to the incore EOF so that ondisk
|
* Clamp io_offset and io_size to the incore EOF so that ondisk
|
||||||
* file size updates in the ioend completion are byte-accurate.
|
* file size updates in the ioend completion are byte-accurate.
|
||||||
|
|
|
||||||
|
|
@ -8,22 +8,24 @@
|
||||||
|
|
||||||
static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
|
static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
|
||||||
{
|
{
|
||||||
|
if (iter->fbatch) {
|
||||||
|
folio_batch_release(iter->fbatch);
|
||||||
|
kfree(iter->fbatch);
|
||||||
|
iter->fbatch = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
iter->status = 0;
|
iter->status = 0;
|
||||||
memset(&iter->iomap, 0, sizeof(iter->iomap));
|
memset(&iter->iomap, 0, sizeof(iter->iomap));
|
||||||
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
|
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Advance the current iterator position and decrement the remaining length */
|
||||||
* Advance the current iterator position and output the length remaining for the
|
int iomap_iter_advance(struct iomap_iter *iter, u64 count)
|
||||||
* current mapping.
|
|
||||||
*/
|
|
||||||
int iomap_iter_advance(struct iomap_iter *iter, u64 *count)
|
|
||||||
{
|
{
|
||||||
if (WARN_ON_ONCE(*count > iomap_length(iter)))
|
if (WARN_ON_ONCE(count > iomap_length(iter)))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
iter->pos += *count;
|
iter->pos += count;
|
||||||
iter->len -= *count;
|
iter->len -= count;
|
||||||
*count = iomap_length(iter);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,13 +16,13 @@ static int iomap_seek_hole_iter(struct iomap_iter *iter,
|
||||||
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
|
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
|
||||||
iter->pos, iter->pos + length, SEEK_HOLE);
|
iter->pos, iter->pos + length, SEEK_HOLE);
|
||||||
if (*hole_pos == iter->pos + length)
|
if (*hole_pos == iter->pos + length)
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
return 0;
|
return 0;
|
||||||
case IOMAP_HOLE:
|
case IOMAP_HOLE:
|
||||||
*hole_pos = iter->pos;
|
*hole_pos = iter->pos;
|
||||||
return 0;
|
return 0;
|
||||||
default:
|
default:
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -59,12 +59,12 @@ static int iomap_seek_data_iter(struct iomap_iter *iter,
|
||||||
|
|
||||||
switch (iter->iomap.type) {
|
switch (iter->iomap.type) {
|
||||||
case IOMAP_HOLE:
|
case IOMAP_HOLE:
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
case IOMAP_UNWRITTEN:
|
case IOMAP_UNWRITTEN:
|
||||||
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
|
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
|
||||||
iter->pos, iter->pos + length, SEEK_DATA);
|
iter->pos, iter->pos + length, SEEK_DATA);
|
||||||
if (*hole_pos < 0)
|
if (*hole_pos < 0)
|
||||||
return iomap_iter_advance(iter, &length);
|
return iomap_iter_advance(iter, length);
|
||||||
return 0;
|
return 0;
|
||||||
default:
|
default:
|
||||||
*hole_pos = iter->pos;
|
*hole_pos = iter->pos;
|
||||||
|
|
|
||||||
|
|
@ -122,9 +122,10 @@ DEFINE_RANGE_EVENT(iomap_zero_iter);
|
||||||
|
|
||||||
|
|
||||||
#define IOMAP_DIO_STRINGS \
|
#define IOMAP_DIO_STRINGS \
|
||||||
{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
|
{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
|
||||||
{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
|
{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
|
||||||
{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }
|
{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }, \
|
||||||
|
{IOMAP_DIO_FSBLOCK_ALIGNED, "DIO_FSBLOCK_ALIGNED" }
|
||||||
|
|
||||||
DECLARE_EVENT_CLASS(iomap_class,
|
DECLARE_EVENT_CLASS(iomap_class,
|
||||||
TP_PROTO(struct inode *inode, struct iomap *iomap),
|
TP_PROTO(struct inode *inode, struct iomap *iomap),
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,8 @@
|
||||||
#define XFS_ERRTAG_WRITE_DELAY_MS 43
|
#define XFS_ERRTAG_WRITE_DELAY_MS 43
|
||||||
#define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44
|
#define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44
|
||||||
#define XFS_ERRTAG_METAFILE_RESV_CRITICAL 45
|
#define XFS_ERRTAG_METAFILE_RESV_CRITICAL 45
|
||||||
#define XFS_ERRTAG_MAX 46
|
#define XFS_ERRTAG_FORCE_ZERO_RANGE 46
|
||||||
|
#define XFS_ERRTAG_MAX 47
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
|
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
|
||||||
|
|
@ -133,7 +134,8 @@ XFS_ERRTAG(ATTR_LEAF_TO_NODE, attr_leaf_to_node, 1) \
|
||||||
XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \
|
XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \
|
||||||
XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \
|
XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \
|
||||||
XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \
|
XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \
|
||||||
XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4)
|
XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4) \
|
||||||
|
XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4)
|
||||||
#endif /* XFS_ERRTAG */
|
#endif /* XFS_ERRTAG */
|
||||||
|
|
||||||
#endif /* __XFS_ERRORTAG_H_ */
|
#endif /* __XFS_ERRORTAG_H_ */
|
||||||
|
|
|
||||||
|
|
@ -742,14 +742,15 @@ xfs_vm_read_folio(
|
||||||
struct file *unused,
|
struct file *unused,
|
||||||
struct folio *folio)
|
struct folio *folio)
|
||||||
{
|
{
|
||||||
return iomap_read_folio(folio, &xfs_read_iomap_ops);
|
iomap_bio_read_folio(folio, &xfs_read_iomap_ops);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
STATIC void
|
STATIC void
|
||||||
xfs_vm_readahead(
|
xfs_vm_readahead(
|
||||||
struct readahead_control *rac)
|
struct readahead_control *rac)
|
||||||
{
|
{
|
||||||
iomap_readahead(rac, &xfs_read_iomap_ops);
|
iomap_bio_readahead(rac, &xfs_read_iomap_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,8 @@
|
||||||
#include "xfs_file.h"
|
#include "xfs_file.h"
|
||||||
#include "xfs_aops.h"
|
#include "xfs_aops.h"
|
||||||
#include "xfs_zone_alloc.h"
|
#include "xfs_zone_alloc.h"
|
||||||
|
#include "xfs_error.h"
|
||||||
|
#include "xfs_errortag.h"
|
||||||
|
|
||||||
#include <linux/dax.h>
|
#include <linux/dax.h>
|
||||||
#include <linux/falloc.h>
|
#include <linux/falloc.h>
|
||||||
|
|
@ -674,8 +676,17 @@ xfs_file_dio_write_aligned(
|
||||||
struct xfs_zone_alloc_ctx *ac)
|
struct xfs_zone_alloc_ctx *ac)
|
||||||
{
|
{
|
||||||
unsigned int iolock = XFS_IOLOCK_SHARED;
|
unsigned int iolock = XFS_IOLOCK_SHARED;
|
||||||
|
unsigned int dio_flags = 0;
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For always COW inodes, each bio must be aligned to the file system
|
||||||
|
* block size and not just the device sector size because we need to
|
||||||
|
* allocate a block-aligned amount of space for each write.
|
||||||
|
*/
|
||||||
|
if (xfs_is_always_cow_inode(ip))
|
||||||
|
dio_flags |= IOMAP_DIO_FSBLOCK_ALIGNED;
|
||||||
|
|
||||||
ret = xfs_ilock_iocb_for_write(iocb, &iolock);
|
ret = xfs_ilock_iocb_for_write(iocb, &iolock);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -693,7 +704,7 @@ xfs_file_dio_write_aligned(
|
||||||
iolock = XFS_IOLOCK_SHARED;
|
iolock = XFS_IOLOCK_SHARED;
|
||||||
}
|
}
|
||||||
trace_xfs_file_direct_write(iocb, from);
|
trace_xfs_file_direct_write(iocb, from);
|
||||||
ret = iomap_dio_rw(iocb, from, ops, dops, 0, ac, 0);
|
ret = iomap_dio_rw(iocb, from, ops, dops, dio_flags, ac, 0);
|
||||||
out_unlock:
|
out_unlock:
|
||||||
xfs_iunlock(ip, iolock);
|
xfs_iunlock(ip, iolock);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -890,15 +901,7 @@ xfs_file_dio_write(
|
||||||
if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
|
if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/*
|
if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
|
||||||
* For always COW inodes we also must check the alignment of each
|
|
||||||
* individual iovec segment, as they could end up with different
|
|
||||||
* I/Os due to the way bio_iov_iter_get_pages works, and we'd
|
|
||||||
* then overwrite an already written block.
|
|
||||||
*/
|
|
||||||
if (((iocb->ki_pos | count) & ip->i_mount->m_blockmask) ||
|
|
||||||
(xfs_is_always_cow_inode(ip) &&
|
|
||||||
(iov_iter_alignment(from) & ip->i_mount->m_blockmask)))
|
|
||||||
return xfs_file_dio_write_unaligned(ip, iocb, from);
|
return xfs_file_dio_write_unaligned(ip, iocb, from);
|
||||||
if (xfs_is_zoned_inode(ip))
|
if (xfs_is_zoned_inode(ip))
|
||||||
return xfs_file_dio_write_zoned(ip, iocb, from);
|
return xfs_file_dio_write_zoned(ip, iocb, from);
|
||||||
|
|
@@ -1254,23 +1257,36 @@ xfs_falloc_zero_range(
 	struct xfs_zone_alloc_ctx *ac)
 {
 	struct inode		*inode = file_inode(file);
+	struct xfs_inode	*ip = XFS_I(inode);
 	unsigned int		blksize = i_blocksize(inode);
 	loff_t			new_size = 0;
 	int			error;
 
-	trace_xfs_zero_file_space(XFS_I(inode));
+	trace_xfs_zero_file_space(ip);
 
 	error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
 	if (error)
 		return error;
 
-	error = xfs_free_file_space(XFS_I(inode), offset, len, ac);
-	if (error)
-		return error;
-
-	len = round_up(offset + len, blksize) - round_down(offset, blksize);
-	offset = round_down(offset, blksize);
-	error = xfs_alloc_file_space(XFS_I(inode), offset, len);
+	/*
+	 * Zero range implements a full zeroing mechanism but is only used in
+	 * limited situations. It is more efficient to allocate unwritten
+	 * extents than to perform zeroing here, so use an errortag to randomly
+	 * force zeroing on DEBUG kernels for added test coverage.
+	 */
+	if (XFS_TEST_ERROR(ip->i_mount,
+			   XFS_ERRTAG_FORCE_ZERO_RANGE)) {
+		error = xfs_zero_range(ip, offset, len, ac, NULL);
+	} else {
+		error = xfs_free_file_space(ip, offset, len, ac);
+		if (error)
+			return error;
+
+		len = round_up(offset + len, blksize) -
+				round_down(offset, blksize);
+		offset = round_down(offset, blksize);
+		error = xfs_alloc_file_space(ip, offset, len);
+	}
 	if (error)
 		return error;
 	return xfs_falloc_setsize(file, new_size);
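A quick worked example of the rounding in the fallback branch above, as a standalone userspace sketch assuming a 4096-byte block size (the macros only mimic the result of the kernel's round_up()/round_down() for this case):

	#include <stdio.h>

	#define round_down(x, y)	((x) - ((x) % (y)))
	#define round_up(x, y)		round_down((x) + (y) - 1, (y))

	int main(void)
	{
		unsigned long long offset = 5000, len = 5000, blksize = 4096;

		/* Zeroing bytes 5000..9999 touches fs blocks 1 and 2. */
		len = round_up(offset + len, blksize) -
				round_down(offset, blksize);
		offset = round_down(offset, blksize);

		/* Prints "offset 4096 len 8192", i.e. blocks 1..2. */
		printf("offset %llu len %llu\n", offset, len);
		return 0;
	}

In other words, the whole block-aligned span covering the request is handed to xfs_alloc_file_space() so the allocation never ends on a sub-block boundary.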
@@ -1758,6 +1758,8 @@ xfs_buffered_write_iomap_begin(
 	struct iomap		*iomap,
 	struct iomap		*srcmap)
 {
+	struct iomap_iter	*iter = container_of(iomap, struct iomap_iter,
+						     iomap);
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1823,21 +1825,41 @@ xfs_buffered_write_iomap_begin(
 	}
 
 	/*
-	 * For zeroing, trim a delalloc extent that extends beyond the EOF
-	 * block. If it starts beyond the EOF block, convert it to an
+	 * For zeroing, trim extents that extend beyond the EOF block. If a
+	 * delalloc extent starts beyond the EOF block, convert it to an
 	 * unwritten extent.
 	 */
-	if ((flags & IOMAP_ZERO) && imap.br_startoff <= offset_fsb &&
-	    isnullstartblock(imap.br_startblock)) {
+	if (flags & IOMAP_ZERO) {
 		xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+		u64		end;
 
-		if (offset_fsb >= eof_fsb)
+		if (isnullstartblock(imap.br_startblock) &&
+		    offset_fsb >= eof_fsb)
 			goto convert_delay;
-		if (end_fsb > eof_fsb) {
+		if (offset_fsb < eof_fsb && end_fsb > eof_fsb)
 			end_fsb = eof_fsb;
-			xfs_trim_extent(&imap, offset_fsb,
-					end_fsb - offset_fsb);
+
+		/*
+		 * Look up dirty folios for unwritten mappings within EOF.
+		 * Providing this bypasses the flush iomap uses to trigger
+		 * extent conversion when unwritten mappings have dirty
+		 * pagecache in need of zeroing.
+		 *
+		 * Trim the mapping to the end pos of the lookup, which in turn
+		 * was trimmed to the end of the batch if it became full before
+		 * the end of the mapping.
+		 */
+		if (imap.br_state == XFS_EXT_UNWRITTEN &&
+		    offset_fsb < eof_fsb) {
+			loff_t len = min(count,
+					 XFS_FSB_TO_B(mp, imap.br_blockcount));
+
+			end = iomap_fill_dirty_folios(iter, offset, len);
+			end_fsb = min_t(xfs_fileoff_t, end_fsb,
+					XFS_B_TO_FSB(mp, end));
 		}
+
+		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
 	}
 
 	/*
@@ -112,12 +112,13 @@ static const struct iomap_ops zonefs_write_iomap_ops = {
 
 static int zonefs_read_folio(struct file *unused, struct folio *folio)
 {
-	return iomap_read_folio(folio, &zonefs_read_iomap_ops);
+	iomap_bio_read_folio(folio, &zonefs_read_iomap_ops);
+	return 0;
 }
 
 static void zonefs_readahead(struct readahead_control *rac)
 {
-	iomap_readahead(rac, &zonefs_read_iomap_ops);
+	iomap_bio_readahead(rac, &zonefs_read_iomap_ops);
 }
 
 /*
@@ -367,23 +367,9 @@ struct readahead_control;
 #define IOCB_NOIO		(1 << 20)
 /* can use bio alloc cache */
 #define IOCB_ALLOC_CACHE	(1 << 21)
-/*
- * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
- * iocb completion can be passed back to the owner for execution from a safe
- * context rather than needing to be punted through a workqueue. If this
- * flag is set, the bio completion handling may set iocb->dio_complete to a
- * handler function and iocb->private to context information for that handler.
- * The issuer should call the handler with that context information from task
- * context to complete the processing of the iocb. Note that while this
- * provides a task context for the dio_complete() callback, it should only be
- * used on the completion side for non-IO generating completions. It's fine to
- * call blocking functions from this callback, but they should not wait for
- * unrelated IO (like cache flushing, new IO generation, etc).
- */
-#define IOCB_DIO_CALLER_COMP	(1 << 22)
 /* kiocb is a read or write operation submitted by fs/aio.c. */
-#define IOCB_AIO_RW		(1 << 23)
-#define IOCB_HAS_METADATA	(1 << 24)
+#define IOCB_AIO_RW		(1 << 22)
+#define IOCB_HAS_METADATA	(1 << 23)
 
 /* for use in trace events */
 #define TRACE_IOCB_STRINGS \
@@ -400,7 +386,6 @@ struct readahead_control;
 	{ IOCB_WAITQ,		"WAITQ" }, \
 	{ IOCB_NOIO,		"NOIO" }, \
 	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
-	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }, \
 	{ IOCB_AIO_RW,		"AIO_RW" }, \
 	{ IOCB_HAS_METADATA,	"AIO_HAS_METADATA" }
 
@@ -412,23 +397,13 @@ struct kiocb {
 	int			ki_flags;
 	u16			ki_ioprio;	/* See linux/ioprio.h */
 	u8			ki_write_stream;
-	union {
-		/*
-		 * Only used for async buffered reads, where it denotes the
-		 * page waitqueue associated with completing the read. Valid
-		 * IFF IOCB_WAITQ is set.
-		 */
-		struct wait_page_queue	*ki_waitq;
-		/*
-		 * Can be used for O_DIRECT IO, where the completion handling
-		 * is punted back to the issuer of the IO. May only be set
-		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
-		 * must then check for presence of this handler when ki_complete
-		 * is invoked. The data passed in to this handler must be
-		 * assigned to ->private when dio_complete is assigned.
-		 */
-		ssize_t (*dio_complete)(void *data);
-	};
+	/*
+	 * Only used for async buffered reads, where it denotes the page
+	 * waitqueue associated with completing the read.
+	 * Valid IFF IOCB_WAITQ is set.
+	 */
+	struct wait_page_queue	*ki_waitq;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/mm_types.h>
 #include <linux/blkdev.h>
+#include <linux/pagevec.h>
 
 struct address_space;
 struct fiemap_extent_info;
@@ -16,6 +17,7 @@ struct inode;
 struct iomap_iter;
 struct iomap_dio;
 struct iomap_writepage_ctx;
+struct iomap_read_folio_ctx;
 struct iov_iter;
 struct kiocb;
 struct page;
@@ -241,11 +243,12 @@ struct iomap_iter {
 	unsigned		flags;
 	struct iomap		iomap;
 	struct iomap		srcmap;
+	struct folio_batch	*fbatch;
 	void			*private;
 };
 
 int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
-int iomap_iter_advance(struct iomap_iter *iter, u64 *count);
+int iomap_iter_advance(struct iomap_iter *iter, u64 count);
 
 /**
  * iomap_length_trim - trimmed length of the current iomap iteration
@@ -282,9 +285,7 @@ static inline u64 iomap_length(const struct iomap_iter *iter)
  */
 static inline int iomap_iter_advance_full(struct iomap_iter *iter)
 {
-	u64 length = iomap_length(iter);
-
-	return iomap_iter_advance(iter, &length);
+	return iomap_iter_advance(iter, iomap_length(iter));
 }
 
 /**
@@ -339,8 +340,10 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter)
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops,
 		const struct iomap_write_ops *write_ops, void *private);
-int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
-void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
+void iomap_read_folio(const struct iomap_ops *ops,
+		struct iomap_read_folio_ctx *ctx);
+void iomap_readahead(const struct iomap_ops *ops,
+		struct iomap_read_folio_ctx *ctx);
 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
@@ -349,6 +352,8 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops,
 		const struct iomap_write_ops *write_ops);
+loff_t iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t offset,
+		loff_t length);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, const struct iomap_ops *ops,
 		const struct iomap_write_ops *write_ops, void *private);
@@ -430,6 +435,10 @@ struct iomap_writeback_ops {
 	 * An existing mapping from a previous call to this method can be reused
 	 * by the file system if it is still valid.
 	 *
+	 * If this succeeds, iomap_finish_folio_write() must be called once
+	 * writeback completes for the range, regardless of whether the
+	 * writeback succeeded or failed.
+	 *
 	 * Returns the number of bytes processed or a negative errno.
 	 */
 	ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc,
@@ -467,14 +476,41 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
 		loff_t pos, loff_t end_pos, unsigned int dirty_len);
 int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error);
 
-void iomap_start_folio_write(struct inode *inode, struct folio *folio,
-		size_t len);
+void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
+		int error);
 void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
 		size_t len);
 
 int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio);
 int iomap_writepages(struct iomap_writepage_ctx *wpc);
 
+struct iomap_read_folio_ctx {
+	const struct iomap_read_ops	*ops;
+	struct folio			*cur_folio;
+	struct readahead_control	*rac;
+	void				*read_ctx;
+};
+
+struct iomap_read_ops {
+	/*
+	 * Read in a folio range.
+	 *
+	 * If this succeeds, iomap_finish_folio_read() must be called after the
+	 * range is read in, regardless of whether the read succeeded or failed.
+	 *
+	 * Returns 0 on success or a negative error on failure.
+	 */
+	int (*read_folio_range)(const struct iomap_iter *iter,
+			struct iomap_read_folio_ctx *ctx, size_t len);
+
+	/*
+	 * Submit any pending read requests.
+	 *
+	 * This is optional.
	 */
+	void (*submit_read)(struct iomap_read_folio_ctx *ctx);
+};
+
 /*
  * Flags for direct I/O ->end_io:
  */
@@ -518,6 +554,14 @@ struct iomap_dio_ops {
  */
 #define IOMAP_DIO_PARTIAL		(1 << 2)
 
+/*
+ * Ensure each bio is aligned to fs block size.
+ *
+ * For filesystems which need to calculate/verify the checksum of each fs
+ * block. Otherwise they may not be able to handle unaligned bios.
+ */
+#define IOMAP_DIO_FSBLOCK_ALIGNED	(1 << 3)
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		unsigned int dio_flags, void *private, size_t done_before);
@@ -540,4 +584,30 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
 
 extern struct bio_set iomap_ioend_bioset;
 
+#ifdef CONFIG_BLOCK
+extern const struct iomap_read_ops iomap_bio_read_ops;
+
+static inline void iomap_bio_read_folio(struct folio *folio,
+		const struct iomap_ops *ops)
+{
+	struct iomap_read_folio_ctx ctx = {
+		.ops		= &iomap_bio_read_ops,
+		.cur_folio	= folio,
+	};
+
+	iomap_read_folio(ops, &ctx);
+}
+
+static inline void iomap_bio_readahead(struct readahead_control *rac,
+		const struct iomap_ops *ops)
+{
+	struct iomap_read_folio_ctx ctx = {
+		.ops	= &iomap_bio_read_ops,
+		.rac	= rac,
+	};
+
+	iomap_readahead(ops, &ctx);
+}
+#endif /* CONFIG_BLOCK */
+
 #endif /* LINUX_IOMAP_H */
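To make the new read context concrete, here is a hedged sketch of how a filesystem that does not build bios could wire up ->read_folio_range and the reworked iomap_read_folio() entry point. Every foo_* name is invented for illustration and this is not code from the series; the completion pairing follows the iomap_read_ops comment above.

	#include <linux/iomap.h>
	#include <linux/pagemap.h>

	/* Hypothetical transport and iomap_ops; both are assumptions. */
	int foo_fill_folio(struct inode *inode, struct folio *folio,
			size_t off, size_t len);
	extern const struct iomap_ops foo_iomap_ops;

	static int foo_read_folio_range(const struct iomap_iter *iter,
			struct iomap_read_folio_ctx *ctx, size_t len)
	{
		struct folio *folio = ctx->cur_folio;
		size_t poff = offset_in_folio(folio, iter->pos);
		int error;

		/* Fill folio bytes [poff, poff + len) by whatever transport. */
		error = foo_fill_folio(iter->inode, folio, poff, len);
		if (error)
			return error;

		/* Every issued read must be paired with a completion. */
		iomap_finish_folio_read(folio, poff, len, 0);
		return 0;
	}

	static const struct iomap_read_ops foo_read_ops = {
		.read_folio_range	= foo_read_folio_range,
		/* .submit_read is optional; omitted for a synchronous read. */
	};

	static int foo_read_folio(struct file *file, struct folio *folio)
	{
		struct iomap_read_folio_ctx ctx = {
			.ops		= &foo_read_ops,
			.cur_folio	= folio,
		};

		iomap_read_folio(&foo_iomap_ops, &ctx);
		return 0;
	}

A readahead path would look the same except that the caller sets .rac from its readahead_control, as the CONFIG_BLOCK helpers above do for the bio-based implementation.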
@@ -977,6 +977,8 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
 		pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
 unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
+unsigned filemap_get_folios_dirty(struct address_space *mapping,
+		pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
 
 struct folio *read_cache_folio(struct address_space *, pgoff_t index,
 		filler_t *filler, struct file *file);
@@ -277,7 +277,6 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	} else {
 		rw->kiocb.ki_ioprio = get_current_ioprio();
 	}
-	rw->kiocb.dio_complete = NULL;
 	rw->kiocb.ki_flags = 0;
 	rw->kiocb.ki_write_stream = READ_ONCE(sqe->write_stream);
 
@@ -569,15 +568,6 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
 
 void io_req_rw_complete(struct io_kiocb *req, io_tw_token_t tw)
 {
-	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
-	struct kiocb *kiocb = &rw->kiocb;
-
-	if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
-		long res = kiocb->dio_complete(rw->kiocb.private);
-
-		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
-	}
-
 	io_req_io_end(req);
 
 	if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
@@ -592,10 +582,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
 	struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
 	struct io_kiocb *req = cmd_to_io_kiocb(rw);
 
-	if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
-		__io_complete_rw_common(req, res);
-		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
-	}
+	__io_complete_rw_common(req, res);
+	io_req_set_res(req, io_fixup_rw_res(req, res), 0);
 	req->io_task_work.func = io_req_rw_complete;
 	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
 }
mm/filemap.c (58 changed lines)
@@ -2366,6 +2366,64 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
 }
 EXPORT_SYMBOL(filemap_get_folios_tag);
 
+/**
+ * filemap_get_folios_dirty - Get a batch of dirty folios
+ * @mapping:	The address_space to search
+ * @start:	The starting folio index
+ * @end:	The final folio index (inclusive)
+ * @fbatch:	The batch to fill
+ *
+ * filemap_get_folios_dirty() works exactly like filemap_get_folios(), except
+ * the returned folios are presumed to be dirty or undergoing writeback. Dirty
+ * state is presumed because we don't block on folio lock nor want to miss
+ * folios. Callers that need to can recheck state upon locking the folio.
+ *
+ * This may not return all dirty folios if the batch gets filled up.
+ *
+ * Return: The number of folios found.
+ * Also update @start to be positioned for traversal of the next folio.
+ */
+unsigned filemap_get_folios_dirty(struct address_space *mapping, pgoff_t *start,
+		pgoff_t end, struct folio_batch *fbatch)
+{
+	XA_STATE(xas, &mapping->i_pages, *start);
+	struct folio *folio;
+
+	rcu_read_lock();
+	while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
+		if (xa_is_value(folio))
+			continue;
+		if (folio_trylock(folio)) {
+			bool clean = !folio_test_dirty(folio) &&
+				     !folio_test_writeback(folio);
+			folio_unlock(folio);
+			if (clean) {
+				folio_put(folio);
+				continue;
+			}
+		}
+		if (!folio_batch_add(fbatch, folio)) {
+			unsigned long nr = folio_nr_pages(folio);
+			*start = folio->index + nr;
+			goto out;
+		}
+	}
+	/*
+	 * We come here when there is no folio beyond @end. We take care to not
+	 * overflow the index @start as it confuses some of the callers. This
+	 * breaks the iteration when there is a folio at index -1 but that is
+	 * already broke anyway.
+	 */
+	if (end == (pgoff_t)-1)
+		*start = (pgoff_t)-1;
+	else
+		*start = end + 1;
+out:
+	rcu_read_unlock();
+
+	return folio_batch_count(fbatch);
+}
+
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario: