New ext4 features:

* Add support so tune2fs can modify/update the superblock using an
     ioctl, without needing write access to the block device.
   * Add support for 32-bit reserved uid's and gid's.
 
 Bug fixes:
 
   * Fix potential warnings and other failures caused by corrupted / fuzzed
     file systems.
   * Fail unaligned direct I/O write with EINVAL instead of silently
     falling back to buffered I/O
   * Correctly handle fsmap queries for metadata mappings
   * Avoid journal stalls caused by writeback throttling
   * Add some missing GFP_NOFAIL flags to avoid potential deadlocks
     under extreme memory pressure
 
 Cleanups:
 
   * Remove obsolete EXT3 Kconfigs
 -----BEGIN PGP SIGNATURE-----
 
 iQEyBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmjclvEACgkQ8vlZVpUN
 gaPjJgf4vnWF6DdV/eQfD9d41h+cOuBv0w/pLBMP5nsJn1NtI057hnIEs4DyWqIn
 M5O6qT4ktgoeS2zsKDnhdXWLjpnWJfqWKnYR76CoaZjNzg/2A3aT5+/H5fFRpBcT
 gkoh1xJbcdo5rglktAyAqYGIUAgRIimNPaLyeffMqHAOdhaiBpzIVU0D4Z24kGUg
 nBEMhQ6Km8Bvp1mJUiT9EsFXdC9BakUVrXLiliJsCBWitEYpBk/nScs7U/QQ4KVU
 IvK7jiacYapLHwRm/7d9rlr2VQw1rWa584B4seq7H+FWNNAuQcV5Bml05bbUeKGc
 9KKZDPA55UqcMBDkcHwro2GkYIFc
 =8Z1N
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "New ext4 features:

   - Add support so tune2fs can modify/update the superblock using an
     ioctl, without needing write access to the block device

   - Add support for 32-bit reserved uid's and gid's

  Bug fixes:

   - Fix potential warnings and other failures caused by corrupted /
     fuzzed file systems

   - Fail unaligned direct I/O write with EINVAL instead of silently
     falling back to buffered I/O

   - Correctly handle fsmap queries for metadata mappings

   - Avoid journal stalls caused by writeback throttling

   - Add some missing GFP_NOFAIL flags to avoid potential deadlocks
     under extreme memory pressure

  Cleanups:

   - Remove obsolete EXT3 Kconfigs"

* tag 'ext4_for_linus-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix checks for orphan inodes
  ext4: validate ea_ino and size in check_xattrs
  ext4: guard against EA inode refcount underflow in xattr update
  ext4: implement new ioctls to set and get superblock parameters
  ext4: add support for 32-bit default reserved uid and gid values
  ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
  ext4: fix an off-by-one issue during moving extents
  ext4: increase i_disksize to offset + len in ext4_update_disksize_before_punch()
  ext4: verify orphan file size is not too big
  ext4: fail unaligned direct IO write with EINVAL
  ext4: correctly handle queries for metadata mappings
  ext4: increase IO priority of fastcommit
  ext4: remove obsolete EXT3 config options
  jbd2: increase IO priority of checkpoint
  ext4: fix potential null deref in ext4_mb_init()
  ext4: add ext4_sb_bread_nofail() helper function for ext4_free_branches()
  ext4: replace min/max nesting with clamp()
  fs: ext4: change GFP_KERNEL to GFP_NOFS to avoid deadlock
This commit is contained in:
Linus Torvalds 2025-10-03 13:47:10 -07:00
commit 65989db7f8
16 changed files with 467 additions and 118 deletions

View File

@ -1,31 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
# Ext3 configs are here for backward compatibility with old configs which may
# have EXT3_FS set but not EXT4_FS set and thus would result in non-bootable
# kernels after the removal of ext3 driver.
config EXT3_FS
tristate "The Extended 3 (ext3) filesystem"
select EXT4_FS
help
This config option is here only for backward compatibility. ext3
filesystem is now handled by the ext4 driver.
config EXT3_FS_POSIX_ACL
bool "Ext3 POSIX Access Control Lists"
depends on EXT3_FS
select EXT4_FS_POSIX_ACL
select FS_POSIX_ACL
help
This config option is here only for backward compatibility. ext3
filesystem is now handled by the ext4 driver.
config EXT3_FS_SECURITY
bool "Ext3 Security Labels"
depends on EXT3_FS
select EXT4_FS_SECURITY
help
This config option is here only for backward compatibility. ext3
filesystem is now handled by the ext4 driver.
config EXT4_FS
tristate "The Extended 4 (ext4) filesystem"
select BUFFER_HEAD

View File

@ -1450,7 +1450,9 @@ struct ext4_super_block {
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
__le32 s_orphan_file_inum; /* Inode for tracking orphan inodes */
__le32 s_reserved[94]; /* Padding to the end of the block */
__le16 s_def_resuid_hi;
__le16 s_def_resgid_hi;
__le32 s_reserved[93]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
@ -1820,6 +1822,18 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
}
/*
 * Assemble the 32-bit default reserved uid from the legacy 16-bit
 * s_def_resuid superblock field and the new s_def_resuid_hi extension
 * (carved out of s_reserved[] by this release).
 */
static inline int ext4_get_resuid(struct ext4_super_block *es)
{
return le16_to_cpu(es->s_def_resuid) |
le16_to_cpu(es->s_def_resuid_hi) << 16;
}
/*
 * Assemble the 32-bit default reserved gid from the legacy 16-bit
 * s_def_resgid superblock field and the new s_def_resgid_hi extension.
 */
static inline int ext4_get_resgid(struct ext4_super_block *es)
{
return le16_to_cpu(es->s_def_resgid) |
le16_to_cpu(es->s_def_resgid_hi) << 16;
}
/*
* Returns: sbi->field[index]
* Used to access an array element from the following sbi fields which require
@ -1989,6 +2003,16 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
 * Check whether the inode is tracked as an orphan by either mechanism:
 * the orphan file (EXT4_STATE_ORPHAN_FILE inode state bit) or the
 * classic on-disk orphan list (non-empty i_orphan list linkage).
 * Callers use this as the single "is this inode already an orphan"
 * predicate instead of open-coding both tests.
 */
static inline bool ext4_inode_orphan_tracked(struct inode *inode)
{
return ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
!list_empty(&EXT4_I(inode)->i_orphan);
}
/*
* Codes for operating systems
*/
@ -3142,6 +3166,8 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
sector_t block, blk_opf_t op_flags);
extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
sector_t block);
extern struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
sector_t block);
extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
bh_end_io_t *end_io, bool simu_fail);
extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,

View File

@ -663,7 +663,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
{
blk_opf_t write_flags = REQ_SYNC;
blk_opf_t write_flags = JBD2_JOURNAL_REQ_FLAGS;
struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
/* Add REQ_FUA | REQ_PREFLUSH only its tail */

View File

@ -354,7 +354,7 @@ static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
* to cleanup the orphan list in ext4_handle_inode_extension(). Do it
* now.
*/
if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
if (ext4_inode_orphan_tracked(inode) && inode->i_nlink) {
handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {

View File

@ -74,7 +74,8 @@ static int ext4_getfsmap_dev_compare(const void *p1, const void *p2)
static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info,
struct ext4_fsmap *rec)
{
return rec->fmr_physical < info->gfi_low.fmr_physical;
return rec->fmr_physical + rec->fmr_length <=
info->gfi_low.fmr_physical;
}
/*
@ -200,15 +201,18 @@ static int ext4_getfsmap_meta_helper(struct super_block *sb,
ext4_group_first_block_no(sb, agno));
fs_end = fs_start + EXT4_C2B(sbi, len);
/* Return relevant extents from the meta_list */
/*
* Return relevant extents from the meta_list. We emit all extents that
* partially/fully overlap with the query range
*/
list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) {
if (p->fmr_physical < info->gfi_next_fsblk) {
if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) {
list_del(&p->fmr_list);
kfree(p);
continue;
}
if (p->fmr_physical <= fs_start ||
p->fmr_physical + p->fmr_length <= fs_end) {
if (p->fmr_physical <= fs_end &&
p->fmr_physical + p->fmr_length > fs_start) {
/* Emit the retained free extent record if present */
if (info->gfi_lastfree.fmr_owner) {
error = ext4_getfsmap_helper(sb, info,

View File

@ -1025,7 +1025,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
}
/* Go read the buffer for the next level down */
bh = ext4_sb_bread(inode->i_sb, nr, 0);
bh = ext4_sb_bread_nofail(inode->i_sb, nr);
/*
* A read failure? Report error and clear slot

View File

@ -3872,47 +3872,12 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
return ret;
}
/*
 * Decide whether a direct-I/O write that completed with @written bytes
 * should fall back to buffered I/O (the caller then returns -ENOTBLK).
 * Fallback is only wanted for a direct write, never for atomic writes,
 * and only when nothing was written at all.
 */
static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written)
{
/* must be a directio to fall back to buffered */
if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) !=
(IOMAP_WRITE | IOMAP_DIRECT))
return false;
/* atomic writes are all-or-nothing */
if (flags & IOMAP_ATOMIC)
return false;
/* can only try again if we wrote nothing */
return written == 0;
}
static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
ssize_t written, unsigned flags, struct iomap *iomap)
{
/*
* Check to see whether an error occurred while writing out the data to
* the allocated blocks. If so, return the magic error code for
* non-atomic write so that we fallback to buffered I/O and attempt to
* complete the remainder of the I/O.
* For non-atomic writes, any blocks that may have been
* allocated in preparation for the direct I/O will be reused during
* buffered I/O. For atomic write, we never fallback to buffered-io.
*/
if (ext4_want_directio_fallback(flags, written))
return -ENOTBLK;
return 0;
}
const struct iomap_ops ext4_iomap_ops = {
.iomap_begin = ext4_iomap_begin,
.iomap_end = ext4_iomap_end,
};
const struct iomap_ops ext4_iomap_overwrite_ops = {
.iomap_begin = ext4_iomap_overwrite_begin,
.iomap_end = ext4_iomap_end,
};
static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
@ -4287,7 +4252,11 @@ int ext4_can_truncate(struct inode *inode)
* We have to make sure i_disksize gets properly updated before we truncate
* page cache due to hole punching or zero range. Otherwise i_disksize update
* can get lost as it may have been postponed to submission of writeback but
* that will never happen after we truncate page cache.
* that will never happen if we remove the folio containing i_size from the
* page cache. Also if we punch hole within i_size but above i_disksize,
* following ext4_page_mkwrite() may mistakenly allocate written blocks over
* the hole and thus introduce allocated blocks beyond i_disksize which is
* not allowed (e2fsck would complain in case of crash).
*/
int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t len)
@ -4298,9 +4267,11 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t size = i_size_read(inode);
WARN_ON(!inode_is_locked(inode));
if (offset > size || offset + len < size)
if (offset > size)
return 0;
if (offset + len < size)
size = offset + len;
if (EXT4_I(inode)->i_disksize >= size)
return 0;
@ -4748,7 +4719,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode
* old inodes get re-used with the upper 16 bits of the
* uid/gid intact.
*/
if (ei->i_dtime && list_empty(&ei->i_orphan)) {
if (ei->i_dtime && !ext4_inode_orphan_tracked(inode)) {
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
} else {

View File

@ -27,14 +27,16 @@
#include "fsmap.h"
#include <trace/events/ext4.h>
typedef void ext4_update_sb_callback(struct ext4_super_block *es,
const void *arg);
typedef void ext4_update_sb_callback(struct ext4_sb_info *sbi,
struct ext4_super_block *es,
const void *arg);
/*
* Superblock modification callback function for changing file system
* label
*/
static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
static void ext4_sb_setlabel(struct ext4_sb_info *sbi,
struct ext4_super_block *es, const void *arg)
{
/* Sanity check, this should never happen */
BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
@ -46,7 +48,8 @@ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
* Superblock modification callback function for changing file system
* UUID.
*/
/*
 * Superblock modification callback (ext4_update_sb_callback) for
 * changing the filesystem UUID.  @arg points at the new UUID_SIZE-byte
 * UUID; @sbi is unused here but part of the common callback signature.
 */
static void ext4_sb_setuuid(struct ext4_sb_info *sbi,
struct ext4_super_block *es, const void *arg)
{
memcpy(es->s_uuid, (__u8 *)arg, UUID_SIZE);
}
@ -71,7 +74,7 @@ int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
goto out_err;
lock_buffer(bh);
func(es, arg);
func(sbi, es, arg);
ext4_superblock_csum_set(sb);
unlock_buffer(bh);
@ -149,7 +152,7 @@ static int ext4_update_backup_sb(struct super_block *sb,
unlock_buffer(bh);
goto out_bh;
}
func(es, arg);
func(EXT4_SB(sb), es, arg);
if (ext4_has_feature_metadata_csum(sb))
es->s_checksum = ext4_superblock_csum(es);
set_buffer_uptodate(bh);
@ -1230,6 +1233,295 @@ static int ext4_ioctl_setuuid(struct file *filp,
return ret;
}
#define TUNE_OPS_SUPPORTED (EXT4_TUNE_FL_ERRORS_BEHAVIOR | \
EXT4_TUNE_FL_MNT_COUNT | EXT4_TUNE_FL_MAX_MNT_COUNT | \
EXT4_TUNE_FL_CHECKINTRVAL | EXT4_TUNE_FL_LAST_CHECK_TIME | \
EXT4_TUNE_FL_RESERVED_BLOCKS | EXT4_TUNE_FL_RESERVED_UID | \
EXT4_TUNE_FL_RESERVED_GID | EXT4_TUNE_FL_DEFAULT_MNT_OPTS | \
EXT4_TUNE_FL_DEF_HASH_ALG | EXT4_TUNE_FL_RAID_STRIDE | \
EXT4_TUNE_FL_RAID_STRIPE_WIDTH | EXT4_TUNE_FL_MOUNT_OPTS | \
EXT4_TUNE_FL_FEATURES | EXT4_TUNE_FL_EDIT_FEATURES | \
EXT4_TUNE_FL_FORCE_FSCK | EXT4_TUNE_FL_ENCODING | \
EXT4_TUNE_FL_ENCODING_FLAGS)
#define EXT4_TUNE_SET_COMPAT_SUPP \
(EXT4_FEATURE_COMPAT_DIR_INDEX | \
EXT4_FEATURE_COMPAT_STABLE_INODES)
#define EXT4_TUNE_SET_INCOMPAT_SUPP \
(EXT4_FEATURE_INCOMPAT_EXTENTS | \
EXT4_FEATURE_INCOMPAT_EA_INODE | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
EXT4_FEATURE_INCOMPAT_LARGEDIR | \
EXT4_FEATURE_INCOMPAT_CASEFOLD)
#define EXT4_TUNE_SET_RO_COMPAT_SUPP \
(EXT4_FEATURE_RO_COMPAT_LARGE_FILE | \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_PROJECT | \
EXT4_FEATURE_RO_COMPAT_VERITY)
#define EXT4_TUNE_CLEAR_COMPAT_SUPP (0)
#define EXT4_TUNE_CLEAR_INCOMPAT_SUPP (0)
#define EXT4_TUNE_CLEAR_RO_COMPAT_SUPP (0)
#define SB_ENC_SUPP_MASK (SB_ENC_STRICT_MODE_FL | \
SB_ENC_NO_COMPAT_FALLBACK_FL)
/*
 * EXT4_IOC_GET_TUNE_SB_PARAM handler: snapshot the tunable superblock
 * parameters of the mounted filesystem into an ext4_tune_sb_params and
 * copy it to userspace.  Returns 0 on success or -EFAULT if the copy
 * to @params fails.
 */
static int ext4_ioctl_get_tune_sb(struct ext4_sb_info *sbi,
struct ext4_tune_sb_params __user *params)
{
struct ext4_tune_sb_params ret;
struct ext4_super_block *es = sbi->s_es;
memset(&ret, 0, sizeof(ret));
/* Advertise every tune operation this kernel supports. */
ret.set_flags = TUNE_OPS_SUPPORTED;
ret.errors_behavior = le16_to_cpu(es->s_errors);
ret.mnt_count = le16_to_cpu(es->s_mnt_count);
ret.max_mnt_count = le16_to_cpu(es->s_max_mnt_count);
ret.checkinterval = le32_to_cpu(es->s_checkinterval);
ret.last_check_time = le32_to_cpu(es->s_lastcheck);
ret.reserved_blocks = ext4_r_blocks_count(es);
ret.blocks_count = ext4_blocks_count(es);
/* uid/gid are reassembled from the 16-bit lo/hi superblock fields. */
ret.reserved_uid = ext4_get_resuid(es);
ret.reserved_gid = ext4_get_resgid(es);
ret.default_mnt_opts = le32_to_cpu(es->s_default_mount_opts);
ret.def_hash_alg = es->s_def_hash_version;
ret.raid_stride = le16_to_cpu(es->s_raid_stride);
ret.raid_stripe_width = le32_to_cpu(es->s_raid_stripe_width);
ret.encoding = le16_to_cpu(es->s_encoding);
ret.encoding_flags = le16_to_cpu(es->s_encoding_flags);
strscpy_pad(ret.mount_opts, es->s_mount_opts);
ret.feature_compat = le32_to_cpu(es->s_feature_compat);
ret.feature_incompat = le32_to_cpu(es->s_feature_incompat);
ret.feature_ro_compat = le32_to_cpu(es->s_feature_ro_compat);
/* Tell userspace which feature bits may be set/cleared via the ioctl. */
ret.set_feature_compat_mask = EXT4_TUNE_SET_COMPAT_SUPP;
ret.set_feature_incompat_mask = EXT4_TUNE_SET_INCOMPAT_SUPP;
ret.set_feature_ro_compat_mask = EXT4_TUNE_SET_RO_COMPAT_SUPP;
ret.clear_feature_compat_mask = EXT4_TUNE_CLEAR_COMPAT_SUPP;
ret.clear_feature_incompat_mask = EXT4_TUNE_CLEAR_INCOMPAT_SUPP;
ret.clear_feature_ro_compat_mask = EXT4_TUNE_CLEAR_RO_COMPAT_SUPP;
if (copy_to_user(params, &ret, sizeof(ret)))
return -EFAULT;
return 0;
}
static void ext4_sb_setparams(struct ext4_sb_info *sbi,
struct ext4_super_block *es, const void *arg)
{
const struct ext4_tune_sb_params *params = arg;
if (params->set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR)
es->s_errors = cpu_to_le16(params->errors_behavior);
if (params->set_flags & EXT4_TUNE_FL_MNT_COUNT)
es->s_mnt_count = cpu_to_le16(params->mnt_count);
if (params->set_flags & EXT4_TUNE_FL_MAX_MNT_COUNT)
es->s_max_mnt_count = cpu_to_le16(params->max_mnt_count);
if (params->set_flags & EXT4_TUNE_FL_CHECKINTRVAL)
es->s_checkinterval = cpu_to_le32(params->checkinterval);
if (params->set_flags & EXT4_TUNE_FL_LAST_CHECK_TIME)
es->s_lastcheck = cpu_to_le32(params->last_check_time);
if (params->set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) {
ext4_fsblk_t blk = params->reserved_blocks;
es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
}
if (params->set_flags & EXT4_TUNE_FL_RESERVED_UID) {
int uid = params->reserved_uid;
es->s_def_resuid = cpu_to_le16(uid & 0xFFFF);
es->s_def_resuid_hi = cpu_to_le16(uid >> 16);
}
if (params->set_flags & EXT4_TUNE_FL_RESERVED_GID) {
int gid = params->reserved_gid;
es->s_def_resgid = cpu_to_le16(gid & 0xFFFF);
es->s_def_resgid_hi = cpu_to_le16(gid >> 16);
}
if (params->set_flags & EXT4_TUNE_FL_DEFAULT_MNT_OPTS)
es->s_default_mount_opts = cpu_to_le32(params->default_mnt_opts);
if (params->set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
es->s_def_hash_version = params->def_hash_alg;
if (params->set_flags & EXT4_TUNE_FL_RAID_STRIDE)
es->s_raid_stride = cpu_to_le16(params->raid_stride);
if (params->set_flags & EXT4_TUNE_FL_RAID_STRIPE_WIDTH)
es->s_raid_stripe_width =
cpu_to_le32(params->raid_stripe_width);
if (params->set_flags & EXT4_TUNE_FL_ENCODING)
es->s_encoding = cpu_to_le16(params->encoding);
if (params->set_flags & EXT4_TUNE_FL_ENCODING_FLAGS)
es->s_encoding_flags = cpu_to_le16(params->encoding_flags);
strscpy_pad(es->s_mount_opts, params->mount_opts);
if (params->set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
es->s_feature_compat |=
cpu_to_le32(params->set_feature_compat_mask);
es->s_feature_incompat |=
cpu_to_le32(params->set_feature_incompat_mask);
es->s_feature_ro_compat |=
cpu_to_le32(params->set_feature_ro_compat_mask);
es->s_feature_compat &=
~cpu_to_le32(params->clear_feature_compat_mask);
es->s_feature_incompat &=
~cpu_to_le32(params->clear_feature_incompat_mask);
es->s_feature_ro_compat &=
~cpu_to_le32(params->clear_feature_ro_compat_mask);
if (params->set_feature_compat_mask &
EXT4_FEATURE_COMPAT_DIR_INDEX)
es->s_def_hash_version = sbi->s_def_hash_version;
if (params->set_feature_incompat_mask &
EXT4_FEATURE_INCOMPAT_CSUM_SEED)
es->s_checksum_seed = cpu_to_le32(sbi->s_csum_seed);
}
if (params->set_flags & EXT4_TUNE_FL_FORCE_FSCK)
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
}
/*
 * EXT4_IOC_SET_TUNE_SB_PARAM handler: validate the requested superblock
 * tune parameters from userspace and apply them to the primary and
 * backup superblocks via ext4_update_superblocks_fn().
 *
 * Returns 0 on success; -EPERM without CAP_SYS_ADMIN, -EFAULT on bad
 * user pointer, -EOPNOTSUPP for unsupported flags/feature bits,
 * -EINVAL for invalid parameter values, or the error from the
 * superblock update itself.
 */
static int ext4_ioctl_set_tune_sb(struct file *filp,
struct ext4_tune_sb_params __user *in)
{
struct ext4_tune_sb_params params;
struct super_block *sb = file_inode(filp)->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
int enabling_casefold = 0;
int ret;
/* Modifying the superblock is privileged. */
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(&params, in, sizeof(params)))
return -EFAULT;
/* Reject any request bits this kernel does not implement. */
if ((params.set_flags & ~TUNE_OPS_SUPPORTED) != 0)
return -EOPNOTSUPP;
if ((params.set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR) &&
(params.errors_behavior > EXT4_ERRORS_PANIC))
return -EINVAL;
/* Cap reserved blocks at half the filesystem size. */
if ((params.set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) &&
(params.reserved_blocks > ext4_blocks_count(sbi->s_es) / 2))
return -EINVAL;
/* SIPHASH is reserved for casefolded dirs; not a default hash. */
if ((params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) &&
((params.def_hash_alg > DX_HASH_LAST) ||
(params.def_hash_alg == DX_HASH_SIPHASH)))
return -EINVAL;
/* FEATURES (absolute) and EDIT_FEATURES (masks) are exclusive. */
if ((params.set_flags & EXT4_TUNE_FL_FEATURES) &&
(params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES))
return -EINVAL;
/*
 * Convert an absolute FEATURES request into set/clear masks relative
 * to the current superblock, then handle it via the EDIT path below.
 */
if (params.set_flags & EXT4_TUNE_FL_FEATURES) {
params.set_feature_compat_mask =
params.feature_compat &
~le32_to_cpu(es->s_feature_compat);
params.set_feature_incompat_mask =
params.feature_incompat &
~le32_to_cpu(es->s_feature_incompat);
params.set_feature_ro_compat_mask =
params.feature_ro_compat &
~le32_to_cpu(es->s_feature_ro_compat);
params.clear_feature_compat_mask =
~params.feature_compat &
le32_to_cpu(es->s_feature_compat);
params.clear_feature_incompat_mask =
~params.feature_incompat &
le32_to_cpu(es->s_feature_incompat);
params.clear_feature_ro_compat_mask =
~params.feature_ro_compat &
le32_to_cpu(es->s_feature_ro_compat);
params.set_flags |= EXT4_TUNE_FL_EDIT_FEATURES;
}
if (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
/* Only the whitelisted feature bits may be toggled online. */
if ((params.set_feature_compat_mask &
~EXT4_TUNE_SET_COMPAT_SUPP) ||
(params.set_feature_incompat_mask &
~EXT4_TUNE_SET_INCOMPAT_SUPP) ||
(params.set_feature_ro_compat_mask &
~EXT4_TUNE_SET_RO_COMPAT_SUPP) ||
(params.clear_feature_compat_mask &
~EXT4_TUNE_CLEAR_COMPAT_SUPP) ||
(params.clear_feature_incompat_mask &
~EXT4_TUNE_CLEAR_INCOMPAT_SUPP) ||
(params.clear_feature_ro_compat_mask &
~EXT4_TUNE_CLEAR_RO_COMPAT_SUPP))
return -EOPNOTSUPP;
/*
 * Filter out the features that are already set from
 * the set_mask.
 */
params.set_feature_compat_mask &=
~le32_to_cpu(es->s_feature_compat);
params.set_feature_incompat_mask &=
~le32_to_cpu(es->s_feature_incompat);
params.set_feature_ro_compat_mask &=
~le32_to_cpu(es->s_feature_ro_compat);
/*
 * Enabling casefold also needs an encoding; default any part the
 * caller did not provide (UTF8 12.1, no extra encoding flags).
 */
if ((params.set_feature_incompat_mask &
EXT4_FEATURE_INCOMPAT_CASEFOLD)) {
enabling_casefold = 1;
if (!(params.set_flags & EXT4_TUNE_FL_ENCODING)) {
params.encoding = EXT4_ENC_UTF8_12_1;
params.set_flags |= EXT4_TUNE_FL_ENCODING;
}
if (!(params.set_flags & EXT4_TUNE_FL_ENCODING_FLAGS)) {
params.encoding_flags = 0;
params.set_flags |= EXT4_TUNE_FL_ENCODING_FLAGS;
}
}
/*
 * Enabling dir_index requires a hash seed and a default hash
 * version; generate/choose them if not already present.
 */
if ((params.set_feature_compat_mask &
EXT4_FEATURE_COMPAT_DIR_INDEX)) {
uuid_t uu;
memcpy(&uu, sbi->s_hash_seed, UUID_SIZE);
if (uuid_is_null(&uu))
generate_random_uuid((char *)
&sbi->s_hash_seed);
if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
sbi->s_def_hash_version = params.def_hash_alg;
else if (sbi->s_def_hash_version == 0)
sbi->s_def_hash_version = DX_HASH_HALF_MD4;
if (!(es->s_flags &
cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH)) &&
!(es->s_flags &
cpu_to_le32(EXT2_FLAGS_SIGNED_HASH))) {
#ifdef __CHAR_UNSIGNED__
sbi->s_hash_unsigned = 3;
#else
sbi->s_hash_unsigned = 0;
#endif
}
}
}
/* Encoding may only be changed while enabling casefold, UTF8 only. */
if (params.set_flags & EXT4_TUNE_FL_ENCODING) {
if (!enabling_casefold)
return -EINVAL;
if (params.encoding == 0)
params.encoding = EXT4_ENC_UTF8_12_1;
else if (params.encoding != EXT4_ENC_UTF8_12_1)
return -EINVAL;
}
if (params.set_flags & EXT4_TUNE_FL_ENCODING_FLAGS) {
if (!enabling_casefold)
return -EINVAL;
if (params.encoding_flags & ~SB_ENC_SUPP_MASK)
return -EINVAL;
}
ret = mnt_want_write_file(filp);
if (ret)
return ret;
/* Journalled update of the primary and all backup superblocks. */
ret = ext4_update_superblocks_fn(sb, ext4_sb_setparams, &params);
mnt_drop_write_file(filp);
/*
 * Mirror the new default hash into the in-core sb info.
 * NOTE(review): this happens even when the superblock update above
 * failed (ret != 0) — confirm that is intended.
 */
if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
sbi->s_def_hash_version = params.def_hash_alg;
return ret;
}
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@ -1616,6 +1908,11 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg);
case EXT4_IOC_SETFSUUID:
return ext4_ioctl_setuuid(filp, (const void __user *)arg);
case EXT4_IOC_GET_TUNE_SB_PARAM:
return ext4_ioctl_get_tune_sb(EXT4_SB(sb),
(void __user *)arg);
case EXT4_IOC_SET_TUNE_SB_PARAM:
return ext4_ioctl_set_tune_sb(filp, (void __user *)arg);
default:
return -ENOTTY;
}
@ -1703,7 +2000,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
#endif
/*
 * ext4_update_sb_callback that stores the precomputed filesystem
 * overhead (in clusters, passed via @arg as an unsigned long) into the
 * superblock.  @sbi is unused; it is part of the common callback
 * signature.
 */
static void set_overhead(struct ext4_sb_info *sbi,
struct ext4_super_block *es, const void *arg)
{
es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
}

View File

@ -3655,16 +3655,26 @@ static void ext4_discard_work(struct work_struct *work)
/*
 * Tear down the per-order average-fragment-size xarrays.  Safe to call
 * when the array was never allocated (early ext4_mb_init() failure):
 * the NULL check avoids dereferencing an unallocated pointer, and the
 * pointer is reset to NULL so a repeat call is harmless.
 */
static inline void ext4_mb_avg_fragment_size_destroy(struct ext4_sb_info *sbi)
{
if (!sbi->s_mb_avg_fragment_size)
return;
for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
xa_destroy(&sbi->s_mb_avg_fragment_size[i]);
kfree(sbi->s_mb_avg_fragment_size);
sbi->s_mb_avg_fragment_size = NULL;
}
/*
 * Tear down the per-order largest-free-orders xarrays; mirror image of
 * ext4_mb_avg_fragment_size_destroy() with the same NULL-safety so it
 * can run on a partially initialized sbi.
 */
static inline void ext4_mb_largest_free_orders_destroy(struct ext4_sb_info *sbi)
{
if (!sbi->s_mb_largest_free_orders)
return;
for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
xa_destroy(&sbi->s_mb_largest_free_orders[i]);
kfree(sbi->s_mb_largest_free_orders);
sbi->s_mb_largest_free_orders = NULL;
}
int ext4_mb_init(struct super_block *sb)

View File

@ -231,9 +231,9 @@ static int kmmpd(void *data)
* Adjust the mmp_check_interval depending on how much time
* it took for the MMP block to be written.
*/
mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
EXT4_MMP_MAX_CHECK_INTERVAL),
EXT4_MMP_MIN_CHECK_INTERVAL);
mmp_check_interval = clamp(EXT4_MMP_CHECK_MULT * diff / HZ,
EXT4_MMP_MIN_CHECK_INTERVAL,
EXT4_MMP_MAX_CHECK_INTERVAL);
mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
}

View File

@ -225,7 +225,7 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
do {
if (bh_offset(bh) + blocksize <= from)
continue;
if (bh_offset(bh) > to)
if (bh_offset(bh) >= to)
break;
wait_on_buffer(bh);
if (buffer_uptodate(bh))

View File

@ -109,11 +109,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
!inode_is_locked(inode));
/*
* Inode orphaned in orphan file or in orphan list?
*/
if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
!list_empty(&EXT4_I(inode)->i_orphan))
if (ext4_inode_orphan_tracked(inode))
return 0;
/*
@ -587,9 +583,20 @@ int ext4_init_orphan_info(struct super_block *sb)
ext4_msg(sb, KERN_ERR, "get orphan inode failed");
return PTR_ERR(inode);
}
/*
* This is just an artificial limit to prevent corrupted fs from
* consuming absurd amounts of memory when pinning blocks of orphan
* file in memory.
*/
if (inode->i_size > 8 << 20) {
ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
(unsigned long long)inode->i_size);
ret = -EFSCORRUPTED;
goto out_put;
}
oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
oi->of_binfo = kmalloc_array(oi->of_blocks,
oi->of_binfo = kvmalloc_array(oi->of_blocks,
sizeof(struct ext4_orphan_block),
GFP_KERNEL);
if (!oi->of_binfo) {

View File

@ -265,6 +265,15 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}
/*
 * Read a superblock-device block with a __GFP_NOFAIL allocation so the
 * buffer-head allocation itself cannot fail; __GFP_FS is masked out of
 * the mapping's gfp to avoid recursing into the filesystem under memory
 * pressure.  The read may still fail (caller must check the returned
 * buffer as with ext4_sb_bread()).
 */
struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
sector_t block)
{
gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
~__GFP_FS) | __GFP_MOVABLE | __GFP_NOFAIL;
return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}
void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
@ -1438,9 +1447,9 @@ static void ext4_free_in_core_inode(struct inode *inode)
static void ext4_destroy_inode(struct inode *inode)
{
if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
if (ext4_inode_orphan_tracked(inode)) {
ext4_msg(inode->i_sb, KERN_ERR,
"Inode %lu (%p): orphan list check failed!",
"Inode %lu (%p): inode tracked as orphan!",
inode->i_ino, EXT4_I(inode));
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
EXT4_I(inode), sizeof(struct ext4_inode_info),
@ -2466,7 +2475,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
char *s_mount_opts = NULL;
char s_mount_opts[65];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@ -2474,15 +2483,11 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
sizeof(sbi->s_es->s_mount_opts),
GFP_KERNEL);
if (!s_mount_opts)
return ret;
strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
goto out_free;
return -ENOMEM;
s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
if (!s_ctx)
@ -2514,11 +2519,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
ret = 0;
out_free:
if (fc) {
ext4_fc_free(fc);
kfree(fc);
}
kfree(s_mount_opts);
ext4_fc_free(fc);
kfree(fc);
return ret;
}
@ -2964,11 +2966,11 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
}
if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
ext4_get_resuid(es) != EXT4_DEF_RESUID)
SEQ_OPTS_PRINT("resuid=%u",
from_kuid_munged(&init_user_ns, sbi->s_resuid));
if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
ext4_get_resgid(es) != EXT4_DEF_RESGID)
SEQ_OPTS_PRINT("resgid=%u",
from_kgid_munged(&init_user_ns, sbi->s_resgid));
def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
@ -5283,8 +5285,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
ext4_set_def_opts(sb, es);
sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
sbi->s_resuid = make_kuid(&init_user_ns, ext4_get_resuid(es));
sbi->s_resgid = make_kgid(&init_user_ns, ext4_get_resgid(es));
sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;

View File

@ -251,6 +251,10 @@ check_xattrs(struct inode *inode, struct buffer_head *bh,
err_str = "invalid ea_ino";
goto errout;
}
if (ea_ino && !size) {
err_str = "invalid size in ea xattr";
goto errout;
}
if (size > EXT4_XATTR_SIZE_MAX) {
err_str = "e_value size too large";
goto errout;
@ -1019,7 +1023,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
struct ext4_iloc iloc;
s64 ref_count;
u64 ref_count;
int ret;
inode_lock_nested(ea_inode, I_MUTEX_XATTR);
@ -1029,13 +1033,17 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
goto out;
ref_count = ext4_xattr_inode_get_ref(ea_inode);
if ((ref_count == 0 && ref_change < 0) || (ref_count == U64_MAX && ref_change > 0)) {
ext4_error_inode(ea_inode, __func__, __LINE__, 0,
"EA inode %lu ref wraparound: ref_count=%lld ref_change=%d",
ea_inode->i_ino, ref_count, ref_change);
ret = -EFSCORRUPTED;
goto out;
}
ref_count += ref_change;
ext4_xattr_inode_set_ref(ea_inode, ref_count);
if (ref_change > 0) {
WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
ea_inode->i_ino, ref_count);
if (ref_count == 1) {
WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
ea_inode->i_ino, ea_inode->i_nlink);
@ -1044,9 +1052,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
ext4_orphan_del(handle, ea_inode);
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
ea_inode->i_ino, ref_count);
if (ref_count == 0) {
WARN_ONCE(ea_inode->i_nlink != 1,
"EA inode %lu i_nlink=%u",
@ -1530,7 +1535,7 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) &&
!(current->flags & PF_MEMALLOC_NOFS));
ea_data = kvmalloc(value_len, GFP_KERNEL);
ea_data = kvmalloc(value_len, GFP_NOFS);
if (!ea_data) {
mb_cache_entry_put(ea_inode_cache, ce);
return NULL;

View File

@ -131,7 +131,7 @@ __flush_batch(journal_t *journal, int *batch_count)
blk_start_plug(&plug);
for (i = 0; i < *batch_count; i++)
write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
write_dirty_buffer(journal->j_chkpt_bhs[i], JBD2_JOURNAL_REQ_FLAGS);
blk_finish_plug(&plug);
for (i = 0; i < *batch_count; i++) {

View File

@ -33,6 +33,8 @@
#define EXT4_IOC_CHECKPOINT _IOW('f', 43, __u32)
#define EXT4_IOC_GETFSUUID _IOR('f', 44, struct fsuuid)
#define EXT4_IOC_SETFSUUID _IOW('f', 44, struct fsuuid)
#define EXT4_IOC_GET_TUNE_SB_PARAM _IOR('f', 45, struct ext4_tune_sb_params)
#define EXT4_IOC_SET_TUNE_SB_PARAM _IOW('f', 46, struct ext4_tune_sb_params)
#define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32)
@ -108,6 +110,57 @@ struct ext4_new_group_input {
__u16 unused;
};
/*
 * Parameter block exchanged with EXT4_IOC_GET/SET_TUNE_SB_PARAM.
 * set_flags selects (EXT4_TUNE_FL_* bits) which fields are valid;
 * on GET it reports which operations the kernel supports.
 */
struct ext4_tune_sb_params {
__u32 set_flags; /* EXT4_TUNE_FL_* bits selecting active fields */
__u32 checkinterval; /* max interval between fsck checks */
__u16 errors_behavior; /* EXT4_ERRORS_* on-error behavior */
__u16 mnt_count; /* current mount count */
__u16 max_mnt_count; /* mounts before forced fsck */
__u16 raid_stride; /* RAID stride (blocks) */
__u64 last_check_time; /* last fsck time (seconds — TODO confirm epoch) */
__u64 reserved_blocks; /* blocks reserved for root/privileged users */
__u64 blocks_count; /* total fs blocks (read-only on GET) */
__u32 default_mnt_opts; /* default mount options bitmap */
__u32 reserved_uid; /* 32-bit default reserved uid */
__u32 reserved_gid; /* 32-bit default reserved gid */
__u32 raid_stripe_width; /* RAID stripe width (blocks) */
__u16 encoding; /* filename charset encoding (EXT4_ENC_*) */
__u16 encoding_flags; /* SB_ENC_* encoding flags */
__u8 def_hash_alg; /* default htree hash (DX_HASH_*) */
__u8 pad_1; /* alignment padding, must be zero */
__u16 pad_2; /* alignment padding, must be zero */
__u32 feature_compat; /* absolute compat feature set (FL_FEATURES) */
__u32 feature_incompat; /* absolute incompat feature set */
__u32 feature_ro_compat; /* absolute ro-compat feature set */
__u32 set_feature_compat_mask; /* compat bits to set (FL_EDIT_FEATURES) */
__u32 set_feature_incompat_mask; /* incompat bits to set */
__u32 set_feature_ro_compat_mask; /* ro-compat bits to set */
__u32 clear_feature_compat_mask; /* compat bits to clear */
__u32 clear_feature_incompat_mask; /* incompat bits to clear */
__u32 clear_feature_ro_compat_mask; /* ro-compat bits to clear */
__u8 mount_opts[64]; /* default mount options string */
__u8 pad[64]; /* reserved for future expansion */
};
#define EXT4_TUNE_FL_ERRORS_BEHAVIOR 0x00000001
#define EXT4_TUNE_FL_MNT_COUNT 0x00000002
#define EXT4_TUNE_FL_MAX_MNT_COUNT 0x00000004
#define EXT4_TUNE_FL_CHECKINTRVAL 0x00000008
#define EXT4_TUNE_FL_LAST_CHECK_TIME 0x00000010
#define EXT4_TUNE_FL_RESERVED_BLOCKS 0x00000020
#define EXT4_TUNE_FL_RESERVED_UID 0x00000040
#define EXT4_TUNE_FL_RESERVED_GID 0x00000080
#define EXT4_TUNE_FL_DEFAULT_MNT_OPTS 0x00000100
#define EXT4_TUNE_FL_DEF_HASH_ALG 0x00000200
#define EXT4_TUNE_FL_RAID_STRIDE 0x00000400
#define EXT4_TUNE_FL_RAID_STRIPE_WIDTH 0x00000800
#define EXT4_TUNE_FL_MOUNT_OPTS 0x00001000
#define EXT4_TUNE_FL_FEATURES 0x00002000
#define EXT4_TUNE_FL_EDIT_FEATURES 0x00004000
#define EXT4_TUNE_FL_FORCE_FSCK 0x00008000
#define EXT4_TUNE_FL_ENCODING 0x00010000
#define EXT4_TUNE_FL_ENCODING_FLAGS 0x00020000
/*
* Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
* It indicates that the entry in extent status cache is for a hole.