ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag

The Coverity Scan service has detected potential
race conditions in ceph_block_o_direct(), ceph_start_io_read(),
ceph_block_buffered(), and ceph_start_io_direct() [1 - 4].

CIDs 1590942, 1590665, 1589664, and 1590377 all carry the same explanation:
"The value of the shared data will be determined by
the interleaving of thread execution. Thread shared data is accessed
without holding an appropriate lock, possibly causing
a race condition (CWE-366)".

This patch reworks how the CEPH_I_ODIRECT flag is accessed and
modified: it adds smp_mb__before_atomic() before reading the state
of the CEPH_I_ODIRECT flag and smp_mb__after_atomic() after setting
or clearing the flag.
It also reworks how ci->i_ceph_lock is used
in the ceph_block_o_direct(), ceph_start_io_read(),
ceph_block_buffered(), and ceph_start_io_direct() methods.

[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590942
[2] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590665
[3] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1589664
[4] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590377

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Viacheslav Dubeyko 2025-07-16 11:40:49 -07:00 committed by Ilya Dryomov
parent 53db6f25ee
commit fbeafe782b
2 changed files with 44 additions and 12 deletions

View File

@ -21,14 +21,23 @@
/* Call with exclusively locked inode->i_rwsem */ /* Call with exclusively locked inode->i_rwsem */
static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
{ {
bool is_odirect;
lockdep_assert_held_write(&inode->i_rwsem); lockdep_assert_held_write(&inode->i_rwsem);
if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) { spin_lock(&ci->i_ceph_lock);
spin_lock(&ci->i_ceph_lock); /* ensure that bit state is consistent */
ci->i_ceph_flags &= ~CEPH_I_ODIRECT; smp_mb__before_atomic();
spin_unlock(&ci->i_ceph_lock); is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
inode_dio_wait(inode); if (is_odirect) {
clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
/* ensure modified bit is visible */
smp_mb__after_atomic();
} }
spin_unlock(&ci->i_ceph_lock);
if (is_odirect)
inode_dio_wait(inode);
} }
/** /**
@ -50,6 +59,7 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
int ceph_start_io_read(struct inode *inode) int ceph_start_io_read(struct inode *inode)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
bool is_odirect;
int err; int err;
/* Be an optimist! */ /* Be an optimist! */
@ -57,7 +67,12 @@ int ceph_start_io_read(struct inode *inode)
if (err) if (err)
return err; return err;
if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) spin_lock(&ci->i_ceph_lock);
/* ensure that bit state is consistent */
smp_mb__before_atomic();
is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
spin_unlock(&ci->i_ceph_lock);
if (!is_odirect)
return 0; return 0;
up_read(&inode->i_rwsem); up_read(&inode->i_rwsem);
@ -116,12 +131,22 @@ ceph_end_io_write(struct inode *inode)
/* Call with exclusively locked inode->i_rwsem */ /* Call with exclusively locked inode->i_rwsem */
static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
{ {
bool is_odirect;
lockdep_assert_held_write(&inode->i_rwsem); lockdep_assert_held_write(&inode->i_rwsem);
if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) { spin_lock(&ci->i_ceph_lock);
spin_lock(&ci->i_ceph_lock); /* ensure that bit state is consistent */
ci->i_ceph_flags |= CEPH_I_ODIRECT; smp_mb__before_atomic();
spin_unlock(&ci->i_ceph_lock); is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
if (!is_odirect) {
set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
/* ensure modified bit is visible */
smp_mb__after_atomic();
}
spin_unlock(&ci->i_ceph_lock);
if (!is_odirect) {
/* FIXME: unmap_mapping_range? */ /* FIXME: unmap_mapping_range? */
filemap_write_and_wait(inode->i_mapping); filemap_write_and_wait(inode->i_mapping);
} }
@ -146,6 +171,7 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
int ceph_start_io_direct(struct inode *inode) int ceph_start_io_direct(struct inode *inode)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
bool is_odirect;
int err; int err;
/* Be an optimist! */ /* Be an optimist! */
@ -153,7 +179,12 @@ int ceph_start_io_direct(struct inode *inode)
if (err) if (err)
return err; return err;
if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) spin_lock(&ci->i_ceph_lock);
/* ensure that bit state is consistent */
smp_mb__before_atomic();
is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
spin_unlock(&ci->i_ceph_lock);
if (is_odirect)
return 0; return 0;
up_read(&inode->i_rwsem); up_read(&inode->i_rwsem);

View File

@ -638,7 +638,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
#define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ #define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */
#define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ #define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */
#define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ #define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */
#define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */ #define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */
#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT)
#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ #define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */
#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) #define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT)
#define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */ #define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */