mirror of https://github.com/torvalds/linux.git
netfs: fix reference leak
Commit 20d72b00ca ("netfs: Fix the request's work item to not require a ref") modified netfs_alloc_request() to initialize the reference counter to 2 instead of 1. The rationale was that the request's "work" would release the second reference after completion (via netfs_{read,write}_collection_worker()). That works most of the time if all goes well. However, it leaks this additional reference if the request is released before the I/O operation has been submitted: the error code path only decrements the reference counter once and the work item will never be queued because there will never be a completion. This has caused outages of our whole server cluster today because tasks were blocked in netfs_wait_for_outstanding_io(), leading to deadlocks in Ceph (another bug that I will address soon in another patch). This was caused by a netfs_pgpriv2_begin_copy_to_cache() call which failed in fscache_begin_write_operation(). The leaked netfs_io_request was never completed, leaving `netfs_inode.io_count` with a positive value forever. All of this is super-fragile code. Finding out which code paths will lead to an eventual completion and which do not is hard to see: - Some functions like netfs_create_write_req() allocate a request, but will never submit any I/O. - netfs_unbuffered_read_iter_locked() calls netfs_unbuffered_read() and then netfs_put_request(); however, netfs_unbuffered_read() can also fail early before submitting the I/O request, therefore another netfs_put_request() call must be added there. A rule of thumb is that functions that return a `netfs_io_request` do not submit I/O, and all of their callers must be checked. For my taste, the whole netfs code needs an overhaul to make reference counting easier to understand and less fragile & obscure. But to fix this bug here and now and produce a patch that is adequate for a stable backport, I tried a minimal approach that quickly frees the request object upon early failure.
I decided against adding a second netfs_put_request() each time because that would cause code duplication which obscures the code further. Instead, I added the function netfs_put_failed_request() which frees such a failed request synchronously under the assumption that the reference count is exactly 2 (as initially set by netfs_alloc_request() and never touched), verified by a WARN_ON_ONCE(). It then deinitializes the request object (without going through the "cleanup_work" indirection) and frees the allocation (with RCU protection to protect against concurrent access by netfs_requests_seq_start()). All code paths that fail early have been changed to call netfs_put_failed_request() instead of netfs_put_request(). Additionally, I have added a netfs_put_request() call to netfs_unbuffered_read() as explained above because the netfs_put_failed_request() approach does not work there. Fixes: 20d72b00ca ("netfs: Fix the request's work item to not require a ref") Signed-off-by: Max Kellermann <max.kellermann@ionos.com> Signed-off-by: David Howells <dhowells@redhat.com> cc: Paulo Alcantara <pc@manguebit.org> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
parent
9158c6bb24
commit
4d428dca25
|
|
@ -369,7 +369,7 @@ void netfs_readahead(struct readahead_control *ractl)
|
||||||
return netfs_put_request(rreq, netfs_rreq_trace_put_return);
|
return netfs_put_request(rreq, netfs_rreq_trace_put_return);
|
||||||
|
|
||||||
cleanup_free:
|
cleanup_free:
|
||||||
return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
|
return netfs_put_failed_request(rreq);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(netfs_readahead);
|
EXPORT_SYMBOL(netfs_readahead);
|
||||||
|
|
||||||
|
|
@ -472,7 +472,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
|
||||||
return ret < 0 ? ret : 0;
|
return ret < 0 ? ret : 0;
|
||||||
|
|
||||||
discard:
|
discard:
|
||||||
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
|
netfs_put_failed_request(rreq);
|
||||||
alloc_error:
|
alloc_error:
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -532,7 +532,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
|
||||||
return ret < 0 ? ret : 0;
|
return ret < 0 ? ret : 0;
|
||||||
|
|
||||||
discard:
|
discard:
|
||||||
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
|
netfs_put_failed_request(rreq);
|
||||||
alloc_error:
|
alloc_error:
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -699,7 +699,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
error_put:
|
error_put:
|
||||||
netfs_put_request(rreq, netfs_rreq_trace_put_failed);
|
netfs_put_failed_request(rreq);
|
||||||
error:
|
error:
|
||||||
if (folio) {
|
if (folio) {
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
|
|
@ -754,7 +754,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
|
||||||
return ret < 0 ? ret : 0;
|
return ret < 0 ? ret : 0;
|
||||||
|
|
||||||
error_put:
|
error_put:
|
||||||
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
|
netfs_put_failed_request(rreq);
|
||||||
error:
|
error:
|
||||||
_leave(" = %d", ret);
|
_leave(" = %d", ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -131,6 +131,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
|
||||||
|
|
||||||
if (rreq->len == 0) {
|
if (rreq->len == 0) {
|
||||||
pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
|
pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
|
||||||
|
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -205,7 +206,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
|
||||||
if (user_backed_iter(iter)) {
|
if (user_backed_iter(iter)) {
|
||||||
ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
|
ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out;
|
goto error_put;
|
||||||
rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
|
rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
|
||||||
rreq->direct_bv_count = ret;
|
rreq->direct_bv_count = ret;
|
||||||
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
|
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
|
||||||
|
|
@ -238,6 +239,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
|
||||||
if (ret > 0)
|
if (ret > 0)
|
||||||
orig_count -= ret;
|
orig_count -= ret;
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
error_put:
|
||||||
|
netfs_put_failed_request(rreq);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);
|
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
|
||||||
n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0);
|
n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0);
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
ret = n;
|
ret = n;
|
||||||
goto out;
|
goto error_put;
|
||||||
}
|
}
|
||||||
wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec;
|
wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec;
|
||||||
wreq->direct_bv_count = n;
|
wreq->direct_bv_count = n;
|
||||||
|
|
@ -101,6 +101,10 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
|
||||||
out:
|
out:
|
||||||
netfs_put_request(wreq, netfs_rreq_trace_put_return);
|
netfs_put_request(wreq, netfs_rreq_trace_put_return);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
error_put:
|
||||||
|
netfs_put_failed_request(wreq);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);
|
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -87,6 +87,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
|
||||||
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
|
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
|
||||||
void netfs_clear_subrequests(struct netfs_io_request *rreq);
|
void netfs_clear_subrequests(struct netfs_io_request *rreq);
|
||||||
void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
|
void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
|
||||||
|
void netfs_put_failed_request(struct netfs_io_request *rreq);
|
||||||
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);
|
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);
|
||||||
|
|
||||||
static inline void netfs_see_request(struct netfs_io_request *rreq,
|
static inline void netfs_see_request(struct netfs_io_request *rreq,
|
||||||
|
|
|
||||||
|
|
@ -116,10 +116,8 @@ static void netfs_free_request_rcu(struct rcu_head *rcu)
|
||||||
netfs_stat_d(&netfs_n_rh_rreq);
|
netfs_stat_d(&netfs_n_rh_rreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void netfs_free_request(struct work_struct *work)
|
static void netfs_deinit_request(struct netfs_io_request *rreq)
|
||||||
{
|
{
|
||||||
struct netfs_io_request *rreq =
|
|
||||||
container_of(work, struct netfs_io_request, cleanup_work);
|
|
||||||
struct netfs_inode *ictx = netfs_inode(rreq->inode);
|
struct netfs_inode *ictx = netfs_inode(rreq->inode);
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
|
|
@ -149,6 +147,14 @@ static void netfs_free_request(struct work_struct *work)
|
||||||
|
|
||||||
if (atomic_dec_and_test(&ictx->io_count))
|
if (atomic_dec_and_test(&ictx->io_count))
|
||||||
wake_up_var(&ictx->io_count);
|
wake_up_var(&ictx->io_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void netfs_free_request(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct netfs_io_request *rreq =
|
||||||
|
container_of(work, struct netfs_io_request, cleanup_work);
|
||||||
|
|
||||||
|
netfs_deinit_request(rreq);
|
||||||
call_rcu(&rreq->rcu, netfs_free_request_rcu);
|
call_rcu(&rreq->rcu, netfs_free_request_rcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -167,6 +173,24 @@ void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free a request (synchronously) that was just allocated but has
|
||||||
|
* failed before it could be submitted.
|
||||||
|
*/
|
||||||
|
void netfs_put_failed_request(struct netfs_io_request *rreq)
|
||||||
|
{
|
||||||
|
int r = refcount_read(&rreq->ref);
|
||||||
|
|
||||||
|
/* new requests have two references (see
|
||||||
|
* netfs_alloc_request(), and this function is only allowed on
|
||||||
|
* new request objects
|
||||||
|
*/
|
||||||
|
WARN_ON_ONCE(r != 2);
|
||||||
|
|
||||||
|
trace_netfs_rreq_ref(rreq->debug_id, r, netfs_rreq_trace_put_failed);
|
||||||
|
netfs_free_request(&rreq->cleanup_work);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate and partially initialise an I/O request structure.
|
* Allocate and partially initialise an I/O request structure.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -118,7 +118,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
|
||||||
return creq;
|
return creq;
|
||||||
|
|
||||||
cancel_put:
|
cancel_put:
|
||||||
netfs_put_request(creq, netfs_rreq_trace_put_return);
|
netfs_put_failed_request(creq);
|
||||||
cancel:
|
cancel:
|
||||||
rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
|
rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
|
||||||
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
|
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
|
||||||
|
|
|
||||||
|
|
@ -189,7 +189,7 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
cleanup_free:
|
cleanup_free:
|
||||||
netfs_put_request(rreq, netfs_rreq_trace_put_failed);
|
netfs_put_failed_request(rreq);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(netfs_read_single);
|
EXPORT_SYMBOL(netfs_read_single);
|
||||||
|
|
|
||||||
|
|
@ -133,8 +133,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
|
||||||
|
|
||||||
return wreq;
|
return wreq;
|
||||||
nomem:
|
nomem:
|
||||||
wreq->error = -ENOMEM;
|
netfs_put_failed_request(wreq);
|
||||||
netfs_put_request(wreq, netfs_rreq_trace_put_failed);
|
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue