blk-mq: fix elevator depth_updated method

The current depth_updated() method has several problems:

1) depth_updated() is called once per hctx, while all elevators update
async_depth at the disk level, which has nothing to do with the hctx;
2) In blk_mq_update_nr_requests(), if a previous hctx update succeeded
and the current hctx update fails, q->nr_requests is not updated, while
async_depth has already been updated with the new nr_requests by the
earlier depth_updated() calls (see the sketch below);
3) All elevators now use q->nr_requests to calculate async_depth,
however, q->nr_requests still holds the old value when depth_updated()
is called from blk_mq_update_nr_requests();
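
For reference, a simplified sketch of the pre-patch flow in
blk_mq_update_nr_requests() (condensed from the code removed in the
block/blk-mq.c hunk below; tag-update arguments elided):

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_tag_update_depth(hctx, ..., nr, ...);
		if (ret)
			break;
		/* reads q->nr_requests, which still holds the old value (3) */
		if (q->elevator && q->elevator->type->ops.depth_updated)
			q->elevator->type->ops.depth_updated(hctx);
	}
	if (!ret)
		q->nr_requests = nr;
	/* after a mid-loop failure, async_depth and q->nr_requests disagree (2) */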

These problems were introduced first in the error path, then in
mq-deadline, and recently in bfq and kyber. Fix them by:

- pass in the request_queue instead of the hctx;
- move the depth_updated() call after q->nr_requests is updated in
  blk_mq_update_nr_requests() (see the sketch below);
- call depth_updated() from the init_sched() method to initialize
  async_depth;
- remove the init_hctx() method from mq-deadline and bfq, which is now
  useless;
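
With the fix, the update order in blk_mq_update_nr_requests() roughly
becomes (again a simplified sketch of the diff below):

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_tag_update_depth(hctx, ..., nr, ...);
		if (ret)
			goto out;
	}

	q->nr_requests = nr;
	/* one disk-level call, made after q->nr_requests is up to date */
	if (q->elevator && q->elevator->type->ops.depth_updated)
		q->elevator->type->ops.depth_updated(q);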

Fixes: 77f1e0a52d ("bfq: update internal depth state when queue depth changes")
Fixes: 39823b47bb ("block/mq-deadline: Fix the tag reservation code")
Fixes: 42e6c6ce03 ("lib/sbitmap: convert shallow_depth from one word to the whole sbitmap")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Li Nan <linan122@huawei.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250821060612.1729939-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
commit 7d337eef4a (parent 225dc96f35)
Author:    Yu Kuai <yukuai3@huawei.com>, 2025-08-21 14:06:11 +08:00
Committer: Jens Axboe <axboe@kernel.dk>
6 changed files with 42 additions and 53 deletions

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c

@@ -7109,9 +7109,10 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
  * See the comments on bfq_limit_depth for the purpose of
  * the depths set in the function. Return minimum shallow depth we'll use.
  */
-static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
+static void bfq_depth_updated(struct request_queue *q)
 {
-	unsigned int nr_requests = bfqd->queue->nr_requests;
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	unsigned int nr_requests = q->nr_requests;
 
 	/*
 	 * In-word depths if no bfq_queue is being weight-raised:
@@ -7143,21 +7144,8 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
 	bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U);
 	/* no more than ~37% of tags for sync writes (~20% extra tags) */
 	bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U);
-}
-
-static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
-{
-	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
-	struct blk_mq_tags *tags = hctx->sched_tags;
-
-	bfq_update_depths(bfqd, &tags->bitmap_tags);
-	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
-}
-
-static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
-{
-	bfq_depth_updated(hctx);
-	return 0;
+
+	blk_mq_set_min_shallow_depth(q, 1);
 }
 
 static void bfq_exit_queue(struct elevator_queue *e)
@@ -7369,6 +7357,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_queue *eq)
 		goto out_free;
 	bfq_init_root_group(bfqd->root_group, bfqd);
 	bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+	bfq_depth_updated(q);
 
 	/* We dispatch from request queue wide instead of hw queue */
 	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
@@ -7628,7 +7617,6 @@ static struct elevator_type iosched_bfq_mq = {
 		.request_merged = bfq_request_merged,
 		.has_work = bfq_has_work,
 		.depth_updated = bfq_depth_updated,
-		.init_hctx = bfq_init_hctx,
 		.init_sched = bfq_init_queue,
 		.exit_sched = bfq_exit_queue,
 	},

diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h

@@ -92,4 +92,15 @@ static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
+static inline void blk_mq_set_min_shallow_depth(struct request_queue *q,
+						unsigned int depth)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags,
+						depth);
+}
+
 #endif

diff --git a/block/blk-mq.c b/block/blk-mq.c

@@ -4951,20 +4951,21 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 						      false);
 		}
 		if (ret)
-			break;
-		if (q->elevator && q->elevator->type->ops.depth_updated)
-			q->elevator->type->ops.depth_updated(hctx);
+			goto out;
 	}
-	if (!ret) {
-		q->nr_requests = nr;
-		if (blk_mq_is_shared_tags(set->flags)) {
-			if (q->elevator)
-				blk_mq_tag_update_sched_shared_tags(q);
-			else
-				blk_mq_tag_resize_shared_tags(set, nr);
-		}
-	}
+
+	q->nr_requests = nr;
+	if (q->elevator && q->elevator->type->ops.depth_updated)
+		q->elevator->type->ops.depth_updated(q);
+
+	if (blk_mq_is_shared_tags(set->flags)) {
+		if (q->elevator)
+			blk_mq_tag_update_sched_shared_tags(q);
+		else
+			blk_mq_tag_resize_shared_tags(set, nr);
+	}
 
+out:
 	blk_mq_unquiesce_queue(q);
 
 	return ret;

diff --git a/block/elevator.h b/block/elevator.h

@@ -37,7 +37,7 @@ struct elevator_mq_ops {
 	void (*exit_sched)(struct elevator_queue *);
 	int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
 	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
-	void (*depth_updated)(struct blk_mq_hw_ctx *);
+	void (*depth_updated)(struct request_queue *);
 	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
 	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);

diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c

@@ -399,6 +399,14 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 	return ERR_PTR(ret);
 }
 
+static void kyber_depth_updated(struct request_queue *q)
+{
+	struct kyber_queue_data *kqd = q->elevator->elevator_data;
+
+	kqd->async_depth = q->nr_requests * KYBER_ASYNC_PERCENT / 100U;
+	blk_mq_set_min_shallow_depth(q, kqd->async_depth);
+}
+
 static int kyber_init_sched(struct request_queue *q, struct elevator_queue *eq)
 {
 	struct kyber_queue_data *kqd;
@@ -413,6 +421,7 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_queue *eq)
 	eq->elevator_data = kqd;
 	q->elevator = eq;
 
+	kyber_depth_updated(q);
 	return 0;
 }
@@ -440,15 +449,6 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
 		INIT_LIST_HEAD(&kcq->rq_list[i]);
 }
 
-static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
-{
-	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
-	struct blk_mq_tags *tags = hctx->sched_tags;
-
-	kqd->async_depth = hctx->queue->nr_requests * KYBER_ASYNC_PERCENT / 100U;
-	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
-}
-
 static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
 	struct kyber_hctx_data *khd;
@@ -493,7 +493,6 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	khd->batching = 0;
 
 	hctx->sched_data = khd;
-	kyber_depth_updated(hctx);
 
 	return 0;
 }

diff --git a/block/mq-deadline.c b/block/mq-deadline.c

@@ -507,22 +507,12 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 }
 
 /* Called by blk_mq_update_nr_requests(). */
-static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
+static void dd_depth_updated(struct request_queue *q)
 {
-	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
-	struct blk_mq_tags *tags = hctx->sched_tags;
 
 	dd->async_depth = q->nr_requests;
-
-	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
-}
-
-/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
-static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
-	dd_depth_updated(hctx);
-	return 0;
+	blk_mq_set_min_shallow_depth(q, 1);
 }
 
 static void dd_exit_sched(struct elevator_queue *e)
@@ -587,6 +577,7 @@ static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq)
 	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
 
 	q->elevator = eq;
+	dd_depth_updated(q);
 	return 0;
 }
@@ -1048,7 +1039,6 @@ static struct elevator_type mq_deadline = {
 		.has_work = dd_has_work,
 		.init_sched = dd_init_sched,
 		.exit_sched = dd_exit_sched,
-		.init_hctx = dd_init_hctx,
 	},
 
 #ifdef CONFIG_BLK_DEBUG_FS