block: make bio auto-integrity deadlock safe

The current block layer automatic integrity protection allocates the
actual integrity buffer, which has three problems:

 - because it happens at the bottom of the I/O stack and doesn't use a
   mempool it can deadlock under load
 - because the data size in a bio is almost unbounded when using large
   folios it can relatively easily exceed the maximum kmalloc size
 - even when it does not exceed the maximum kmalloc size, it could
   exceed the maximum segment size of the device

Fix this by limiting the I/O size so that we can allocate at least a
2MiB integrity buffer, i.e. 128MiB for 8 byte PI and 512 byte integrity
intervals, and create a mempool as a last resort for this maximum size,
mirroring the scheme used for bvecs.  As a nice upside none of this
can fail now, so we remove the error handling and open code the
trivial addition of the bip vec.

The new allocation helpers sit outside of bio-integrity-auto.c because
I plan to reuse them for file system based PI in the near future.

Fixes: 7ba1ba12ee ("block: Block layer data integrity support")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2025-11-03 05:16:45 -05:00 committed by Jens Axboe
parent eef09f742b
commit ec7f31b2a2
5 changed files with 83 additions and 19 deletions

View File

@ -29,7 +29,7 @@ static void bio_integrity_finish(struct bio_integrity_data *bid)
{ {
bid->bio->bi_integrity = NULL; bid->bio->bi_integrity = NULL;
bid->bio->bi_opf &= ~REQ_INTEGRITY; bid->bio->bi_opf &= ~REQ_INTEGRITY;
kfree(bvec_virt(bid->bip.bip_vec)); bio_integrity_free_buf(&bid->bip);
mempool_free(bid, &bid_pool); mempool_free(bid, &bid_pool);
} }
@ -110,8 +110,6 @@ bool bio_integrity_prep(struct bio *bio)
struct bio_integrity_data *bid; struct bio_integrity_data *bid;
bool set_flags = true; bool set_flags = true;
gfp_t gfp = GFP_NOIO; gfp_t gfp = GFP_NOIO;
unsigned int len;
void *buf;
if (!bi) if (!bi)
return true; return true;
@ -152,17 +150,12 @@ bool bio_integrity_prep(struct bio *bio)
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
return true; return true;
/* Allocate kernel buffer for protection data */
len = bio_integrity_bytes(bi, bio_sectors(bio));
buf = kmalloc(len, gfp);
if (!buf)
goto err_end_io;
bid = mempool_alloc(&bid_pool, GFP_NOIO); bid = mempool_alloc(&bid_pool, GFP_NOIO);
bio_integrity_init(bio, &bid->bip, &bid->bvec, 1); bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);
bid->bio = bio; bid->bio = bio;
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY; bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO);
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector); bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
if (set_flags) { if (set_flags) {
@ -174,21 +167,12 @@ bool bio_integrity_prep(struct bio *bio)
bid->bip.bip_flags |= BIP_CHECK_REFTAG; bid->bip.bip_flags |= BIP_CHECK_REFTAG;
} }
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf)) < len)
goto err_end_io;
/* Auto-generate integrity metadata if this is a write */ /* Auto-generate integrity metadata if this is a write */
if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip)) if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
blk_integrity_generate(bio); blk_integrity_generate(bio);
else else
bid->saved_bio_iter = bio->bi_iter; bid->saved_bio_iter = bio->bi_iter;
return true; return true;
err_end_io:
bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return false;
} }
EXPORT_SYMBOL(bio_integrity_prep); EXPORT_SYMBOL(bio_integrity_prep);

View File

@ -14,6 +14,45 @@ struct bio_integrity_alloc {
struct bio_vec bvecs[]; struct bio_vec bvecs[];
}; };
/* Emergency reserve of BLK_INTEGRITY_MAX_SIZE pages for the fallback path. */
static mempool_t integrity_buf_pool;

/*
 * Allocate the buffer holding the integrity metadata covering all data in
 * @bio and attach it as the single vector of the bio's integrity payload.
 *
 * Try an opportunistic kmalloc first, without dipping into reserves or
 * retrying, so that we fall back to the page mempool quickly under memory
 * pressure.  The queue limits cap a single bio so that its integrity data
 * fits into BLK_INTEGRITY_MAX_SIZE (see max_integrity_io_size()), hence the
 * mempool page always suffices and this function cannot fail.
 *
 * @zero_buffer: if true, the returned buffer is zeroed (via __GFP_ZERO on
 *	the kmalloc path, or an explicit memset on the mempool path, as
 *	elements taken from the mempool reserve are not pre-zeroed).
 */
void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);
	unsigned int len = bio_integrity_bytes(bi, bio_sectors(bio));
	gfp_t gfp = GFP_NOIO | (zero_buffer ? __GFP_ZERO : 0);
	void *buf;

	buf = kmalloc(len, (gfp & ~__GFP_DIRECT_RECLAIM) |
			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN);
	if (unlikely(!buf)) {
		struct page *page;

		/*
		 * Must use the GFP_NOIO-based mask here, not GFP_NOFS:
		 * GFP_NOFS still allows direct reclaim to issue I/O, which
		 * could recurse into this device and deadlock - the exact
		 * problem this mempool exists to avoid.
		 */
		page = mempool_alloc(&integrity_buf_pool, gfp);
		if (zero_buffer)
			memset(page_address(page), 0, len);
		bvec_set_page(&bip->bip_vec[0], page, len, 0);
		bip->bip_flags |= BIP_MEMPOOL;
	} else {
		bvec_set_page(&bip->bip_vec[0], virt_to_page(buf), len,
				offset_in_page(buf));
	}
	bip->bip_vcnt = 1;
	bip->bip_iter.bi_size = len;
}
/*
 * Release the integrity buffer referenced by @bip: return it to the
 * emergency mempool when it came from there (BIP_MEMPOOL set by
 * bio_integrity_alloc_buf()), otherwise hand it back to the slab allocator.
 */
void bio_integrity_free_buf(struct bio_integrity_payload *bip)
{
	struct bio_vec *bv = &bip->bip_vec[0];

	if (!(bip->bip_flags & BIP_MEMPOOL)) {
		kfree(bvec_virt(bv));
		return;
	}
	mempool_free(bv->bv_page, &integrity_buf_pool);
}
/** /**
* bio_integrity_free - Free bio integrity payload * bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed * @bio: bio containing bip to be freed
@ -438,3 +477,12 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0; return 0;
} }
/*
 * Set up the emergency integrity buffer mempool at boot.  Without this
 * reserve the kmalloc fallback in bio_integrity_alloc_buf() could not be
 * made fail-safe, so an init failure here is fatal.
 */
static int __init bio_integrity_initfn(void)
{
	int ret;

	ret = mempool_init_page_pool(&integrity_buf_pool, BIO_POOL_SIZE,
				     get_order(BLK_INTEGRITY_MAX_SIZE));
	if (ret)
		panic("bio: can't create integrity buf pool\n");
	return 0;
}
subsys_initcall(bio_integrity_initfn);

View File

@ -123,6 +123,19 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
return 0; return 0;
} }
/*
* Maximum size of I/O that needs a block layer integrity buffer. Limited
* by the number of intervals for which we can fit the integrity buffer into
* the buffer size. Because the buffer is a single segment it is also limited
* by the maximum segment size.
*/
static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
{
return min_t(unsigned int, lim->max_segment_size,
(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
lim->integrity.interval_exp);
}
static int blk_validate_integrity_limits(struct queue_limits *lim) static int blk_validate_integrity_limits(struct queue_limits *lim)
{ {
struct blk_integrity *bi = &lim->integrity; struct blk_integrity *bi = &lim->integrity;
@ -184,6 +197,14 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
if (!bi->interval_exp) if (!bi->interval_exp)
bi->interval_exp = ilog2(lim->logical_block_size); bi->interval_exp = ilog2(lim->logical_block_size);
/*
* The block layer automatically adds integrity data for bios that don't
* already have it. Limit the I/O size so that a single maximum size
* metadata segment can cover the integrity data for the entire I/O.
*/
lim->max_sectors = min(lim->max_sectors,
max_integrity_io_size(lim) >> SECTOR_SHIFT);
return 0; return 0;
} }

View File

@ -14,6 +14,8 @@ enum bip_flags {
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
BIP_P2P_DMA = 1 << 8, /* using P2P address */ BIP_P2P_DMA = 1 << 8, /* using P2P address */
BIP_MEMPOOL = 1 << 15, /* buffer backed by mempool */
}; };
struct bio_integrity_payload { struct bio_integrity_payload {
@ -140,4 +142,8 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
return 0; return 0;
} }
#endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* CONFIG_BLK_DEV_INTEGRITY */
void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
void bio_integrity_free_buf(struct bio_integrity_payload *bip);
#endif /* _LINUX_BIO_INTEGRITY_H */ #endif /* _LINUX_BIO_INTEGRITY_H */

View File

@ -8,6 +8,11 @@
struct request; struct request;
/*
* Maximum contiguous integrity buffer allocation.
*/
#define BLK_INTEGRITY_MAX_SIZE SZ_2M
enum blk_integrity_flags { enum blk_integrity_flags {
BLK_INTEGRITY_NOVERIFY = 1 << 0, BLK_INTEGRITY_NOVERIFY = 1 << 0,
BLK_INTEGRITY_NOGENERATE = 1 << 1, BLK_INTEGRITY_NOGENERATE = 1 << 1,