From c6603b1d6556cc02d0169f74508ab0e3e3e4bd76 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 30 Jun 2025 14:35:45 +0530 Subject: [PATCH 1/6] block: rename tuple_size field in blk_integrity to metadata_size The tuple_size field in blk_integrity currently represents the total size of metadata associated with each data interval. To make the meaning more explicit, rename tuple_size to metadata_size. This is a purely mechanical rename with no functional changes. Suggested-by: Martin K. Petersen Signed-off-by: Anuj Gupta Link: https://lore.kernel.org/20250630090548.3317-2-anuj20.g@samsung.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Christian Brauner --- block/bio-integrity-auto.c | 4 ++-- block/blk-integrity.c | 2 +- block/blk-settings.c | 6 +++--- block/t10-pi.c | 16 ++++++++-------- drivers/md/dm-crypt.c | 4 ++-- drivers/md/dm-integrity.c | 12 ++++++------ drivers/nvdimm/btt.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/scsi/sd_dif.c | 2 +- include/linux/blk-integrity.h | 4 ++-- include/linux/blkdev.h | 2 +- 12 files changed, 29 insertions(+), 29 deletions(-) diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c index 9c6657664792..687952f63bbb 100644 --- a/block/bio-integrity-auto.c +++ b/block/bio-integrity-auto.c @@ -54,10 +54,10 @@ static bool bi_offload_capable(struct blk_integrity *bi) { switch (bi->csum_type) { case BLK_INTEGRITY_CSUM_CRC64: - return bi->tuple_size == sizeof(struct crc64_pi_tuple); + return bi->metadata_size == sizeof(struct crc64_pi_tuple); case BLK_INTEGRITY_CSUM_CRC: case BLK_INTEGRITY_CSUM_IP: - return bi->tuple_size == sizeof(struct t10_pi_tuple); + return bi->metadata_size == sizeof(struct t10_pi_tuple); default: pr_warn_once("%s: unknown integrity checksum type:%d\n", __func__, bi->csum_type); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index e4e2567061f9..c1102bf4cd8d 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -239,7 +239,7 @@ static ssize_t format_show(struct device *dev, struct device_attribute *attr, { struct blk_integrity *bi = dev_to_bi(dev); - if (!bi->tuple_size) + if (!bi->metadata_size) return sysfs_emit(page, "none\n"); return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi)); } diff --git a/block/blk-settings.c b/block/blk-settings.c index a000daafbfb4..787500ff00c3 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -114,7 +114,7 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) { struct blk_integrity *bi = &lim->integrity; - if (!bi->tuple_size) { + if (!bi->metadata_size) { if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE || bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) { pr_warn("invalid PI settings.\n"); @@ -875,7 +875,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t, return true; if (ti->flags & BLK_INTEGRITY_STACKED) { - if (ti->tuple_size != bi->tuple_size) + if (ti->metadata_size != bi->metadata_size) goto incompatible; if (ti->interval_exp != bi->interval_exp) goto incompatible; @@ -891,7 +891,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t, ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) | (bi->flags & BLK_INTEGRITY_REF_TAG); ti->csum_type = bi->csum_type; - ti->tuple_size = bi->tuple_size; + ti->metadata_size = bi->metadata_size; ti->pi_offset = bi->pi_offset; ti->interval_exp = bi->interval_exp; ti->tag_size = bi->tag_size; diff --git a/block/t10-pi.c b/block/t10-pi.c index 851db518ee5e..0c4ed9702146 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -56,7 +56,7 @@ static void t10_pi_generate(struct blk_integrity_iter *iter, pi->ref_tag = 0; iter->data_buf += iter->interval; - iter->prot_buf += bi->tuple_size; + iter->prot_buf += bi->metadata_size; iter->seed++; } } @@ -105,7 +105,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, next: iter->data_buf += iter->interval; - iter->prot_buf += bi->tuple_size; + iter->prot_buf += bi->metadata_size; iter->seed++; } @@ -125,7 +125,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, static void t10_pi_type1_prepare(struct request *rq) { struct blk_integrity *bi = &rq->q->limits.integrity; - const int tuple_sz = bi->tuple_size; + const int tuple_sz = bi->metadata_size; u32 ref_tag = t10_pi_ref_tag(rq); u8 offset = bi->pi_offset; struct bio *bio; @@ -177,7 +177,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes) { struct blk_integrity *bi = &rq->q->limits.integrity; unsigned intervals = nr_bytes >> bi->interval_exp; - const int tuple_sz = bi->tuple_size; + const int tuple_sz = bi->metadata_size; u32 ref_tag = t10_pi_ref_tag(rq); u8 offset = bi->pi_offset; struct bio *bio; @@ -234,7 +234,7 @@ static void ext_pi_crc64_generate(struct blk_integrity_iter *iter, put_unaligned_be48(0ULL, pi->ref_tag); iter->data_buf += iter->interval; - iter->prot_buf += bi->tuple_size; + iter->prot_buf += bi->metadata_size; iter->seed++; } } @@ -289,7 +289,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, next: iter->data_buf += iter->interval; - iter->prot_buf += bi->tuple_size; + iter->prot_buf += bi->metadata_size; iter->seed++; } @@ -299,7 +299,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, static void ext_pi_type1_prepare(struct request *rq) { struct blk_integrity *bi = &rq->q->limits.integrity; - const int tuple_sz = bi->tuple_size; + const int tuple_sz = bi->metadata_size; u64 ref_tag = ext_pi_ref_tag(rq); u8 offset = bi->pi_offset; struct bio *bio; @@ -340,7 +340,7 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) { struct blk_integrity *bi = &rq->q->limits.integrity; unsigned intervals = nr_bytes >> bi->interval_exp; - const int tuple_sz = bi->tuple_size; + const int tuple_sz = bi->metadata_size; u64 ref_tag = ext_pi_ref_tag(rq); u8 offset = bi->pi_offset; struct bio *bio; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9dfdb63220d7..3d6d06b94c9f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1189,11 +1189,11 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) return -EINVAL; } - if (bi->tuple_size < cc->used_tag_size) { + if (bi->metadata_size < cc->used_tag_size) { ti->error = "Integrity profile tag size mismatch."; return -EINVAL; } - cc->tuple_size = bi->tuple_size; + cc->tuple_size = bi->metadata_size; if (1 << bi->interval_exp != cc->sector_size) { ti->error = "Integrity profile sector size mismatch."; return -EINVAL; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 4395657fa583..efeee0a873c0 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3906,8 +3906,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim struct blk_integrity *bi = &limits->integrity; memset(bi, 0, sizeof(*bi)); - bi->tuple_size = ic->tag_size; - bi->tag_size = bi->tuple_size; + bi->metadata_size = ic->tag_size; + bi->tag_size = bi->metadata_size; bi->interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT; } @@ -4746,18 +4746,18 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv ti->error = "Integrity profile not supported"; goto bad; } - /*printk("tag_size: %u, tuple_size: %u\n", bi->tag_size, bi->tuple_size);*/ - if (bi->tuple_size < ic->tag_size) { + /*printk("tag_size: %u, metadata_size: %u\n", bi->tag_size, bi->metadata_size);*/ + if (bi->metadata_size < ic->tag_size) { r = -EINVAL; ti->error = "The integrity profile is smaller than tag size"; goto bad; } - if ((unsigned long)bi->tuple_size > PAGE_SIZE / 2) { + if ((unsigned long)bi->metadata_size > PAGE_SIZE / 2) { r = -EINVAL; ti->error = "Too big tuple size"; goto bad; } - ic->tuple_size = bi->tuple_size; + ic->tuple_size = bi->metadata_size; if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) { r = -EINVAL; ti->error = "Integrity profile sector size mismatch"; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 423dcd190906..2a1aa32e6693 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1506,7 +1506,7 @@ static int btt_blk_init(struct btt *btt) int rc; if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { - lim.integrity.tuple_size = btt_meta_size(btt); + lim.integrity.metadata_size = btt_meta_size(btt); lim.integrity.tag_size = btt_meta_size(btt); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 92697f98c601..b027dda38e69 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1866,7 +1866,7 @@ static bool nvme_init_integrity(struct nvme_ns_head *head, break; } - bi->tuple_size = head->ms; + bi->metadata_size = head->ms; bi->pi_offset = info->pi_offset; return true; } diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index eba42df2f821..42fb19f94ab8 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -65,7 +65,7 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) return; if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) { - ns->metadata_size = bi->tuple_size; + ns->metadata_size = bi->metadata_size; if (bi->flags & BLK_INTEGRITY_REF_TAG) ns->pi_type = NVME_NS_DPS_PI_TYPE1; else diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index ae6ce6f5d622..18bfca1f1c78 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -52,7 +52,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim) if (type != T10_PI_TYPE3_PROTECTION) bi->flags |= BLK_INTEGRITY_REF_TAG; - bi->tuple_size = sizeof(struct t10_pi_tuple); + bi->metadata_size = sizeof(struct t10_pi_tuple); if (dif && type) { bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index c7eae0bfb013..d27730da47f3 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -33,7 +33,7 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) { - return q->limits.integrity.tuple_size; + return q->limits.integrity.metadata_size; } static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) @@ -74,7 +74,7 @@ static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, unsigned int sectors) { - return bio_integrity_intervals(bi, sectors) * bi->tuple_size; + return bio_integrity_intervals(bi, sectors) * bi->metadata_size; } static inline bool blk_integrity_rq(struct request *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..ccda87d06a38 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -116,7 +116,7 @@ enum blk_integrity_checksum { struct blk_integrity { unsigned char flags; enum blk_integrity_checksum csum_type; - unsigned char tuple_size; + unsigned char metadata_size; unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; From 76e45252a4cefa205439eb6610a244771e7d88da Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 30 Jun 2025 14:35:46 +0530 Subject: [PATCH 2/6] block: introduce pi_tuple_size field in blk_integrity Introduce a new pi_tuple_size field in struct blk_integrity to explicitly represent the size (in bytes) of the protection information (PI) tuple. This is a prep patch. Add validation in blk_validate_integrity_limits() to ensure that pi size matches the expected size for known checksum types and never exceeds the pi_tuple_size. Suggested-by: Christoph Hellwig Signed-off-by: Anuj Gupta Link: https://lore.kernel.org/20250630090548.3317-3-anuj20.g@samsung.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Christian Brauner --- block/blk-settings.c | 38 ++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/core.c | 2 ++ drivers/scsi/sd_dif.c | 1 + include/linux/blkdev.h | 1 + 4 files changed, 42 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 787500ff00c3..32f3cdc9835a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "blk.h" #include "blk-rq-qos.h" @@ -135,6 +137,42 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) return -EINVAL; } + if (bi->pi_tuple_size > bi->metadata_size) { + pr_warn("pi_tuple_size (%u) exceeds metadata_size (%u)\n", + bi->pi_tuple_size, + bi->metadata_size); + return -EINVAL; + } + + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_NONE: + if (bi->pi_tuple_size) { + pr_warn("pi_tuple_size must be 0 when checksum type \ + is none\n"); + return -EINVAL; + } + break; + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + if (bi->pi_tuple_size != sizeof(struct t10_pi_tuple)) { + pr_warn("pi_tuple_size mismatch for T10 PI: expected \ + %zu, got %u\n", + sizeof(struct t10_pi_tuple), + bi->pi_tuple_size); + return -EINVAL; + } + break; + case BLK_INTEGRITY_CSUM_CRC64: + if (bi->pi_tuple_size != sizeof(struct crc64_pi_tuple)) { + pr_warn("pi_tuple_size mismatch for CRC64 PI: \ + expected %zu, got %u\n", + sizeof(struct crc64_pi_tuple), + bi->pi_tuple_size); + return -EINVAL; + } + break; + } + if (!bi->interval_exp) bi->interval_exp = ilog2(lim->logical_block_size); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b027dda38e69..fe72accab516 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1867,6 +1867,8 @@ static bool nvme_init_integrity(struct nvme_ns_head *head, } bi->metadata_size = head->ms; + if (bi->csum_type) + bi->pi_tuple_size = head->pi_size; bi->pi_offset = info->pi_offset; return true; } diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 18bfca1f1c78..ff4217fef93b 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -53,6 +53,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim) bi->flags |= BLK_INTEGRITY_REF_TAG; bi->metadata_size = sizeof(struct t10_pi_tuple); + bi->pi_tuple_size = bi->metadata_size; if (dif && type) { bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ccda87d06a38..0d4011dcfed3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -120,6 +120,7 @@ struct blk_integrity { unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; + unsigned char pi_tuple_size; }; typedef unsigned int __bitwise blk_mode_t; From f3ee50659148e4c1b7d1e58cfec897b14d36a00d Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 30 Jun 2025 14:35:47 +0530 Subject: [PATCH 3/6] nvme: set pi_offset only when checksum type is not BLK_INTEGRITY_CSUM_NONE protection information is treated as opaque when checksum type is BLK_INTEGRITY_CSUM_NONE. In order to maintain the right metadata semantics, set pi_offset only in cases where checksum type is not BLK_INTEGRITY_CSUM_NONE. Signed-off-by: Anuj Gupta Link: https://lore.kernel.org/20250630090548.3317-4-anuj20.g@samsung.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Christian Brauner --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fe72accab516..806b6e73276d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1867,9 +1867,10 @@ static bool nvme_init_integrity(struct nvme_ns_head *head, } bi->metadata_size = head->ms; - if (bi->csum_type) + if (bi->csum_type) { bi->pi_tuple_size = head->pi_size; - bi->pi_offset = info->pi_offset; + bi->pi_offset = info->pi_offset; + } return true; } From 9eb22f7fedfc9eb1b7f431a5359abd4d15b0b0cd Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 30 Jun 2025 14:35:48 +0530 Subject: [PATCH 4/6] fs: add ioctl to query metadata and protection info capabilities Add a new ioctl, FS_IOC_GETLBMD_CAP, to query metadata and protection info (PI) capabilities. This ioctl returns information about the files integrity profile. This is useful for userspace applications to understand a files end-to-end data protection support and configure the I/O accordingly. For now this interface is only supported by block devices. However the design and placement of this ioctl in generic FS ioctl space allows us to extend it to work over files as well. This maybe useful when filesystems start supporting PI-aware layouts. A new structure struct logical_block_metadata_cap is introduced, which contains the following fields: 1. lbmd_flags: bitmask of logical block metadata capability flags 2. lbmd_interval: the amount of data described by each unit of logical block metadata 3. lbmd_size: size in bytes of the logical block metadata associated with each interval 4. lbmd_opaque_size: size in bytes of the opaque block tag associated with each interval 5. lbmd_opaque_offset: offset in bytes of the opaque block tag within the logical block metadata 6. lbmd_pi_size: size in bytes of the T10 PI tuple associated with each interval 7. lbmd_pi_offset: offset in bytes of T10 PI tuple within the logical block metadata 8. lbmd_pi_guard_tag_type: T10 PI guard tag type 9. lbmd_pi_app_tag_size: size in bytes of the T10 PI application tag 10. lbmd_pi_ref_tag_size: size in bytes of the T10 PI reference tag 11. lbmd_pi_storage_tag_size: size in bytes of the T10 PI storage tag The internal logic to fetch the capability is encapsulated in a helper function blk_get_meta_cap(), which uses the blk_integrity profile associated with the device. The ioctl returns -EOPNOTSUPP, if CONFIG_BLK_DEV_INTEGRITY is not enabled. Suggested-by: Martin K. Petersen Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/20250630090548.3317-5-anuj20.g@samsung.com Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- block/blk-integrity.c | 52 ++++++++++++++++++++++++++++++ block/ioctl.c | 4 +++ include/linux/blk-integrity.h | 7 +++++ include/uapi/linux/fs.h | 59 +++++++++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index c1102bf4cd8d..9d9dc9c32083 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "blk.h" @@ -54,6 +55,57 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) return segments; } +int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, + struct logical_block_metadata_cap __user *argp) +{ + struct blk_integrity *bi = blk_get_integrity(bdev->bd_disk); + struct logical_block_metadata_cap meta_cap = {}; + size_t usize = _IOC_SIZE(cmd); + + if (!argp) + return -EINVAL; + if (usize < LBMD_SIZE_VER0) + return -EINVAL; + if (!bi) + goto out; + + if (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) + meta_cap.lbmd_flags |= LBMD_PI_CAP_INTEGRITY; + if (bi->flags & BLK_INTEGRITY_REF_TAG) + meta_cap.lbmd_flags |= LBMD_PI_CAP_REFTAG; + meta_cap.lbmd_interval = 1 << bi->interval_exp; + meta_cap.lbmd_size = bi->metadata_size; + meta_cap.lbmd_pi_size = bi->pi_tuple_size; + meta_cap.lbmd_pi_offset = bi->pi_offset; + meta_cap.lbmd_opaque_size = bi->metadata_size - bi->pi_tuple_size; + if (meta_cap.lbmd_opaque_size && !bi->pi_offset) + meta_cap.lbmd_opaque_offset = bi->pi_tuple_size; + + meta_cap.lbmd_guard_tag_type = bi->csum_type; + if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE) + meta_cap.lbmd_app_tag_size = 2; + + if (bi->flags & BLK_INTEGRITY_REF_TAG) { + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + meta_cap.lbmd_ref_tag_size = + sizeof_field(struct crc64_pi_tuple, ref_tag); + break; + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + meta_cap.lbmd_ref_tag_size = + sizeof_field(struct t10_pi_tuple, ref_tag); + break; + default: + break; + } + } + +out: + return copy_struct_to_user(argp, usize, &meta_cap, sizeof(meta_cap), + NULL); +} + /** * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist * @rq: request to map diff --git a/block/ioctl.c b/block/ioctl.c index e472cc1030c6..9ad403733e19 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "blk.h" #include "blk-crypto-internal.h" @@ -566,6 +567,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, { unsigned int max_sectors; + if (_IOC_NR(cmd) == _IOC_NR(FS_IOC_GETLBMD_CAP)) + return blk_get_meta_cap(bdev, cmd, argp); + switch (cmd) { case BLKFLSBUF: return blkdev_flushbuf(bdev, cmd, arg); diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index d27730da47f3..e04c6e5bf1c6 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -29,6 +29,8 @@ int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes); +int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, + struct logical_block_metadata_cap __user *argp); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -92,6 +94,11 @@ static inline struct bio_vec rq_integrity_vec(struct request *rq) rq->bio->bi_integrity->bip_iter); } #else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, + struct logical_block_metadata_cap __user *argp) +{ + return -EOPNOTSUPP; +} static inline int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *b) { diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0098b0ce8ccb..83720a2fd20d 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -91,6 +91,63 @@ struct fs_sysfs_path { __u8 name[128]; }; +/* Protection info capability flags */ +#define LBMD_PI_CAP_INTEGRITY (1 << 0) +#define LBMD_PI_CAP_REFTAG (1 << 1) + +/* Checksum types for Protection Information */ +#define LBMD_PI_CSUM_NONE 0 +#define LBMD_PI_CSUM_IP 1 +#define LBMD_PI_CSUM_CRC16_T10DIF 2 +#define LBMD_PI_CSUM_CRC64_NVME 4 + +/* sizeof first published struct */ +#define LBMD_SIZE_VER0 16 + +/* + * Logical block metadata capability descriptor + * If the device does not support metadata, all the fields will be zero. + * Applications must check lbmd_flags to determine whether metadata is + * supported or not. + */ +struct logical_block_metadata_cap { + /* Bitmask of logical block metadata capability flags */ + __u32 lbmd_flags; + /* + * The amount of data described by each unit of logical block + * metadata + */ + __u16 lbmd_interval; + /* + * Size in bytes of the logical block metadata associated with each + * interval + */ + __u8 lbmd_size; + /* + * Size in bytes of the opaque block tag associated with each + * interval + */ + __u8 lbmd_opaque_size; + /* + * Offset in bytes of the opaque block tag within the logical block + * metadata + */ + __u8 lbmd_opaque_offset; + /* Size in bytes of the T10 PI tuple associated with each interval */ + __u8 lbmd_pi_size; + /* Offset in bytes of T10 PI tuple within the logical block metadata */ + __u8 lbmd_pi_offset; + /* T10 PI guard tag type */ + __u8 lbmd_guard_tag_type; + /* Size in bytes of the T10 PI application tag */ + __u8 lbmd_app_tag_size; + /* Size in bytes of the T10 PI reference tag */ + __u8 lbmd_ref_tag_size; + /* Size in bytes of the T10 PI storage tag */ + __u8 lbmd_storage_tag_size; + __u8 pad; +}; + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1 @@ -247,6 +304,8 @@ struct fsxattr { * also /sys/kernel/debug/ for filesystems with debugfs exports */ #define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) +/* Get logical block metadata capability details */ +#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap) /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) From 42b0ef01e6b5e9c77b383d32c25a0ec2a735d08a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Jul 2025 10:46:51 +0200 Subject: [PATCH 5/6] block: fix FS_IOC_GETLBMD_CAP parsing in blkdev_common_ioctl() Anders and Naresh found that the addition of the FS_IOC_GETLBMD_CAP handling in the blockdev ioctl handler breaks all ioctls with _IOC_NR==2, as the new command is not added to the switch but only a few of the command bits are check. Move the check into the blk_get_meta_cap() function itself and make it return -ENOIOCTLCMD for any unsupported command code, including those with a smaller size that previously returned -EINVAL. For consistency this also drops the check for NULL 'arg' that is really useless, as any invalid pointer should return -EFAULT. Fixes: 9eb22f7fedfc ("fs: add ioctl to query metadata and protection info capabilities") Link: https://lore.kernel.org/all/CA+G9fYvk9HHE5UJ7cdJHTcY6P5JKnp+_e+sdC5U-ZQFTP9_hqQ@mail.gmail.com/ Reported-by: Naresh Kamboju Cc: Anders Roxell Cc: Naresh Kamboju Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/20250711084708.2714436-1-arnd@kernel.org Tested-by: Anders Roxell Signed-off-by: Christian Brauner --- block/blk-integrity.c | 10 ++++++---- block/ioctl.c | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 9d9dc9c32083..61a79e19c78f 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -62,10 +62,12 @@ int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, struct logical_block_metadata_cap meta_cap = {}; size_t usize = _IOC_SIZE(cmd); - if (!argp) - return -EINVAL; - if (usize < LBMD_SIZE_VER0) - return -EINVAL; + if (_IOC_DIR(cmd) != _IOC_DIR(FS_IOC_GETLBMD_CAP) || + _IOC_TYPE(cmd) != _IOC_TYPE(FS_IOC_GETLBMD_CAP) || + _IOC_NR(cmd) != _IOC_NR(FS_IOC_GETLBMD_CAP) || + _IOC_SIZE(cmd) < LBMD_SIZE_VER0) + return -ENOIOCTLCMD; + if (!bi) goto out; diff --git a/block/ioctl.c b/block/ioctl.c index 9ad403733e19..af2e22e5533c 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -566,9 +566,11 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, void __user *argp) { unsigned int max_sectors; + int ret; - if (_IOC_NR(cmd) == _IOC_NR(FS_IOC_GETLBMD_CAP)) - return blk_get_meta_cap(bdev, cmd, argp); + ret = blk_get_meta_cap(bdev, cmd, argp); + if (ret != -ENOIOCTLCMD) + return ret; switch (cmd) { case BLKFLSBUF: From bc5b0c8febccbeabfefc9b59083b223ec7c7b53a Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Tue, 22 Jul 2025 17:37:55 +0530 Subject: [PATCH 6/6] block: fix lbmd_guard_tag_type assignment in FS_IOC_GETLBMD_CAP The blk_get_meta_cap() implementation directly assigns bi->csum_type to the UAPI field lbmd_guard_tag_type. This is not right as the kernel enum blk_integrity_checksum values are not guaranteed to match the UAPI defined values. Fix this by explicitly mapping internal checksum types to UAPI-defined constants to ensure compatibility and correctness, especially for the devices using CRC64 PI. Fixes: 9eb22f7fedfc ("fs: add ioctl to query metadata and protection info capabilities") Reported-by: Vincent Fu Signed-off-by: Anuj Gupta Link: https://lore.kernel.org/20250722120755.87501-1-anuj20.g@samsung.com Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/blk-integrity.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 61a79e19c78f..056b8948369d 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -83,7 +83,21 @@ int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, if (meta_cap.lbmd_opaque_size && !bi->pi_offset) meta_cap.lbmd_opaque_offset = bi->pi_tuple_size; - meta_cap.lbmd_guard_tag_type = bi->csum_type; + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_NONE: + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_NONE; + break; + case BLK_INTEGRITY_CSUM_IP: + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_IP; + break; + case BLK_INTEGRITY_CSUM_CRC: + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC16_T10DIF; + break; + case BLK_INTEGRITY_CSUM_CRC64: + meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC64_NVME; + break; + } + if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE) meta_cap.lbmd_app_tag_size = 2;