mirror of https://github.com/torvalds/linux.git
block: use zone condition to determine conventional zones
The conv_zones_bitmap field of struct gendisk is used to define a bitmap to identify the conventional zones of a zoned block device. The bit for a zone is set in this bitmap if the zone is a conventional one, that is, if the zone type is BLK_ZONE_TYPE_CONVENTIONAL. For such a zone, this always corresponds to the zone condition BLK_ZONE_COND_NOT_WP. In other words, conv_zones_bitmap tracks a single condition of the zones of a zoned block device. In preparation for tracking more zone conditions, change conv_zones_bitmap into an array of zone conditions, using 1 byte per zone. This increases the memory usage from 1 bit per zone to 1 byte per zone, that is, from 16 KiB to about 100 KiB for a 30 TB SMR HDD with 256 MiB zones. This is a trade-off to allow fast cached report zones later on top of this change. Rename the conv_zones_bitmap field of struct gendisk to zones_cond. Add a blk_revalidate_zone_cond() function to initialize the zones_cond array of a disk during device scan and to update it on device revalidation. Move the allocation of the zones_cond array to disk_revalidate_zone_resources(), making sure that this array is always allocated, even for devices that do not need zone write plugs (zone resources), to ensure that bdev_zone_is_seq() can be re-implemented to use the zone condition array in place of the conv zones bitmap. Finally, the function bdev_zone_is_seq() is rewritten to use a test on the condition of the target zone. Signed-off-by: Damien Le Moal <dlemoal@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> Reviewed-by: Hannes Reinecke <hare@suse.de> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
ca1a897fb2
commit
6e945ffb65
|
|
@ -114,6 +114,33 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
|
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bdev_zone_is_seq - check if a sector belongs to a sequential write zone
|
||||||
|
* @bdev: block device to check
|
||||||
|
* @sector: sector number
|
||||||
|
*
|
||||||
|
* Check if @sector on @bdev is contained in a sequential write required zone.
|
||||||
|
*/
|
||||||
|
bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
|
||||||
|
{
|
||||||
|
struct gendisk *disk = bdev->bd_disk;
|
||||||
|
unsigned int zno = disk_zone_no(disk, sector);
|
||||||
|
bool is_seq = false;
|
||||||
|
u8 *zones_cond;
|
||||||
|
|
||||||
|
if (!bdev_is_zoned(bdev))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
zones_cond = rcu_dereference(disk->zones_cond);
|
||||||
|
if (zones_cond && zno < disk->nr_zones)
|
||||||
|
is_seq = zones_cond[zno] != BLK_ZONE_COND_NOT_WP;
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return is_seq;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(bdev_zone_is_seq);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Zone report arguments for block device drivers report_zones operation.
|
* Zone report arguments for block device drivers report_zones operation.
|
||||||
* @cb: report_zones_cb callback for each reported zone.
|
* @cb: report_zones_cb callback for each reported zone.
|
||||||
|
|
@ -1458,22 +1485,16 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
|
||||||
disk->zone_wplugs_hash_bits = 0;
|
disk->zone_wplugs_hash_bits = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int disk_set_conv_zones_bitmap(struct gendisk *disk,
|
static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
|
||||||
unsigned long *bitmap)
|
|
||||||
{
|
{
|
||||||
unsigned int nr_conv_zones = 0;
|
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||||
if (bitmap)
|
zones_cond = rcu_replace_pointer(disk->zones_cond, zones_cond,
|
||||||
nr_conv_zones = bitmap_weight(bitmap, disk->nr_zones);
|
|
||||||
bitmap = rcu_replace_pointer(disk->conv_zones_bitmap, bitmap,
|
|
||||||
lockdep_is_held(&disk->zone_wplugs_lock));
|
lockdep_is_held(&disk->zone_wplugs_lock));
|
||||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||||
|
|
||||||
kfree_rcu_mightsleep(bitmap);
|
kfree_rcu_mightsleep(zones_cond);
|
||||||
|
|
||||||
return nr_conv_zones;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void disk_free_zone_resources(struct gendisk *disk)
|
void disk_free_zone_resources(struct gendisk *disk)
|
||||||
|
|
@ -1497,7 +1518,7 @@ void disk_free_zone_resources(struct gendisk *disk)
|
||||||
mempool_destroy(disk->zone_wplugs_pool);
|
mempool_destroy(disk->zone_wplugs_pool);
|
||||||
disk->zone_wplugs_pool = NULL;
|
disk->zone_wplugs_pool = NULL;
|
||||||
|
|
||||||
disk_set_conv_zones_bitmap(disk, NULL);
|
disk_set_zones_cond_array(disk, NULL);
|
||||||
disk->zone_capacity = 0;
|
disk->zone_capacity = 0;
|
||||||
disk->last_zone_capacity = 0;
|
disk->last_zone_capacity = 0;
|
||||||
disk->nr_zones = 0;
|
disk->nr_zones = 0;
|
||||||
|
|
@ -1516,12 +1537,31 @@ static inline bool disk_need_zone_resources(struct gendisk *disk)
|
||||||
queue_emulates_zone_append(disk->queue);
|
queue_emulates_zone_append(disk->queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct blk_revalidate_zone_args {
|
||||||
|
struct gendisk *disk;
|
||||||
|
u8 *zones_cond;
|
||||||
|
unsigned int nr_zones;
|
||||||
|
unsigned int nr_conv_zones;
|
||||||
|
unsigned int zone_capacity;
|
||||||
|
unsigned int last_zone_capacity;
|
||||||
|
sector_t sector;
|
||||||
|
};
|
||||||
|
|
||||||
static int disk_revalidate_zone_resources(struct gendisk *disk,
|
static int disk_revalidate_zone_resources(struct gendisk *disk,
|
||||||
unsigned int nr_zones)
|
struct blk_revalidate_zone_args *args)
|
||||||
{
|
{
|
||||||
struct queue_limits *lim = &disk->queue->limits;
|
struct queue_limits *lim = &disk->queue->limits;
|
||||||
unsigned int pool_size;
|
unsigned int pool_size;
|
||||||
|
|
||||||
|
args->disk = disk;
|
||||||
|
args->nr_zones =
|
||||||
|
DIV_ROUND_UP_ULL(get_capacity(disk), lim->chunk_sectors);
|
||||||
|
|
||||||
|
/* Cached zone conditions: 1 byte per zone */
|
||||||
|
args->zones_cond = kzalloc(args->nr_zones, GFP_NOIO);
|
||||||
|
if (!args->zones_cond)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
if (!disk_need_zone_resources(disk))
|
if (!disk_need_zone_resources(disk))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
|
@ -1531,7 +1571,8 @@ static int disk_revalidate_zone_resources(struct gendisk *disk,
|
||||||
*/
|
*/
|
||||||
pool_size = max(lim->max_open_zones, lim->max_active_zones);
|
pool_size = max(lim->max_open_zones, lim->max_active_zones);
|
||||||
if (!pool_size)
|
if (!pool_size)
|
||||||
pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_zones);
|
pool_size =
|
||||||
|
min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, args->nr_zones);
|
||||||
|
|
||||||
if (!disk->zone_wplugs_hash)
|
if (!disk->zone_wplugs_hash)
|
||||||
return disk_alloc_zone_resources(disk, pool_size);
|
return disk_alloc_zone_resources(disk, pool_size);
|
||||||
|
|
@ -1539,15 +1580,6 @@ static int disk_revalidate_zone_resources(struct gendisk *disk,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct blk_revalidate_zone_args {
|
|
||||||
struct gendisk *disk;
|
|
||||||
unsigned long *conv_zones_bitmap;
|
|
||||||
unsigned int nr_zones;
|
|
||||||
unsigned int zone_capacity;
|
|
||||||
unsigned int last_zone_capacity;
|
|
||||||
sector_t sector;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update the disk zone resources information and device queue limits.
|
* Update the disk zone resources information and device queue limits.
|
||||||
* The disk queue is frozen when this is executed.
|
* The disk queue is frozen when this is executed.
|
||||||
|
|
@ -1556,7 +1588,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||||
struct blk_revalidate_zone_args *args)
|
struct blk_revalidate_zone_args *args)
|
||||||
{
|
{
|
||||||
struct request_queue *q = disk->queue;
|
struct request_queue *q = disk->queue;
|
||||||
unsigned int nr_seq_zones, nr_conv_zones;
|
unsigned int nr_seq_zones;
|
||||||
unsigned int pool_size, memflags;
|
unsigned int pool_size, memflags;
|
||||||
struct queue_limits lim;
|
struct queue_limits lim;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
@ -1566,24 +1598,24 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||||
memflags = blk_mq_freeze_queue(q);
|
memflags = blk_mq_freeze_queue(q);
|
||||||
|
|
||||||
disk->nr_zones = args->nr_zones;
|
disk->nr_zones = args->nr_zones;
|
||||||
disk->zone_capacity = args->zone_capacity;
|
if (args->nr_conv_zones >= disk->nr_zones) {
|
||||||
disk->last_zone_capacity = args->last_zone_capacity;
|
|
||||||
nr_conv_zones =
|
|
||||||
disk_set_conv_zones_bitmap(disk, args->conv_zones_bitmap);
|
|
||||||
if (nr_conv_zones >= disk->nr_zones) {
|
|
||||||
pr_warn("%s: Invalid number of conventional zones %u / %u\n",
|
pr_warn("%s: Invalid number of conventional zones %u / %u\n",
|
||||||
disk->disk_name, nr_conv_zones, disk->nr_zones);
|
disk->disk_name, args->nr_conv_zones, disk->nr_zones);
|
||||||
ret = -ENODEV;
|
ret = -ENODEV;
|
||||||
goto unfreeze;
|
goto unfreeze;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
disk->zone_capacity = args->zone_capacity;
|
||||||
|
disk->last_zone_capacity = args->last_zone_capacity;
|
||||||
|
disk_set_zones_cond_array(disk, args->zones_cond);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some devices can advertize zone resource limits that are larger than
|
* Some devices can advertise zone resource limits that are larger than
|
||||||
* the number of sequential zones of the zoned block device, e.g. a
|
* the number of sequential zones of the zoned block device, e.g. a
|
||||||
* small ZNS namespace. For such case, assume that the zoned device has
|
* small ZNS namespace. For such case, assume that the zoned device has
|
||||||
* no zone resource limits.
|
* no zone resource limits.
|
||||||
*/
|
*/
|
||||||
nr_seq_zones = disk->nr_zones - nr_conv_zones;
|
nr_seq_zones = disk->nr_zones - args->nr_conv_zones;
|
||||||
if (lim.max_open_zones >= nr_seq_zones)
|
if (lim.max_open_zones >= nr_seq_zones)
|
||||||
lim.max_open_zones = 0;
|
lim.max_open_zones = 0;
|
||||||
if (lim.max_active_zones >= nr_seq_zones)
|
if (lim.max_active_zones >= nr_seq_zones)
|
||||||
|
|
@ -1624,6 +1656,44 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int blk_revalidate_zone_cond(struct blk_zone *zone, unsigned int idx,
|
||||||
|
struct blk_revalidate_zone_args *args)
|
||||||
|
{
|
||||||
|
enum blk_zone_cond cond = zone->cond;
|
||||||
|
|
||||||
|
/* Check that the zone condition is consistent with the zone type. */
|
||||||
|
switch (cond) {
|
||||||
|
case BLK_ZONE_COND_NOT_WP:
|
||||||
|
if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
|
||||||
|
goto invalid_condition;
|
||||||
|
break;
|
||||||
|
case BLK_ZONE_COND_IMP_OPEN:
|
||||||
|
case BLK_ZONE_COND_EXP_OPEN:
|
||||||
|
case BLK_ZONE_COND_CLOSED:
|
||||||
|
case BLK_ZONE_COND_EMPTY:
|
||||||
|
case BLK_ZONE_COND_FULL:
|
||||||
|
case BLK_ZONE_COND_OFFLINE:
|
||||||
|
case BLK_ZONE_COND_READONLY:
|
||||||
|
if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
|
||||||
|
goto invalid_condition;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
pr_warn("%s: Invalid zone condition 0x%X\n",
|
||||||
|
args->disk->disk_name, cond);
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
|
args->zones_cond[idx] = cond;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
invalid_condition:
|
||||||
|
pr_warn("%s: Invalid zone condition 0x%x for type 0x%x\n",
|
||||||
|
args->disk->disk_name, cond, zone->type);
|
||||||
|
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
|
static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
|
||||||
struct blk_revalidate_zone_args *args)
|
struct blk_revalidate_zone_args *args)
|
||||||
{
|
{
|
||||||
|
|
@ -1638,17 +1708,7 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
|
||||||
if (disk_zone_is_last(disk, zone))
|
if (disk_zone_is_last(disk, zone))
|
||||||
args->last_zone_capacity = zone->capacity;
|
args->last_zone_capacity = zone->capacity;
|
||||||
|
|
||||||
if (!disk_need_zone_resources(disk))
|
args->nr_conv_zones++;
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (!args->conv_zones_bitmap) {
|
|
||||||
args->conv_zones_bitmap =
|
|
||||||
bitmap_zalloc(args->nr_zones, GFP_NOIO);
|
|
||||||
if (!args->conv_zones_bitmap)
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
set_bit(idx, args->conv_zones_bitmap);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -1746,6 +1806,11 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check zone condition */
|
||||||
|
ret = blk_revalidate_zone_cond(zone, idx, args);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
/* Check zone type */
|
/* Check zone type */
|
||||||
switch (zone->type) {
|
switch (zone->type) {
|
||||||
case BLK_ZONE_TYPE_CONVENTIONAL:
|
case BLK_ZONE_TYPE_CONVENTIONAL:
|
||||||
|
|
@ -1813,10 +1878,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||||
* Ensure that all memory allocations in this context are done as if
|
* Ensure that all memory allocations in this context are done as if
|
||||||
* GFP_NOIO was specified.
|
* GFP_NOIO was specified.
|
||||||
*/
|
*/
|
||||||
args.disk = disk;
|
|
||||||
args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);
|
|
||||||
noio_flag = memalloc_noio_save();
|
noio_flag = memalloc_noio_save();
|
||||||
ret = disk_revalidate_zone_resources(disk, args.nr_zones);
|
ret = disk_revalidate_zone_resources(disk, &args);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
memalloc_noio_restore(noio_flag);
|
memalloc_noio_restore(noio_flag);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -196,7 +196,7 @@ struct gendisk {
|
||||||
unsigned int nr_zones;
|
unsigned int nr_zones;
|
||||||
unsigned int zone_capacity;
|
unsigned int zone_capacity;
|
||||||
unsigned int last_zone_capacity;
|
unsigned int last_zone_capacity;
|
||||||
unsigned long __rcu *conv_zones_bitmap;
|
u8 __rcu *zones_cond;
|
||||||
unsigned int zone_wplugs_hash_bits;
|
unsigned int zone_wplugs_hash_bits;
|
||||||
atomic_t nr_zone_wplugs;
|
atomic_t nr_zone_wplugs;
|
||||||
spinlock_t zone_wplugs_lock;
|
spinlock_t zone_wplugs_lock;
|
||||||
|
|
@ -925,12 +925,20 @@ static inline unsigned int bdev_zone_capacity(struct block_device *bdev,
|
||||||
{
|
{
|
||||||
return disk_zone_capacity(bdev->bd_disk, pos);
|
return disk_zone_capacity(bdev->bd_disk, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector);
|
||||||
|
|
||||||
#else /* CONFIG_BLK_DEV_ZONED */
|
#else /* CONFIG_BLK_DEV_ZONED */
|
||||||
static inline unsigned int disk_nr_zones(struct gendisk *disk)
|
static inline unsigned int disk_nr_zones(struct gendisk *disk)
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool bio_needs_zone_write_plugging(struct bio *bio)
|
static inline bool bio_needs_zone_write_plugging(struct bio *bio)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -1533,33 +1541,6 @@ static inline bool bdev_is_zone_aligned(struct block_device *bdev,
|
||||||
return bdev_is_zone_start(bdev, sector);
|
return bdev_is_zone_start(bdev, sector);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* bdev_zone_is_seq - check if a sector belongs to a sequential write zone
|
|
||||||
* @bdev: block device to check
|
|
||||||
* @sector: sector number
|
|
||||||
*
|
|
||||||
* Check if @sector on @bdev is contained in a sequential write required zone.
|
|
||||||
*/
|
|
||||||
static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
|
|
||||||
{
|
|
||||||
bool is_seq = false;
|
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
|
|
||||||
if (bdev_is_zoned(bdev)) {
|
|
||||||
struct gendisk *disk = bdev->bd_disk;
|
|
||||||
unsigned long *bitmap;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
bitmap = rcu_dereference(disk->conv_zones_bitmap);
|
|
||||||
is_seq = !bitmap ||
|
|
||||||
!test_bit(disk_zone_no(disk, sector), bitmap);
|
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return is_seq;
|
|
||||||
}
|
|
||||||
|
|
||||||
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
|
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||||
sector_t nr_sects, gfp_t gfp_mask);
|
sector_t nr_sects, gfp_t gfp_mask);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue