block-6.10-20240530
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmZZK5gQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpin6D/93mQCnfghcaz3Am+a9djskhkC1FfcO0Nbj
SCvwLI9VmhjuxdLiAr3pOyMzRpRNvFXeMJd7+Gm2f+DKF/EgjBamNfFWBCXY9wSM
s2pKslJTGwak7AqIvWFt6BBDHYPgj2xBwKc5QU4fJPud6QmyFXVlLUEFY2qedJIL
jBrCpfdmLPWb9kB8ZlEZXEitGD8CsSm0lSfF5qYT9SO3fwIi9b0aTkTlB7k6N3AA
3cvGilR+Uc6zG9LqC28d06SjxAO2IwJCmXTDNt5quzsanUAfKedfMbsJPjlmMsYZ
x19h5TRaliP8xgsdUPdKOe1tWn2NJZkv0DGwH7j4l26wUZ6AYF/h893SbvRO+zIu
08KUzBy87mZXYasXZVqXMmkULTHTncDWLrflWYy5RgMiWiBPi0TSZ31y5mcSMf4h
d50Y5isN1j3bFFnoR8bZ8g7Oel4EGHAy00TgLMVSMLkxf7WFwcKrwvUgERjBKi+Q
MzzRA2q9cFZTEq+APNnWo2Ar+Ot43s7oKvnGKhl2dLNpd/hcdsikj/ObVMBGEpLL
Ew2cBxqXvWA36BPIxAm0QKMghW64zYuaWkxr7hhY2W/QCr5BOHKnmxBrArIl741X
R02LQCgItsI4jVur/Ch85FF/agMnRe8TbKIQbPIi2Sho+8NB1b6iOczlrS0VSrBm
f7SKUgQ+Aw==
=dgh+
-----END PGP SIGNATURE-----

Merge tag 'block-6.10-20240530' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe fixes via Keith:
     - Removing unused fields (Kanchan)
     - Large folio offsets support (Kundan)
     - Multipath NUMA node initialization fix (Nilay)
     - Multipath IO stats accounting fixes (Keith)
     - Circular lockdep fix (Keith)
     - Target race condition fix (Sagi)
     - Target memory leak fix (Sagi)

 - bcache fixes

 - null_blk fixes (Damien)

 - Fix regression in io.max due to throttle low removal (Waiman)

 - DM limit table fixes (Christoph)

 - SCSI and block limit fixes (Christoph)

 - zone fixes (Damien)

 - Misc fixes (Christoph, Hannes, hexue)

* tag 'block-6.10-20240530' of git://git.kernel.dk/linux: (25 commits)
  blk-throttle: Fix incorrect display of io.max
  block: Fix zone write plugging handling of devices with a runt zone
  block: Fix validation of zoned device with a runt zone
  null_blk: Do not allow runt zone with zone capacity smaller then zone size
  nvmet: fix a possible leak when destroy a ctrl during qp establishment
  nvme: use srcu for iterating namespace list
  bcache: code cleanup in __bch_bucket_alloc_set()
  bcache: call force_wake_up_gc() if necessary in check_should_bypass()
  bcache: allow allocator to invalidate bucket in gc
  block: check for max_hw_sectors underflow
  block: stack max_user_sectors
  sd: also set max_user_sectors when setting max_sectors
  null_blk: Print correct max open zones limit in null_init_zoned_dev()
  block: delete redundant function declaration
  null_blk: Fix return value of nullb_device_power_store()
  dm: make dm_set_zones_restrictions work on the queue limits
  dm: remove dm_check_zoned
  dm: move setting zoned_enabled to dm_table_set_restrictions
  block: remove blk_queue_max_integrity_segments
  nvme: adjust multiples of NVME_CTRL_PAGE_SIZE in offset
  ...
Commit: 0f9a75179d
block/blk-settings.c

@@ -104,6 +104,7 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
 static int blk_validate_limits(struct queue_limits *lim)
 {
        unsigned int max_hw_sectors;
+       unsigned int logical_block_sectors;

        /*
         * Unless otherwise specified, default to 512 byte logical blocks and a
@@ -134,8 +135,11 @@ static int blk_validate_limits(struct queue_limits *lim)
                lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
        if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
                return -EINVAL;
+       logical_block_sectors = lim->logical_block_size >> SECTOR_SHIFT;
+       if (WARN_ON_ONCE(logical_block_sectors > lim->max_hw_sectors))
+               return -EINVAL;
        lim->max_hw_sectors = round_down(lim->max_hw_sectors,
-                       lim->logical_block_size >> SECTOR_SHIFT);
+                       logical_block_sectors);

        /*
         * The actual max_sectors value is a complex beast and also takes the
@@ -153,7 +157,7 @@ static int blk_validate_limits(struct queue_limits *lim)
                lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
        }
        lim->max_sectors = round_down(lim->max_sectors,
-                       lim->logical_block_size >> SECTOR_SHIFT);
+                       logical_block_sectors);

        /*
         * Random default for the maximum number of segments. Driver should not
@@ -611,6 +615,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
        unsigned int top, bottom, alignment, ret = 0;

        t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
+       t->max_user_sectors = min_not_zero(t->max_user_sectors,
+                       b->max_user_sectors);
        t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
        t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
        t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
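For readers outside the kernel tree, the following stand-alone C sketch mirrors the stricter validation order added in blk_validate_limits() above: reject a logical block size larger than the hardware transfer limit, then round the limit down. It is an illustration under simplified assumptions (user-space re-implementation of round_down and SECTOR_SHIFT), not the kernel code itself.

#include <stdio.h>

#define SECTOR_SHIFT 9

/* Generic round-down; the kernel's round_down() requires a power-of-two step. */
static unsigned int round_down_to(unsigned int v, unsigned int m)
{
        return v - (v % m);
}

static int validate(unsigned int max_hw_sectors, unsigned int logical_block_size)
{
        unsigned int logical_block_sectors = logical_block_size >> SECTOR_SHIFT;

        if (logical_block_sectors > max_hw_sectors)
                return -1;      /* the round-down below would underflow to 0 */
        return (int)round_down_to(max_hw_sectors, logical_block_sectors);
}

int main(void)
{
        /* 4K logical blocks, device advertising only 4 sectors (2 KiB): rejected */
        printf("%d\n", validate(4, 4096));    /* -1 */
        /* 4K logical blocks, 255-sector limit rounds down to 248 */
        printf("%d\n", validate(255, 4096));  /* 248 */
        return 0;
}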
block/blk-stat.h

@@ -64,7 +64,6 @@ struct blk_stat_callback {

 struct blk_queue_stats *blk_alloc_queue_stats(void);
 void blk_free_queue_stats(struct blk_queue_stats *);
-bool blk_stats_alloc_enable(struct request_queue *q);

 void blk_stat_add(struct request *rq, u64 now);

block/blk-throttle.c

@@ -1399,32 +1399,32 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
        bps_dft = U64_MAX;
        iops_dft = UINT_MAX;

-       if (tg->bps_conf[READ] == bps_dft &&
-           tg->bps_conf[WRITE] == bps_dft &&
-           tg->iops_conf[READ] == iops_dft &&
-           tg->iops_conf[WRITE] == iops_dft)
+       if (tg->bps[READ] == bps_dft &&
+           tg->bps[WRITE] == bps_dft &&
+           tg->iops[READ] == iops_dft &&
+           tg->iops[WRITE] == iops_dft)
                return 0;

        seq_printf(sf, "%s", dname);
-       if (tg->bps_conf[READ] == U64_MAX)
+       if (tg->bps[READ] == U64_MAX)
                seq_printf(sf, " rbps=max");
        else
-               seq_printf(sf, " rbps=%llu", tg->bps_conf[READ]);
+               seq_printf(sf, " rbps=%llu", tg->bps[READ]);

-       if (tg->bps_conf[WRITE] == U64_MAX)
+       if (tg->bps[WRITE] == U64_MAX)
                seq_printf(sf, " wbps=max");
        else
-               seq_printf(sf, " wbps=%llu", tg->bps_conf[WRITE]);
+               seq_printf(sf, " wbps=%llu", tg->bps[WRITE]);

-       if (tg->iops_conf[READ] == UINT_MAX)
+       if (tg->iops[READ] == UINT_MAX)
                seq_printf(sf, " riops=max");
        else
-               seq_printf(sf, " riops=%u", tg->iops_conf[READ]);
+               seq_printf(sf, " riops=%u", tg->iops[READ]);

-       if (tg->iops_conf[WRITE] == UINT_MAX)
+       if (tg->iops[WRITE] == UINT_MAX)
                seq_printf(sf, " wiops=max");
        else
-               seq_printf(sf, " wiops=%u", tg->iops_conf[WRITE]);
+               seq_printf(sf, " wiops=%u", tg->iops[WRITE]);

        seq_printf(sf, "\n");
        return 0;
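The change above restores the values reported through the cgroup v2 io.max file, which tg_prfill_limit() formats. The user-space C sketch below only illustrates how that interface is exercised; the cgroup path and the 8:0 device number are assumptions for the example, not part of the kernel change.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *path = "/sys/fs/cgroup/test/io.max";   /* assumed cgroup */
        char line[256];
        FILE *f;

        /* Cap writes on device 8:0 to 1 MiB/s; other limits stay "max". */
        f = fopen(path, "w");
        if (!f)
                return EXIT_FAILURE;
        fprintf(f, "8:0 wbps=1048576\n");
        fclose(f);

        /* Read the limits back; with the fix this reflects what was set,
         * e.g. "8:0 rbps=max wbps=1048576 riops=max wiops=max". */
        f = fopen(path, "r");
        if (!f)
                return EXIT_FAILURE;
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}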
block/blk-throttle.h

@@ -95,15 +95,11 @@ struct throtl_grp {
        bool has_rules_bps[2];
        bool has_rules_iops[2];

-       /* internally used bytes per second rate limits */
+       /* bytes per second rate limits */
        uint64_t bps[2];
-       /* user configured bps limits */
-       uint64_t bps_conf[2];

-       /* internally used IOPS limits */
+       /* IOPS limits */
        unsigned int iops[2];
-       /* user configured IOPS limits */
-       unsigned int iops_conf[2];

        /* Number of bytes dispatched in current slice */
        uint64_t bytes_disp[2];
block/blk-zoned.c

@@ -450,6 +450,25 @@ static inline bool disk_zone_is_conv(struct gendisk *disk, sector_t sector)
        return test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
 }

+static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
+{
+       return zone->start + zone->len >= get_capacity(disk);
+}
+
+static bool disk_zone_is_full(struct gendisk *disk,
+                             unsigned int zno, unsigned int offset_in_zone)
+{
+       if (zno < disk->nr_zones - 1)
+               return offset_in_zone >= disk->zone_capacity;
+       return offset_in_zone >= disk->last_zone_capacity;
+}
+
+static bool disk_zone_wplug_is_full(struct gendisk *disk,
+                                   struct blk_zone_wplug *zwplug)
+{
+       return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset);
+}
+
 static bool disk_insert_zone_wplug(struct gendisk *disk,
                                    struct blk_zone_wplug *zwplug)
 {
@@ -543,7 +562,7 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
                return false;

        /* We can remove zone write plugs for zones that are empty or full. */
-       return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity;
+       return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
 }

 static void disk_remove_zone_wplug(struct gendisk *disk,
@@ -664,13 +683,12 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
                                             struct blk_zone_wplug *zwplug)
 {
-       unsigned int zone_capacity = disk->zone_capacity;
        unsigned int wp_offset = zwplug->wp_offset;
        struct bio_list bl = BIO_EMPTY_LIST;
        struct bio *bio;

        while ((bio = bio_list_pop(&zwplug->bio_list))) {
-               if (wp_offset >= zone_capacity ||
+               if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) ||
                    (bio_op(bio) != REQ_OP_ZONE_APPEND &&
                     bio_offset_from_zone_start(bio) != wp_offset)) {
                        blk_zone_wplug_bio_io_error(zwplug, bio);
@@ -909,7 +927,6 @@ void blk_zone_write_plug_init_request(struct request *req)
        sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
        struct request_queue *q = req->q;
        struct gendisk *disk = q->disk;
-       unsigned int zone_capacity = disk->zone_capacity;
        struct blk_zone_wplug *zwplug =
                disk_get_zone_wplug(disk, blk_rq_pos(req));
        unsigned long flags;
@@ -933,7 +950,7 @@ void blk_zone_write_plug_init_request(struct request *req)
         * into the back of the request.
         */
        spin_lock_irqsave(&zwplug->lock, flags);
-       while (zwplug->wp_offset < zone_capacity) {
+       while (!disk_zone_wplug_is_full(disk, zwplug)) {
                bio = bio_list_peek(&zwplug->bio_list);
                if (!bio)
                        break;
@@ -979,7 +996,7 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
         * We know such BIO will fail, and that would potentially overflow our
         * write pointer offset beyond the end of the zone.
         */
-       if (zwplug->wp_offset >= disk->zone_capacity)
+       if (disk_zone_wplug_is_full(disk, zwplug))
                goto err;

        if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
@@ -1556,6 +1573,7 @@ void disk_free_zone_resources(struct gendisk *disk)
        kfree(disk->conv_zones_bitmap);
        disk->conv_zones_bitmap = NULL;
        disk->zone_capacity = 0;
+       disk->last_zone_capacity = 0;
        disk->nr_zones = 0;
 }

@@ -1600,6 +1618,7 @@ struct blk_revalidate_zone_args {
        unsigned long *conv_zones_bitmap;
        unsigned int nr_zones;
        unsigned int zone_capacity;
+       unsigned int last_zone_capacity;
        sector_t sector;
 };

@@ -1617,6 +1636,7 @@ static int disk_update_zone_resources(struct gendisk *disk,

        disk->nr_zones = args->nr_zones;
        disk->zone_capacity = args->zone_capacity;
+       disk->last_zone_capacity = args->last_zone_capacity;
        swap(disk->conv_zones_bitmap, args->conv_zones_bitmap);
        if (disk->conv_zones_bitmap)
                nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap,
@@ -1668,6 +1688,9 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
                return -ENODEV;
        }

+       if (disk_zone_is_last(disk, zone))
+               args->last_zone_capacity = zone->capacity;
+
        if (!disk_need_zone_resources(disk))
                return 0;

@@ -1693,11 +1716,14 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,

        /*
         * Remember the capacity of the first sequential zone and check
-        * if it is constant for all zones.
+        * if it is constant for all zones, ignoring the last zone as it can be
+        * smaller.
         */
        if (!args->zone_capacity)
                args->zone_capacity = zone->capacity;
-       if (zone->capacity != args->zone_capacity) {
+       if (disk_zone_is_last(disk, zone)) {
+               args->last_zone_capacity = zone->capacity;
+       } else if (zone->capacity != args->zone_capacity) {
                pr_warn("%s: Invalid variable zone capacity\n",
                        disk->disk_name);
                return -ENODEV;
@@ -1732,7 +1758,6 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 {
        struct blk_revalidate_zone_args *args = data;
        struct gendisk *disk = args->disk;
-       sector_t capacity = get_capacity(disk);
        sector_t zone_sectors = disk->queue->limits.chunk_sectors;
        int ret;

@@ -1743,7 +1768,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
                return -ENODEV;
        }

-       if (zone->start >= capacity || !zone->len) {
+       if (zone->start >= get_capacity(disk) || !zone->len) {
                pr_warn("%s: Invalid zone start %llu, length %llu\n",
                        disk->disk_name, zone->start, zone->len);
                return -ENODEV;
@@ -1753,7 +1778,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
         * All zones must have the same size, with the exception on an eventual
         * smaller last zone.
         */
-       if (zone->start + zone->len < capacity) {
+       if (!disk_zone_is_last(disk, zone)) {
                if (zone->len != zone_sectors) {
                        pr_warn("%s: Invalid zoned device with non constant zone size\n",
                                disk->disk_name);
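A compact way to see what the new last-zone ("runt zone") handling does: the stand-alone sketch below models disk_zone_is_full() with a reduced structure (not the kernel's struct gendisk) and arbitrary capacities.

#include <stdbool.h>
#include <stdio.h>

struct fake_disk {
        unsigned int nr_zones;
        unsigned int zone_capacity;      /* capacity of all but the last zone */
        unsigned int last_zone_capacity; /* capacity of the (possibly runt) last zone */
};

static bool zone_is_full(const struct fake_disk *d, unsigned int zno,
                         unsigned int offset_in_zone)
{
        /* Every zone but the last compares against the common capacity;
         * the last zone compares against its own, smaller capacity. */
        if (zno < d->nr_zones - 1)
                return offset_in_zone >= d->zone_capacity;
        return offset_in_zone >= d->last_zone_capacity;
}

int main(void)
{
        struct fake_disk d = { .nr_zones = 4, .zone_capacity = 256,
                               .last_zone_capacity = 64 };

        /* Offset 100 fills the runt last zone but not a regular zone. */
        printf("zone 0 full: %d\n", zone_is_full(&d, 0, 100)); /* 0 */
        printf("zone 3 full: %d\n", zone_is_full(&d, 3, 100)); /* 1 */
        return 0;
}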
drivers/block/null_blk/main.c

@@ -494,6 +494,7 @@ static ssize_t nullb_device_power_store(struct config_item *item,

                set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
                dev->power = newp;
+               ret = count;
        } else if (dev->power && !newp) {
                if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
                        dev->power = newp;
drivers/block/null_blk/zoned.c

@@ -74,6 +74,17 @@ int null_init_zoned_dev(struct nullb_device *dev,
                return -EINVAL;
        }

+       /*
+        * If a smaller zone capacity was requested, do not allow a smaller last
+        * zone at the same time as such zone configuration does not correspond
+        * to any real zoned device.
+        */
+       if (dev->zone_capacity != dev->zone_size &&
+           dev->size & (dev->zone_size - 1)) {
+               pr_err("A smaller last zone is not allowed with zone capacity smaller than zone size.\n");
+               return -EINVAL;
+       }
+
        zone_capacity_sects = mb_to_sects(dev->zone_capacity);
        dev_capacity_sects = mb_to_sects(dev->size);
        dev->zone_size_sects = mb_to_sects(dev->zone_size);
@@ -108,7 +119,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
        if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
                dev->zone_max_open = dev->zone_max_active;
                pr_info("changed the maximum number of open zones to %u\n",
-                       dev->nr_zones);
+                       dev->zone_max_open);
        } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
                dev->zone_max_open = 0;
                pr_info("zone_max_open limit disabled, limit >= zone count\n");
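The new null_blk check uses the usual power-of-two remainder idiom: "size & (zone_size - 1)" is non-zero exactly when the device ends in a smaller last zone. The short example below works through it with arbitrary sizes in MB; the numbers are illustrative, not taken from the driver.

#include <stdio.h>

int main(void)
{
        unsigned long zone_size = 256;              /* MB, must be a power of two */
        unsigned long sizes[] = { 1024, 1100 };     /* MB */

        for (int i = 0; i < 2; i++) {
                unsigned long runt = sizes[i] & (zone_size - 1);
                printf("size=%luMB -> runt last zone of %luMB%s\n",
                       sizes[i], runt, runt ? "" : " (none)");
        }
        return 0;
}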
drivers/md/bcache/alloc.c

@@ -129,12 +129,9 @@ static inline bool can_inc_bucket_gen(struct bucket *b)

 bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
 {
-       BUG_ON(!ca->set->gc_mark_valid);
-
-       return (!GC_MARK(b) ||
-               GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
-               !atomic_read(&b->pin) &&
-               can_inc_bucket_gen(b);
+       return (ca->set->gc_mark_valid || b->reclaimable_in_gc) &&
+              ((!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
+              !atomic_read(&b->pin) && can_inc_bucket_gen(b));
 }

 void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -148,6 +145,7 @@ void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
        bch_inc_gen(ca, b);
        b->prio = INITIAL_PRIO;
        atomic_inc(&b->pin);
+       b->reclaimable_in_gc = 0;
 }

 static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -352,8 +350,7 @@ static int bch_allocator_thread(void *arg)
         */

 retry_invalidate:
-               allocator_wait(ca, ca->set->gc_mark_valid &&
-                              !ca->invalidate_needs_gc);
+               allocator_wait(ca, !ca->invalidate_needs_gc);
                invalidate_buckets(ca);

                /*
@@ -501,8 +498,8 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,

        ca = c->cache;
        b = bch_bucket_alloc(ca, reserve, wait);
-       if (b == -1)
-               goto err;
+       if (b < 0)
+               return -1;

        k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
                             bucket_to_sector(c, b),
@@ -511,10 +508,6 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
        SET_KEY_PTRS(k, 1);

        return 0;
-err:
-       bch_bucket_free(c, k);
-       bkey_put(c, k);
-       return -1;
 }

 int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
drivers/md/bcache/bcache.h

@@ -200,6 +200,7 @@ struct bucket {
        uint8_t         gen;
        uint8_t         last_gc; /* Most out of date gen in the btree */
        uint16_t        gc_mark; /* Bitfield used by GC. See below for field */
+       uint16_t        reclaimable_in_gc:1;
 };

 /*
drivers/md/bcache/btree.c

@@ -1741,18 +1741,20 @@ static void btree_gc_start(struct cache_set *c)

        mutex_lock(&c->bucket_lock);

-       c->gc_mark_valid = 0;
        c->gc_done = ZERO_KEY;

        ca = c->cache;
        for_each_bucket(b, ca) {
                b->last_gc = b->gen;
+               if (bch_can_invalidate_bucket(ca, b))
+                       b->reclaimable_in_gc = 1;
                if (!atomic_read(&b->pin)) {
                        SET_GC_MARK(b, 0);
                        SET_GC_SECTORS_USED(b, 0);
                }
        }

+       c->gc_mark_valid = 0;
        mutex_unlock(&c->bucket_lock);
 }

@@ -1809,6 +1811,9 @@ static void bch_btree_gc_finish(struct cache_set *c)
        for_each_bucket(b, ca) {
                c->need_gc = max(c->need_gc, bucket_gc_gen(b));

+               if (b->reclaimable_in_gc)
+                       b->reclaimable_in_gc = 0;
+
                if (atomic_read(&b->pin))
                        continue;

drivers/md/bcache/request.c

@@ -369,10 +369,24 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
        struct io *i;

        if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
-           c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
            (bio_op(bio) == REQ_OP_DISCARD))
                goto skip;

+       if (c->gc_stats.in_use > CUTOFF_CACHE_ADD) {
+               /*
+                * If cached buckets are all clean now, 'true' will be
+                * returned and all requests will bypass the cache device.
+                * Then c->sectors_to_gc has no chance to be negative, and
+                * gc thread won't wake up and caching won't work forever.
+                * Here call force_wake_up_gc() to avoid such aftermath.
+                */
+               if (BDEV_STATE(&dc->sb) == BDEV_STATE_CLEAN &&
+                   c->gc_mark_valid)
+                       force_wake_up_gc(c);
+
+               goto skip;
+       }
+
        if (mode == CACHE_MODE_NONE ||
            (mode == CACHE_MODE_WRITEAROUND &&
             op_is_write(bio_op(bio))))
drivers/md/dm-table.c

@@ -1981,10 +1981,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
        if (!dm_table_supports_secure_erase(t))
                limits->max_secure_erase_sectors = 0;

-       r = queue_limits_set(q, limits);
-       if (r)
-               return r;
-
        if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
                wc = true;
                if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
@@ -2036,15 +2032,16 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
         * For a zoned target, setup the zones related queue attributes
         * and resources necessary for zone append emulation if necessary.
         */
-       if (blk_queue_is_zoned(q)) {
-               r = dm_set_zones_restrictions(t, q);
+       if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
+               r = dm_set_zones_restrictions(t, q, limits);
                if (r)
                        return r;
-               if (blk_queue_is_zoned(q) &&
-                   !static_key_enabled(&zoned_enabled.key))
-                       static_branch_enable(&zoned_enabled);
        }

+       r = queue_limits_set(q, limits);
+       if (r)
+               return r;
+
        dm_update_crypto_profile(q, t);

        /*
drivers/md/dm-zone.c

@@ -160,37 +160,6 @@ static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
        return 0;
 }

-static int dm_check_zoned(struct mapped_device *md, struct dm_table *t)
-{
-       struct gendisk *disk = md->disk;
-       unsigned int nr_conv_zones = 0;
-       int ret;
-
-       /* Count conventional zones */
-       md->zone_revalidate_map = t;
-       ret = dm_blk_report_zones(disk, 0, UINT_MAX,
-                                 dm_check_zoned_cb, &nr_conv_zones);
-       md->zone_revalidate_map = NULL;
-       if (ret < 0) {
-               DMERR("Check zoned failed %d", ret);
-               return ret;
-       }
-
-       /*
-        * If we only have conventional zones, expose the mapped device as
-        * a regular device.
-        */
-       if (nr_conv_zones >= ret) {
-               disk->queue->limits.max_open_zones = 0;
-               disk->queue->limits.max_active_zones = 0;
-               disk->queue->limits.zoned = false;
-               clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
-               disk->nr_zones = 0;
-       }
-
-       return 0;
-}
-
 /*
  * Revalidate the zones of a mapped device to initialize resource necessary
  * for zone append emulation. Note that we cannot simply use the block layer
@@ -251,9 +220,12 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
        return true;
 }

-int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
+               struct queue_limits *lim)
 {
        struct mapped_device *md = t->md;
+       struct gendisk *disk = md->disk;
+       unsigned int nr_conv_zones = 0;
+       int ret;

        /*
@@ -265,21 +237,37 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
                clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
        } else {
                set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
-               blk_queue_max_zone_append_sectors(q, 0);
+               lim->max_zone_append_sectors = 0;
        }

        if (!get_capacity(md->disk))
                return 0;

        /*
-        * Check that the mapped device will indeed be zoned, that is, that it
-        * has sequential write required zones.
+        * Count conventional zones to check that the mapped device will indeed
+        * have sequential write required zones.
         */
-       ret = dm_check_zoned(md, t);
-       if (ret)
-               return ret;
-       if (!blk_queue_is_zoned(q))
-               return 0;
+       md->zone_revalidate_map = t;
+       ret = dm_blk_report_zones(disk, 0, UINT_MAX,
+                                 dm_check_zoned_cb, &nr_conv_zones);
+       md->zone_revalidate_map = NULL;
+       if (ret < 0) {
+               DMERR("Check zoned failed %d", ret);
+               return ret;
+       }
+
+       /*
+        * If we only have conventional zones, expose the mapped device as
+        * a regular device.
+        */
+       if (nr_conv_zones >= ret) {
+               lim->max_open_zones = 0;
+               lim->max_active_zones = 0;
+               lim->zoned = false;
+               clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+               disk->nr_zones = 0;
+               return 0;
+       }

        if (!md->disk->nr_zones) {
                DMINFO("%s using %s zone append",
@@ -287,7 +275,13 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
                       queue_emulates_zone_append(q) ? "emulated" : "native");
        }

-       return dm_revalidate_zones(md, t);
+       ret = dm_revalidate_zones(md, t);
+       if (ret < 0)
+               return ret;
+
+       if (!static_key_enabled(&zoned_enabled.key))
+               static_branch_enable(&zoned_enabled);
+       return 0;
 }

 /*
drivers/md/dm.h

@@ -101,7 +101,8 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
 /*
  * Zoned targets related functions.
  */
-int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
+               struct queue_limits *lim);
 void dm_zone_endio(struct dm_io *io, struct bio *clone);
 #ifdef CONFIG_BLK_DEV_ZONED
 int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
drivers/nvme/host/core.c

@@ -414,7 +414,15 @@ static inline void nvme_end_req_zoned(struct request *req)
        }
 }

-static inline void nvme_end_req(struct request *req)
+static inline void __nvme_end_req(struct request *req)
+{
+       nvme_end_req_zoned(req);
+       nvme_trace_bio_complete(req);
+       if (req->cmd_flags & REQ_NVME_MPATH)
+               nvme_mpath_end_request(req);
+}
+
+void nvme_end_req(struct request *req)
 {
        blk_status_t status = nvme_error_status(nvme_req(req)->status);

@@ -424,10 +432,7 @@ static inline void nvme_end_req(struct request *req)
                else
                        nvme_log_error(req);
        }
-       nvme_end_req_zoned(req);
-       nvme_trace_bio_complete(req);
-       if (req->cmd_flags & REQ_NVME_MPATH)
-               nvme_mpath_end_request(req);
+       __nvme_end_req(req);
        blk_mq_end_request(req, status);
 }

@@ -476,7 +481,7 @@ void nvme_complete_batch_req(struct request *req)
 {
        trace_nvme_complete_rq(req);
        nvme_cleanup_cmd(req);
-       nvme_end_req_zoned(req);
+       __nvme_end_req(req);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_batch_req);

@@ -673,7 +678,7 @@ static void nvme_free_ns(struct kref *kref)
        kfree(ns);
 }

-static inline bool nvme_get_ns(struct nvme_ns *ns)
+bool nvme_get_ns(struct nvme_ns *ns)
 {
        return kref_get_unless_zero(&ns->kref);
 }
@@ -3679,9 +3684,10 @@ out_unlock:
 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
        struct nvme_ns *ns, *ret = NULL;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
                if (ns->head->ns_id == nsid) {
                        if (!nvme_get_ns(ns))
                                continue;
@@ -3691,7 +3697,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
                if (ns->head->ns_id > nsid)
                        break;
        }
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
        return ret;
 }
 EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
@@ -3705,7 +3711,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)

        list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) {
                if (tmp->head->ns_id < ns->head->ns_id) {
-                       list_add(&ns->list, &tmp->list);
+                       list_add_rcu(&ns->list, &tmp->list);
                        return;
                }
        }
@@ -3771,17 +3777,18 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
        if (nvme_update_ns_info(ns, info))
                goto out_unlink_ns;

-       down_write(&ctrl->namespaces_rwsem);
+       mutex_lock(&ctrl->namespaces_lock);
        /*
         * Ensure that no namespaces are added to the ctrl list after the queues
         * are frozen, thereby avoiding a deadlock between scan and reset.
         */
        if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
-               up_write(&ctrl->namespaces_rwsem);
+               mutex_unlock(&ctrl->namespaces_lock);
                goto out_unlink_ns;
        }
        nvme_ns_add_to_ctrl_list(ns);
-       up_write(&ctrl->namespaces_rwsem);
+       mutex_unlock(&ctrl->namespaces_lock);
+       synchronize_srcu(&ctrl->srcu);
        nvme_get_ctrl(ctrl);

        if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups))
@@ -3804,9 +3811,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)

 out_cleanup_ns_from_list:
        nvme_put_ctrl(ctrl);
-       down_write(&ctrl->namespaces_rwsem);
-       list_del_init(&ns->list);
-       up_write(&ctrl->namespaces_rwsem);
+       mutex_lock(&ctrl->namespaces_lock);
+       list_del_rcu(&ns->list);
+       mutex_unlock(&ctrl->namespaces_lock);
+       synchronize_srcu(&ctrl->srcu);
 out_unlink_ns:
        mutex_lock(&ctrl->subsys->lock);
        list_del_rcu(&ns->siblings);
@@ -3856,9 +3864,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        nvme_cdev_del(&ns->cdev, &ns->cdev_device);
        del_gendisk(ns->disk);

-       down_write(&ns->ctrl->namespaces_rwsem);
-       list_del_init(&ns->list);
-       up_write(&ns->ctrl->namespaces_rwsem);
+       mutex_lock(&ns->ctrl->namespaces_lock);
+       list_del_rcu(&ns->list);
+       mutex_unlock(&ns->ctrl->namespaces_lock);
+       synchronize_srcu(&ns->ctrl->srcu);

        if (last_path)
                nvme_mpath_shutdown_disk(ns->head);
@@ -3948,16 +3957,17 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
        struct nvme_ns *ns, *next;
        LIST_HEAD(rm_list);

-       down_write(&ctrl->namespaces_rwsem);
+       mutex_lock(&ctrl->namespaces_lock);
        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
                if (ns->head->ns_id > nsid)
-                       list_move_tail(&ns->list, &rm_list);
+                       list_splice_init_rcu(&ns->list, &rm_list,
+                                            synchronize_rcu);
        }
-       up_write(&ctrl->namespaces_rwsem);
+       mutex_unlock(&ctrl->namespaces_lock);
+       synchronize_srcu(&ctrl->srcu);

        list_for_each_entry_safe(ns, next, &rm_list, list)
                nvme_ns_remove(ns);
-
 }

 static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
@@ -4127,9 +4137,10 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
        /* this is a no-op when called from the controller reset handler */
        nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);

-       down_write(&ctrl->namespaces_rwsem);
-       list_splice_init(&ctrl->namespaces, &ns_list);
-       up_write(&ctrl->namespaces_rwsem);
+       mutex_lock(&ctrl->namespaces_lock);
+       list_splice_init_rcu(&ctrl->namespaces, &ns_list, synchronize_rcu);
+       mutex_unlock(&ctrl->namespaces_lock);
+       synchronize_srcu(&ctrl->srcu);

        list_for_each_entry_safe(ns, next, &ns_list, list)
                nvme_ns_remove(ns);
@@ -4577,6 +4588,7 @@ static void nvme_free_ctrl(struct device *dev)
        key_put(ctrl->tls_key);
        nvme_free_cels(ctrl);
        nvme_mpath_uninit(ctrl);
+       cleanup_srcu_struct(&ctrl->srcu);
        nvme_auth_stop(ctrl);
        nvme_auth_free(ctrl);
        __free_page(ctrl->discard_page);
@@ -4609,10 +4621,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        ctrl->passthru_err_log_enabled = false;
        clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
        spin_lock_init(&ctrl->lock);
+       mutex_init(&ctrl->namespaces_lock);
+
+       ret = init_srcu_struct(&ctrl->srcu);
+       if (ret)
+               return ret;
+
        mutex_init(&ctrl->scan_lock);
        INIT_LIST_HEAD(&ctrl->namespaces);
        xa_init(&ctrl->cels);
-       init_rwsem(&ctrl->namespaces_rwsem);
        ctrl->dev = dev;
        ctrl->ops = ops;
        ctrl->quirks = quirks;
@@ -4692,6 +4709,7 @@ out_release_instance:
 out:
        if (ctrl->discard_page)
                __free_page(ctrl->discard_page);
+       cleanup_srcu_struct(&ctrl->srcu);
        return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_init_ctrl);
@@ -4700,22 +4718,24 @@ EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list)
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
                blk_mark_disk_dead(ns->disk);
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_mark_namespaces_dead);

 void nvme_unfreeze(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list)
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
                blk_mq_unfreeze_queue(ns->queue);
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
        clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
@@ -4723,14 +4743,15 @@ EXPORT_SYMBOL_GPL(nvme_unfreeze);
 int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
                timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
                if (timeout <= 0)
                        break;
        }
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
        return timeout;
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
@@ -4738,23 +4759,25 @@ EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list)
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
                blk_mq_freeze_queue_wait(ns->queue);
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze);

 void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

        set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list)
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
                blk_freeze_queue_start(ns->queue);
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);

@@ -4797,11 +4820,12 @@ EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue);
 void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list)
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
                blk_sync_queue(ns->queue);
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_sync_io_queues);
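The rwsem-to-SRCU conversion above follows a standard pattern: readers walk the namespace list under srcu_read_lock(), writers serialize on a mutex, publish with the RCU list helpers, and call synchronize_srcu() before readers can see freed memory. The fragment below is a schematic kernel-style sketch with a toy item type; it is not the nvme code and is not buildable on its own.

#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct item {
        struct list_head list;
        int id;
};

static LIST_HEAD(items);
static DEFINE_MUTEX(items_lock);        /* serializes writers only */
DEFINE_STATIC_SRCU(items_srcu);         /* protects readers */

static void add_item(struct item *it)
{
        mutex_lock(&items_lock);
        list_add_tail_rcu(&it->list, &items);
        mutex_unlock(&items_lock);
}

static void remove_item(struct item *it)
{
        mutex_lock(&items_lock);
        list_del_rcu(&it->list);
        mutex_unlock(&items_lock);
        synchronize_srcu(&items_srcu);  /* wait out current readers */
        kfree(it);
}

static void for_each_item(void (*fn)(struct item *))
{
        struct item *it;
        int idx;

        idx = srcu_read_lock(&items_srcu);
        list_for_each_entry_rcu(it, &items, list)
                fn(it);                 /* readers may sleep, unlike plain RCU */
        srcu_read_unlock(&items_srcu, idx);
}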
drivers/nvme/host/ioctl.c

@@ -789,15 +789,15 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
                bool open_for_write)
 {
        struct nvme_ns *ns;
-       int ret;
+       int ret, srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
        if (list_empty(&ctrl->namespaces)) {
                ret = -ENOTTY;
                goto out_unlock;
        }

-       ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+       ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list);
        if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
                dev_warn(ctrl->device,
                        "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
@@ -807,15 +807,18 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,

        dev_warn(ctrl->device,
                "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
-       kref_get(&ns->kref);
-       up_read(&ctrl->namespaces_rwsem);
+       if (!nvme_get_ns(ns)) {
+               ret = -ENXIO;
+               goto out_unlock;
+       }
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);

        ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
        nvme_put_ns(ns);
        return ret;

 out_unlock:
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
        return ret;
 }

drivers/nvme/host/multipath.c

@@ -118,7 +118,8 @@ void nvme_failover_req(struct request *req)
        blk_steal_bios(&ns->head->requeue_list, req);
        spin_unlock_irqrestore(&ns->head->requeue_lock, flags);

-       blk_mq_end_request(req, 0);
+       nvme_req(req)->status = 0;
+       nvme_end_req(req);
        kblockd_schedule_work(&ns->head->requeue_work);
 }

@@ -150,16 +151,17 @@ void nvme_mpath_end_request(struct request *rq)
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
                if (!ns->head->disk)
                        continue;
                kblockd_schedule_work(&ns->head->requeue_work);
                if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
                        disk_uevent(ns->head->disk, KOBJ_CHANGE);
        }
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }

 static const char *nvme_ana_state_names[] = {
@@ -193,13 +195,14 @@ out:
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
+       int srcu_idx;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
                nvme_mpath_clear_current_path(ns);
                kblockd_schedule_work(&ns->head->requeue_work);
        }
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }

 void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
@@ -595,7 +598,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
        int node, srcu_idx;

        srcu_idx = srcu_read_lock(&head->srcu);
-       for_each_node(node)
+       for_each_online_node(node)
                __nvme_find_path(head, node);
        srcu_read_unlock(&head->srcu, srcu_idx);
 }
@@ -680,6 +683,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
        u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
        unsigned *nr_change_groups = data;
        struct nvme_ns *ns;
+       int srcu_idx;

        dev_dbg(ctrl->device, "ANA group %d: %s.\n",
                le32_to_cpu(desc->grpid),
@@ -691,8 +695,8 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
        if (!nr_nsids)
                return 0;

-       down_read(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
+       srcu_idx = srcu_read_lock(&ctrl->srcu);
+       list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
                unsigned nsid;
 again:
                nsid = le32_to_cpu(desc->nsids[n]);
@@ -705,7 +709,7 @@ again:
                if (ns->head->ns_id > nsid)
                        goto again;
        }
-       up_read(&ctrl->namespaces_rwsem);
+       srcu_read_unlock(&ctrl->srcu, srcu_idx);
        return 0;
 }

drivers/nvme/host/nvme.h

@@ -282,7 +282,8 @@ struct nvme_ctrl {
        struct blk_mq_tag_set *tagset;
        struct blk_mq_tag_set *admin_tagset;
        struct list_head namespaces;
-       struct rw_semaphore namespaces_rwsem;
+       struct mutex namespaces_lock;
+       struct srcu_struct srcu;
        struct device ctrl_device;
        struct device *device;  /* char device */
 #ifdef CONFIG_NVME_HWMON
@@ -471,8 +472,6 @@ struct nvme_ns_head {
        u8 pi_type;
        u8 pi_offset;
        u8 guard_type;
-       u16 sgs;
-       u32 sws;
 #ifdef CONFIG_BLK_DEV_ZONED
        u64 zsze;
 #endif
@@ -767,6 +766,7 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
        }
 }

+void nvme_end_req(struct request *req);
 void nvme_complete_rq(struct request *req);
 void nvme_complete_batch_req(struct request *req);

@@ -1161,6 +1161,7 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
                struct nvme_command *cmd, int status);
 struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
+bool nvme_get_ns(struct nvme_ns *ns);
 void nvme_put_ns(struct nvme_ns *ns);

 static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
drivers/nvme/host/pci.c

@@ -778,7 +778,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                struct bio_vec bv = req_bvec(req);

                if (!is_pci_p2pdma_page(bv.bv_page)) {
-                       if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
+                       if ((bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
+                            bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
                                return nvme_setup_prp_simple(dev, req,
                                                             &cmnd->rw, &bv);

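The one-line change above matters once large folios make bv_offset exceed the controller page size: only the offset within a controller page decides whether the two-PRP fast path fits. The small user-space example below works the arithmetic with a 4 KiB controller page (the driver's NVME_CTRL_PAGE_SIZE) and an assumed 16 KiB folio.

#include <stdio.h>

#define NVME_CTRL_PAGE_SIZE 4096u

int main(void)
{
        unsigned int bv_offset = 12288;  /* e.g. the fourth 4K page of a 16K folio */
        unsigned int bv_len = 4096;

        /* Old test: raw offset + length, rejects the fast path needlessly. */
        int old_check = bv_offset + bv_len <= NVME_CTRL_PAGE_SIZE * 2;
        /* New test: offset modulo the controller page, accepts it. */
        int new_check = (bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) + bv_len
                        <= NVME_CTRL_PAGE_SIZE * 2;

        printf("old=%d new=%d\n", old_check, new_check);   /* old=0 new=1 */
        return 0;
}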
drivers/nvme/target/configfs.c

@@ -676,10 +676,18 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
        if (kstrtobool(page, &enable))
                return -EINVAL;

+       /*
+        * take a global nvmet_config_sem because the disable routine has a
+        * window where it releases the subsys-lock, giving a chance to
+        * a parallel enable to concurrently execute causing the disable to
+        * have a misaccounting of the ns percpu_ref.
+        */
+       down_write(&nvmet_config_sem);
        if (enable)
                ret = nvmet_ns_enable(ns);
        else
                nvmet_ns_disable(ns);
+       up_write(&nvmet_config_sem);

        return ret ? ret : count;
 }
drivers/nvme/target/core.c

@@ -818,6 +818,15 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
        percpu_ref_exit(&sq->ref);
        nvmet_auth_sq_free(sq);

+       /*
+        * we must reference the ctrl again after waiting for inflight IO
+        * to complete. Because admin connect may have sneaked in after we
+        * store sq->ctrl locally, but before we killed the percpu_ref. the
+        * admin connect allocates and assigns sq->ctrl, which now needs a
+        * final ref put, as this ctrl is going away.
+        */
+       ctrl = sq->ctrl;
+
        if (ctrl) {
                /*
                 * The teardown flow may take some time, and the host may not
drivers/scsi/sd.c

@@ -3700,8 +3700,10 @@ static int sd_revalidate_disk(struct gendisk *disk)
         */
        if (sdkp->first_scan ||
            q->limits.max_sectors > q->limits.max_dev_sectors ||
-           q->limits.max_sectors > q->limits.max_hw_sectors)
+           q->limits.max_sectors > q->limits.max_hw_sectors) {
                q->limits.max_sectors = rw_max;
+               q->limits.max_user_sectors = rw_max;
+       }

        sdkp->first_scan = 0;

include/linux/blk-integrity.h

@@ -66,12 +66,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q)
        return q->integrity.profile;
 }

-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
-                                                    unsigned int segs)
-{
-       q->limits.max_integrity_segments = segs;
-}
-
 static inline unsigned short
 queue_max_integrity_segments(const struct request_queue *q)
 {
@@ -151,10 +145,6 @@ static inline void blk_integrity_register(struct gendisk *d,
 static inline void blk_integrity_unregister(struct gendisk *d)
 {
 }
-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
-                                                    unsigned int segs)
-{
-}
 static inline unsigned short
 queue_max_integrity_segments(const struct request_queue *q)
 {
include/linux/blkdev.h

@@ -186,6 +186,7 @@ struct gendisk {
         */
        unsigned int            nr_zones;
        unsigned int            zone_capacity;
+       unsigned int            last_zone_capacity;
        unsigned long           *conv_zones_bitmap;
        unsigned int            zone_wplugs_hash_bits;
        spinlock_t              zone_wplugs_lock;