block-5.10-2020-11-07
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl+m/y4QHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgplQwEACb6FFsEJRn2xnFkyZUQE1QAIEQJWsSAaV+ xK0u+srqyy+sXvSSQ8wy6FsT3Fgs0CYU8SFkCW/F+EdnqzY0L03RTX7qU+vVtLXC CmAOQACKPKplCLROz9hontPwNIyheXzhkD+sBaFlCvgjMJpyZvQfkJSKGcgTkMy3 ZgQB8j9+3qhkRhk0yA2gjW+uH2EKHyjYQSPUHDhgxzES+ZrwEhXmNc0eKbBDr74R +SYJYs2A5ZcauoCR0IYnPq2XiK3kbv3NMRWIgxVN1LWde7RqNo+iCgP1ps06Ax9i W5WPxMq9Y9IFacM/bCHIu+TsJMg8poWmYQ/VlCXT8deH2V1dWZ5L2HnhOPVInh3o P//3B+rCyGpMKe5s4JjfHddtDhrlbfvyIWg3LEqUxH5VmXtUYgq9pAVOwqaxZMkO U2cMWRUgDUVcta2ulQfcCqaQuIC5Mp+Up4HuYfe4jmYeKWMFCfPlCx0ZsIuR9hkz wcgODZLHMbEEMKydFnQDoQjngjIpYqr8TJAMjQ0P1c7sEYlVE0wXmQb/uXL7BPPa QU7+RxeWiMXDJTn9ZJAKkeWPvFC++JcE0QfZysSRO4eb/ny2i2xalJI6d8v4xwvU Q/VgVQeQnhbeves8hwsjQrjSU/oPBCiiMYwBRQikpGbRG5P2NadcEox0MfzS6ojT a/bQAoOR9A== =BcVw -----END PGP SIGNATURE----- Merge tag 'block-5.10-2020-11-07' of git://git.kernel.dk/linux-block Pull block fixes from Jens Axboe: - NVMe pull request from Christoph: - revert a nvme_queue size optimization (Keith Busch) - fabrics timeout race fixes (Chao Leng and Sagi Grimberg) - null_blk zone locking fix (Damien) * tag 'block-5.10-2020-11-07' of git://git.kernel.dk/linux-block: null_blk: Fix scheduling in atomic with zoned mode nvme-tcp: avoid repeated request completion nvme-rdma: avoid repeated request completion nvme-tcp: avoid race between time out and tear down nvme-rdma: avoid race between time out and tear down nvme: introduce nvme_sync_io_queues Revert "nvme-pci: remove last_sq_tail"
This commit is contained in:
commit
4429f14aee
@ -47,7 +47,7 @@ struct nullb_device {
|
||||
unsigned int nr_zones_closed;
|
||||
struct blk_zone *zones;
|
||||
sector_t zone_size_sects;
|
||||
spinlock_t zone_dev_lock;
|
||||
spinlock_t zone_lock;
|
||||
unsigned long *zone_locks;
|
||||
|
||||
unsigned long size; /* device size in MB */
|
||||
|
@ -46,11 +46,20 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
||||
if (!dev->zones)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&dev->zone_dev_lock);
|
||||
dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
|
||||
if (!dev->zone_locks) {
|
||||
kvfree(dev->zones);
|
||||
return -ENOMEM;
|
||||
/*
|
||||
* With memory backing, the zone_lock spinlock needs to be temporarily
|
||||
* released to avoid scheduling in atomic context. To guarantee zone
|
||||
* information protection, use a bitmap to lock zones with
|
||||
* wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing
|
||||
* implies that the queue is marked with BLK_MQ_F_BLOCKING.
|
||||
*/
|
||||
spin_lock_init(&dev->zone_lock);
|
||||
if (dev->memory_backed) {
|
||||
dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
|
||||
if (!dev->zone_locks) {
|
||||
kvfree(dev->zones);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (dev->zone_nr_conv >= dev->nr_zones) {
|
||||
@ -137,12 +146,17 @@ void null_free_zoned_dev(struct nullb_device *dev)
|
||||
|
||||
static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
|
||||
{
|
||||
wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
|
||||
if (dev->memory_backed)
|
||||
wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
|
||||
spin_lock_irq(&dev->zone_lock);
|
||||
}
|
||||
|
||||
static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
|
||||
{
|
||||
clear_and_wake_up_bit(zno, dev->zone_locks);
|
||||
spin_unlock_irq(&dev->zone_lock);
|
||||
|
||||
if (dev->memory_backed)
|
||||
clear_and_wake_up_bit(zno, dev->zone_locks);
|
||||
}
|
||||
|
||||
int null_report_zones(struct gendisk *disk, sector_t sector,
|
||||
@ -322,7 +336,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
|
||||
|
||||
null_lock_zone(dev, zno);
|
||||
spin_lock(&dev->zone_dev_lock);
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_FULL:
|
||||
@ -375,9 +388,17 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
|
||||
spin_unlock(&dev->zone_dev_lock);
|
||||
/*
|
||||
* Memory backing allocation may sleep: release the zone_lock spinlock
|
||||
* to avoid scheduling in atomic context. Zone operation atomicity is
|
||||
* still guaranteed through the zone_locks bitmap.
|
||||
*/
|
||||
if (dev->memory_backed)
|
||||
spin_unlock_irq(&dev->zone_lock);
|
||||
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
|
||||
spin_lock(&dev->zone_dev_lock);
|
||||
if (dev->memory_backed)
|
||||
spin_lock_irq(&dev->zone_lock);
|
||||
|
||||
if (ret != BLK_STS_OK)
|
||||
goto unlock;
|
||||
|
||||
@ -392,7 +413,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
ret = BLK_STS_OK;
|
||||
|
||||
unlock:
|
||||
spin_unlock(&dev->zone_dev_lock);
|
||||
null_unlock_zone(dev, zno);
|
||||
|
||||
return ret;
|
||||
@ -516,9 +536,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
|
||||
null_lock_zone(dev, i);
|
||||
zone = &dev->zones[i];
|
||||
if (zone->cond != BLK_ZONE_COND_EMPTY) {
|
||||
spin_lock(&dev->zone_dev_lock);
|
||||
null_reset_zone(dev, zone);
|
||||
spin_unlock(&dev->zone_dev_lock);
|
||||
trace_nullb_zone_op(cmd, i, zone->cond);
|
||||
}
|
||||
null_unlock_zone(dev, i);
|
||||
@ -530,7 +548,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
|
||||
zone = &dev->zones[zone_no];
|
||||
|
||||
null_lock_zone(dev, zone_no);
|
||||
spin_lock(&dev->zone_dev_lock);
|
||||
|
||||
switch (op) {
|
||||
case REQ_OP_ZONE_RESET:
|
||||
@ -550,8 +567,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
|
||||
break;
|
||||
}
|
||||
|
||||
spin_unlock(&dev->zone_dev_lock);
|
||||
|
||||
if (ret == BLK_STS_OK)
|
||||
trace_nullb_zone_op(cmd, zone_no, zone->cond);
|
||||
|
||||
|
@ -4582,8 +4582,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_start_queues);
|
||||
|
||||
|
||||
void nvme_sync_queues(struct nvme_ctrl *ctrl)
|
||||
void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
@ -4591,7 +4590,12 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl)
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list)
|
||||
blk_sync_queue(ns->queue);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_sync_io_queues);
|
||||
|
||||
void nvme_sync_queues(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
nvme_sync_io_queues(ctrl);
|
||||
if (ctrl->admin_q)
|
||||
blk_sync_queue(ctrl->admin_q);
|
||||
}
|
||||
|
@ -602,6 +602,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl);
|
||||
void nvme_start_queues(struct nvme_ctrl *ctrl);
|
||||
void nvme_kill_queues(struct nvme_ctrl *ctrl);
|
||||
void nvme_sync_queues(struct nvme_ctrl *ctrl);
|
||||
void nvme_sync_io_queues(struct nvme_ctrl *ctrl);
|
||||
void nvme_unfreeze(struct nvme_ctrl *ctrl);
|
||||
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
|
||||
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
|
||||
|
@ -198,6 +198,7 @@ struct nvme_queue {
|
||||
u32 q_depth;
|
||||
u16 cq_vector;
|
||||
u16 sq_tail;
|
||||
u16 last_sq_tail;
|
||||
u16 cq_head;
|
||||
u16 qid;
|
||||
u8 cq_phase;
|
||||
@ -455,11 +456,24 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
|
||||
/*
|
||||
* Write sq tail if we are asked to, or if the next command would wrap.
|
||||
*/
|
||||
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
|
||||
{
|
||||
if (!write_sq) {
|
||||
u16 next_tail = nvmeq->sq_tail + 1;
|
||||
|
||||
if (next_tail == nvmeq->q_depth)
|
||||
next_tail = 0;
|
||||
if (next_tail != nvmeq->last_sq_tail)
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
|
||||
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
|
||||
writel(nvmeq->sq_tail, nvmeq->q_db);
|
||||
nvmeq->last_sq_tail = nvmeq->sq_tail;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -476,8 +490,7 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
|
||||
cmd, sizeof(*cmd));
|
||||
if (++nvmeq->sq_tail == nvmeq->q_depth)
|
||||
nvmeq->sq_tail = 0;
|
||||
if (write_sq)
|
||||
nvme_write_sq_db(nvmeq);
|
||||
nvme_write_sq_db(nvmeq, write_sq);
|
||||
spin_unlock(&nvmeq->sq_lock);
|
||||
}
|
||||
|
||||
@ -486,7 +499,8 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
|
||||
struct nvme_queue *nvmeq = hctx->driver_data;
|
||||
|
||||
spin_lock(&nvmeq->sq_lock);
|
||||
nvme_write_sq_db(nvmeq);
|
||||
if (nvmeq->sq_tail != nvmeq->last_sq_tail)
|
||||
nvme_write_sq_db(nvmeq, true);
|
||||
spin_unlock(&nvmeq->sq_lock);
|
||||
}
|
||||
|
||||
@ -1496,6 +1510,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
|
||||
struct nvme_dev *dev = nvmeq->dev;
|
||||
|
||||
nvmeq->sq_tail = 0;
|
||||
nvmeq->last_sq_tail = 0;
|
||||
nvmeq->cq_head = 0;
|
||||
nvmeq->cq_phase = 1;
|
||||
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
|
||||
|
@ -122,7 +122,6 @@ struct nvme_rdma_ctrl {
|
||||
struct sockaddr_storage src_addr;
|
||||
|
||||
struct nvme_ctrl ctrl;
|
||||
struct mutex teardown_lock;
|
||||
bool use_inline_data;
|
||||
u32 io_queues[HCTX_MAX_TYPES];
|
||||
};
|
||||
@ -1010,8 +1009,8 @@ out_free_io_queues:
|
||||
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
|
||||
bool remove)
|
||||
{
|
||||
mutex_lock(&ctrl->teardown_lock);
|
||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||
blk_sync_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_stop_queue(&ctrl->queues[0]);
|
||||
if (ctrl->ctrl.admin_tagset) {
|
||||
blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
|
||||
@ -1021,16 +1020,15 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
|
||||
if (remove)
|
||||
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_destroy_admin_queue(ctrl, remove);
|
||||
mutex_unlock(&ctrl->teardown_lock);
|
||||
}
|
||||
|
||||
static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
|
||||
bool remove)
|
||||
{
|
||||
mutex_lock(&ctrl->teardown_lock);
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_start_freeze(&ctrl->ctrl);
|
||||
nvme_stop_queues(&ctrl->ctrl);
|
||||
nvme_sync_io_queues(&ctrl->ctrl);
|
||||
nvme_rdma_stop_io_queues(ctrl);
|
||||
if (ctrl->ctrl.tagset) {
|
||||
blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
|
||||
@ -1041,7 +1039,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
|
||||
nvme_start_queues(&ctrl->ctrl);
|
||||
nvme_rdma_destroy_io_queues(ctrl, remove);
|
||||
}
|
||||
mutex_unlock(&ctrl->teardown_lock);
|
||||
}
|
||||
|
||||
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
|
||||
@ -1976,16 +1973,12 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
|
||||
{
|
||||
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
|
||||
struct nvme_rdma_queue *queue = req->queue;
|
||||
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
|
||||
|
||||
/* fence other contexts that may complete the command */
|
||||
mutex_lock(&ctrl->teardown_lock);
|
||||
nvme_rdma_stop_queue(queue);
|
||||
if (!blk_mq_request_completed(rq)) {
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
mutex_unlock(&ctrl->teardown_lock);
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return
|
||||
@ -2320,7 +2313,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
|
||||
return ERR_PTR(-ENOMEM);
|
||||
ctrl->ctrl.opts = opts;
|
||||
INIT_LIST_HEAD(&ctrl->list);
|
||||
mutex_init(&ctrl->teardown_lock);
|
||||
|
||||
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
|
||||
opts->trsvcid =
|
||||
|
@ -124,7 +124,6 @@ struct nvme_tcp_ctrl {
|
||||
struct sockaddr_storage src_addr;
|
||||
struct nvme_ctrl ctrl;
|
||||
|
||||
struct mutex teardown_lock;
|
||||
struct work_struct err_work;
|
||||
struct delayed_work connect_work;
|
||||
struct nvme_tcp_request async_req;
|
||||
@ -1886,8 +1885,8 @@ out_free_queue:
|
||||
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
|
||||
bool remove)
|
||||
{
|
||||
mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
|
||||
blk_mq_quiesce_queue(ctrl->admin_q);
|
||||
blk_sync_queue(ctrl->admin_q);
|
||||
nvme_tcp_stop_queue(ctrl, 0);
|
||||
if (ctrl->admin_tagset) {
|
||||
blk_mq_tagset_busy_iter(ctrl->admin_tagset,
|
||||
@ -1897,18 +1896,17 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
|
||||
if (remove)
|
||||
blk_mq_unquiesce_queue(ctrl->admin_q);
|
||||
nvme_tcp_destroy_admin_queue(ctrl, remove);
|
||||
mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
|
||||
}
|
||||
|
||||
static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
|
||||
bool remove)
|
||||
{
|
||||
mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
|
||||
if (ctrl->queue_count <= 1)
|
||||
goto out;
|
||||
return;
|
||||
blk_mq_quiesce_queue(ctrl->admin_q);
|
||||
nvme_start_freeze(ctrl);
|
||||
nvme_stop_queues(ctrl);
|
||||
nvme_sync_io_queues(ctrl);
|
||||
nvme_tcp_stop_io_queues(ctrl);
|
||||
if (ctrl->tagset) {
|
||||
blk_mq_tagset_busy_iter(ctrl->tagset,
|
||||
@ -1918,8 +1916,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
|
||||
if (remove)
|
||||
nvme_start_queues(ctrl);
|
||||
nvme_tcp_destroy_io_queues(ctrl, remove);
|
||||
out:
|
||||
mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
|
||||
}
|
||||
|
||||
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
|
||||
@ -2171,14 +2167,11 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
|
||||
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
|
||||
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
|
||||
|
||||
/* fence other contexts that may complete the command */
|
||||
mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
|
||||
nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
|
||||
if (!blk_mq_request_completed(rq)) {
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return
|
||||
@ -2455,7 +2448,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
|
||||
nvme_tcp_reconnect_ctrl_work);
|
||||
INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
|
||||
INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
|
||||
mutex_init(&ctrl->teardown_lock);
|
||||
|
||||
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
|
||||
opts->trsvcid =
|
||||
|
Loading…
Reference in New Issue
Block a user