Merge branch 'nvme-5.8' of git://git.infradead.org/nvme into block-5.8
Pull NVMe fixes from Christoph.

* 'nvme-5.8' of git://git.infradead.org/nvme:
  nvme-multipath: fix bogus request queue reference put
  nvme-multipath: fix deadlock due to head->lock
  nvme: don't protect ns mutation with ns->head->lock
  nvme-multipath: fix deadlock between ana_work and scan_work
  nvme: fix possible deadlock when I/O is blocked
  nvme-rdma: assign completion vector correctly
  nvme-loop: initialize tagset numa value to the value of the ctrl
  nvme-tcp: initialize tagset numa value to the value of the ctrl
  nvme-pci: initialize tagset numa value to the value of the ctrl
  nvme-pci: override the value of the controller's numa node
  nvme: set initial value for controller's numa node
This commit is contained in:
commit
1b52671d79
@ -1974,7 +1974,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
|
||||
if (ns->head->disk) {
|
||||
nvme_update_disk_info(ns->head->disk, ns, id);
|
||||
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
|
||||
revalidate_disk(ns->head->disk);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
@ -4174,6 +4173,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
|
||||
ctrl->dev = dev;
|
||||
ctrl->ops = ops;
|
||||
ctrl->quirks = quirks;
|
||||
ctrl->numa_node = NUMA_NO_NODE;
|
||||
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
|
||||
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
|
||||
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
|
||||
|
@ -409,15 +409,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
|
||||
{
|
||||
struct nvme_ns_head *head = ns->head;
|
||||
|
||||
lockdep_assert_held(&ns->head->lock);
|
||||
|
||||
if (!head->disk)
|
||||
return;
|
||||
|
||||
if (!(head->disk->flags & GENHD_FL_UP))
|
||||
if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
|
||||
device_add_disk(&head->subsys->dev, head->disk,
|
||||
nvme_ns_id_attr_groups);
|
||||
|
||||
mutex_lock(&head->lock);
|
||||
if (nvme_path_is_optimized(ns)) {
|
||||
int node, srcu_idx;
|
||||
|
||||
@ -426,9 +425,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
|
||||
__nvme_find_path(head, node);
|
||||
srcu_read_unlock(&head->srcu, srcu_idx);
|
||||
}
|
||||
mutex_unlock(&head->lock);
|
||||
|
||||
synchronize_srcu(&ns->head->srcu);
|
||||
kblockd_schedule_work(&ns->head->requeue_work);
|
||||
synchronize_srcu(&head->srcu);
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
}
|
||||
|
||||
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
|
||||
@ -483,14 +483,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
|
||||
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
|
||||
struct nvme_ns *ns)
|
||||
{
|
||||
mutex_lock(&ns->head->lock);
|
||||
ns->ana_grpid = le32_to_cpu(desc->grpid);
|
||||
ns->ana_state = desc->state;
|
||||
clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
|
||||
|
||||
if (nvme_state_is_live(ns->ana_state))
|
||||
nvme_mpath_set_live(ns);
|
||||
mutex_unlock(&ns->head->lock);
|
||||
}
|
||||
|
||||
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
|
||||
@ -640,31 +638,37 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
|
||||
}
|
||||
DEVICE_ATTR_RO(ana_state);
|
||||
|
||||
static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
|
||||
static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ana_group_desc *desc, void *data)
|
||||
{
|
||||
struct nvme_ns *ns = data;
|
||||
struct nvme_ana_group_desc *dst = data;
|
||||
|
||||
if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
|
||||
nvme_update_ns_ana_state(desc, ns);
|
||||
return -ENXIO; /* just break out of the loop */
|
||||
}
|
||||
if (desc->grpid != dst->grpid)
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
*dst = *desc;
|
||||
return -ENXIO; /* just break out of the loop */
|
||||
}
|
||||
|
||||
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
{
|
||||
if (nvme_ctrl_use_ana(ns->ctrl)) {
|
||||
struct nvme_ana_group_desc desc = {
|
||||
.grpid = id->anagrpid,
|
||||
.state = 0,
|
||||
};
|
||||
|
||||
mutex_lock(&ns->ctrl->ana_lock);
|
||||
ns->ana_grpid = le32_to_cpu(id->anagrpid);
|
||||
nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
|
||||
nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
|
||||
mutex_unlock(&ns->ctrl->ana_lock);
|
||||
if (desc.state) {
|
||||
/* found the group desc: update */
|
||||
nvme_update_ns_ana_state(&desc, ns);
|
||||
}
|
||||
} else {
|
||||
mutex_lock(&ns->head->lock);
|
||||
ns->ana_state = NVME_ANA_OPTIMIZED;
|
||||
nvme_mpath_set_live(ns);
|
||||
mutex_unlock(&ns->head->lock);
|
||||
}
|
||||
|
||||
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
|
||||
@ -686,6 +690,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
flush_work(&head->requeue_work);
|
||||
blk_cleanup_queue(head->disk->queue);
|
||||
if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
|
||||
/*
|
||||
* if device_add_disk wasn't called, prevent
|
||||
* disk release to put a bogus reference on the
|
||||
* request queue
|
||||
*/
|
||||
head->disk->queue = NULL;
|
||||
}
|
||||
put_disk(head->disk);
|
||||
}
|
||||
|
||||
|
@ -364,6 +364,8 @@ struct nvme_ns_head {
|
||||
spinlock_t requeue_lock;
|
||||
struct work_struct requeue_work;
|
||||
struct mutex lock;
|
||||
unsigned long flags;
|
||||
#define NVME_NSHEAD_DISK_LIVE 0
|
||||
struct nvme_ns __rcu *current_path[];
|
||||
#endif
|
||||
};
|
||||
|
@ -1593,7 +1593,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
|
||||
|
||||
dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
|
||||
dev->admin_tagset.numa_node = dev_to_node(dev->dev);
|
||||
dev->admin_tagset.numa_node = dev->ctrl.numa_node;
|
||||
dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
|
||||
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
|
||||
dev->admin_tagset.driver_data = dev;
|
||||
@ -1669,6 +1669,8 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
dev->ctrl.numa_node = dev_to_node(dev->dev);
|
||||
|
||||
nvmeq = &dev->queues[0];
|
||||
aqa = nvmeq->q_depth - 1;
|
||||
aqa |= aqa << 16;
|
||||
@ -2257,7 +2259,7 @@ static void nvme_dev_add(struct nvme_dev *dev)
|
||||
if (dev->io_queues[HCTX_TYPE_POLL])
|
||||
dev->tagset.nr_maps++;
|
||||
dev->tagset.timeout = NVME_IO_TIMEOUT;
|
||||
dev->tagset.numa_node = dev_to_node(dev->dev);
|
||||
dev->tagset.numa_node = dev->ctrl.numa_node;
|
||||
dev->tagset.queue_depth =
|
||||
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
|
||||
dev->tagset.cmd_size = sizeof(struct nvme_iod);
|
||||
|
@ -470,7 +470,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
|
||||
* Spread I/O queues completion vectors according their queue index.
|
||||
* Admin queues can always go on completion vector 0.
|
||||
*/
|
||||
comp_vector = idx == 0 ? idx : idx - 1;
|
||||
comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
|
||||
|
||||
/* Polling queues need direct cq polling context */
|
||||
if (nvme_rdma_poll_queue(queue))
|
||||
|
@ -1532,7 +1532,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
|
||||
set->ops = &nvme_tcp_admin_mq_ops;
|
||||
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
set->reserved_tags = 2; /* connect + keep-alive */
|
||||
set->numa_node = NUMA_NO_NODE;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_BLOCKING;
|
||||
set->cmd_size = sizeof(struct nvme_tcp_request);
|
||||
set->driver_data = ctrl;
|
||||
@ -1544,7 +1544,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
|
||||
set->ops = &nvme_tcp_mq_ops;
|
||||
set->queue_depth = nctrl->sqsize + 1;
|
||||
set->reserved_tags = 1; /* fabric connect */
|
||||
set->numa_node = NUMA_NO_NODE;
|
||||
set->numa_node = nctrl->numa_node;
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
|
||||
set->cmd_size = sizeof(struct nvme_tcp_request);
|
||||
set->driver_data = ctrl;
|
||||
|
@ -340,7 +340,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
|
||||
ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
|
||||
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
||||
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
|
||||
ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
|
||||
ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
|
||||
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
|
||||
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
|
||||
ctrl->admin_tag_set.driver_data = ctrl;
|
||||
@ -512,7 +512,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
|
||||
ctrl->tag_set.ops = &nvme_loop_mq_ops;
|
||||
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
|
||||
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
|
||||
ctrl->tag_set.numa_node = NUMA_NO_NODE;
|
||||
ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
|
||||
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
|
||||
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
|
||||
|
Loading…
x
Reference in New Issue
Block a user