nvme: add verbose error logging
Improves logging of NVMe errors. If NVME_VERBOSE_ERRORS is configured, a verbose description of the error is logged, otherwise only status codes/bits are logged. Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com> [kch]: fix several nits, cosmetics, and trim down code. Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Alan Adamson <alan.adamson@oracle.com> Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com> Reviewed-by: Keith Busch <kbusch@kernel.org> Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
72e8b5cd7d
commit
bd83fe6f2c
@ -24,6 +24,14 @@ config NVME_MULTIPATH
|
||||
/dev/nvmeXnY device will show up for each NVMe namespace,
|
||||
even if it is accessible through multiple controllers.
|
||||
|
||||
config NVME_VERBOSE_ERRORS
|
||||
bool "NVMe verbose error reporting"
|
||||
depends on NVME_CORE
|
||||
help
|
||||
This option enables verbose reporting for NVMe errors. The
|
||||
error translation table will grow the kernel image size by
|
||||
about 4 KB.
|
||||
|
||||
config NVME_HWMON
|
||||
bool "NVMe hardware monitoring"
|
||||
depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
|
||||
|
@ -9,7 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
|
||||
obj-$(CONFIG_NVME_FC) += nvme-fc.o
|
||||
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
|
||||
|
||||
nvme-core-y := core.o ioctl.o
|
||||
nvme-core-y := core.o ioctl.o constants.o
|
||||
nvme-core-$(CONFIG_TRACING) += trace.o
|
||||
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
|
||||
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
|
||||
|
185
drivers/nvme/host/constants.c
Normal file
185
drivers/nvme/host/constants.c
Normal file
@ -0,0 +1,185 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NVM Express device driver verbose errors
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include "nvme.h"
|
||||
|
||||
#ifdef CONFIG_NVME_VERBOSE_ERRORS
|
||||
static const char * const nvme_ops[] = {
|
||||
[nvme_cmd_flush] = "Flush",
|
||||
[nvme_cmd_write] = "Write",
|
||||
[nvme_cmd_read] = "Read",
|
||||
[nvme_cmd_write_uncor] = "Write Uncorrectable",
|
||||
[nvme_cmd_compare] = "Compare",
|
||||
[nvme_cmd_write_zeroes] = "Write Zeros",
|
||||
[nvme_cmd_dsm] = "Dataset Management",
|
||||
[nvme_cmd_verify] = "Verify",
|
||||
[nvme_cmd_resv_register] = "Reservation Register",
|
||||
[nvme_cmd_resv_report] = "Reservation Report",
|
||||
[nvme_cmd_resv_acquire] = "Reservation Acquire",
|
||||
[nvme_cmd_resv_release] = "Reservation Release",
|
||||
[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
|
||||
[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
|
||||
[nvme_cmd_zone_append] = "Zone Management Append",
|
||||
};
|
||||
|
||||
static const char * const nvme_admin_ops[] = {
|
||||
[nvme_admin_delete_sq] = "Delete SQ",
|
||||
[nvme_admin_create_sq] = "Create SQ",
|
||||
[nvme_admin_get_log_page] = "Get Log Page",
|
||||
[nvme_admin_delete_cq] = "Delete CQ",
|
||||
[nvme_admin_create_cq] = "Create CQ",
|
||||
[nvme_admin_identify] = "Identify",
|
||||
[nvme_admin_abort_cmd] = "Abort Command",
|
||||
[nvme_admin_set_features] = "Set Features",
|
||||
[nvme_admin_get_features] = "Get Features",
|
||||
[nvme_admin_async_event] = "Async Event",
|
||||
[nvme_admin_ns_mgmt] = "Namespace Management",
|
||||
[nvme_admin_activate_fw] = "Activate Firmware",
|
||||
[nvme_admin_download_fw] = "Download Firmware",
|
||||
[nvme_admin_dev_self_test] = "Device Self Test",
|
||||
[nvme_admin_ns_attach] = "Namespace Attach",
|
||||
[nvme_admin_keep_alive] = "Keep Alive",
|
||||
[nvme_admin_directive_send] = "Directive Send",
|
||||
[nvme_admin_directive_recv] = "Directive Receive",
|
||||
[nvme_admin_virtual_mgmt] = "Virtual Management",
|
||||
[nvme_admin_nvme_mi_send] = "NVMe Send MI",
|
||||
[nvme_admin_nvme_mi_recv] = "NVMe Receive MI",
|
||||
[nvme_admin_dbbuf] = "Doorbell Buffer Config",
|
||||
[nvme_admin_format_nvm] = "Format NVM",
|
||||
[nvme_admin_security_send] = "Security Send",
|
||||
[nvme_admin_security_recv] = "Security Receive",
|
||||
[nvme_admin_sanitize_nvm] = "Sanitize NVM",
|
||||
[nvme_admin_get_lba_status] = "Get LBA Status",
|
||||
};
|
||||
|
||||
static const char * const nvme_statuses[] = {
|
||||
[NVME_SC_SUCCESS] = "Success",
|
||||
[NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode",
|
||||
[NVME_SC_INVALID_FIELD] = "Invalid Field in Command",
|
||||
[NVME_SC_CMDID_CONFLICT] = "Command ID Conflict",
|
||||
[NVME_SC_DATA_XFER_ERROR] = "Data Transfer Error",
|
||||
[NVME_SC_POWER_LOSS] = "Commands Aborted due to Power Loss Notification",
|
||||
[NVME_SC_INTERNAL] = "Internal Error",
|
||||
[NVME_SC_ABORT_REQ] = "Command Abort Requested",
|
||||
[NVME_SC_ABORT_QUEUE] = "Command Aborted due to SQ Deletion",
|
||||
[NVME_SC_FUSED_FAIL] = "Command Aborted due to Failed Fused Command",
|
||||
[NVME_SC_FUSED_MISSING] = "Command Aborted due to Missing Fused Command",
|
||||
[NVME_SC_INVALID_NS] = "Invalid Namespace or Format",
|
||||
[NVME_SC_CMD_SEQ_ERROR] = "Command Sequence Error",
|
||||
[NVME_SC_SGL_INVALID_LAST] = "Invalid SGL Segment Descriptor",
|
||||
[NVME_SC_SGL_INVALID_COUNT] = "Invalid Number of SGL Descriptors",
|
||||
[NVME_SC_SGL_INVALID_DATA] = "Data SGL Length Invalid",
|
||||
[NVME_SC_SGL_INVALID_METADATA] = "Metadata SGL Length Invalid",
|
||||
[NVME_SC_SGL_INVALID_TYPE] = "SGL Descriptor Type Invalid",
|
||||
[NVME_SC_CMB_INVALID_USE] = "Invalid Use of Controller Memory Buffer",
|
||||
[NVME_SC_PRP_INVALID_OFFSET] = "PRP Offset Invalid",
|
||||
[NVME_SC_ATOMIC_WU_EXCEEDED] = "Atomic Write Unit Exceeded",
|
||||
[NVME_SC_OP_DENIED] = "Operation Denied",
|
||||
[NVME_SC_SGL_INVALID_OFFSET] = "SGL Offset Invalid",
|
||||
[NVME_SC_RESERVED] = "Reserved",
|
||||
[NVME_SC_HOST_ID_INCONSIST] = "Host Identifier Inconsistent Format",
|
||||
[NVME_SC_KA_TIMEOUT_EXPIRED] = "Keep Alive Timeout Expired",
|
||||
[NVME_SC_KA_TIMEOUT_INVALID] = "Keep Alive Timeout Invalid",
|
||||
[NVME_SC_ABORTED_PREEMPT_ABORT] = "Command Aborted due to Preempt and Abort",
|
||||
[NVME_SC_SANITIZE_FAILED] = "Sanitize Failed",
|
||||
[NVME_SC_SANITIZE_IN_PROGRESS] = "Sanitize In Progress",
|
||||
[NVME_SC_SGL_INVALID_GRANULARITY] = "SGL Data Block Granularity Invalid",
|
||||
[NVME_SC_CMD_NOT_SUP_CMB_QUEUE] = "Command Not Supported for Queue in CMB",
|
||||
[NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
|
||||
[NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
|
||||
[NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
|
||||
[NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
|
||||
[NVME_SC_LBA_RANGE] = "LBA Out of Range",
|
||||
[NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
|
||||
[NVME_SC_NS_NOT_READY] = "Namespace Not Ready",
|
||||
[NVME_SC_RESERVATION_CONFLICT] = "Reservation Conflict",
|
||||
[NVME_SC_FORMAT_IN_PROGRESS] = "Format In Progress",
|
||||
[NVME_SC_CQ_INVALID] = "Completion Queue Invalid",
|
||||
[NVME_SC_QID_INVALID] = "Invalid Queue Identifier",
|
||||
[NVME_SC_QUEUE_SIZE] = "Invalid Queue Size",
|
||||
[NVME_SC_ABORT_LIMIT] = "Abort Command Limit Exceeded",
|
||||
[NVME_SC_ABORT_MISSING] = "Reserved", /* XXX */
|
||||
[NVME_SC_ASYNC_LIMIT] = "Asynchronous Event Request Limit Exceeded",
|
||||
[NVME_SC_FIRMWARE_SLOT] = "Invalid Firmware Slot",
|
||||
[NVME_SC_FIRMWARE_IMAGE] = "Invalid Firmware Image",
|
||||
[NVME_SC_INVALID_VECTOR] = "Invalid Interrupt Vector",
|
||||
[NVME_SC_INVALID_LOG_PAGE] = "Invalid Log Page",
|
||||
[NVME_SC_INVALID_FORMAT] = "Invalid Format",
|
||||
[NVME_SC_FW_NEEDS_CONV_RESET] = "Firmware Activation Requires Conventional Reset",
|
||||
[NVME_SC_INVALID_QUEUE] = "Invalid Queue Deletion",
|
||||
[NVME_SC_FEATURE_NOT_SAVEABLE] = "Feature Identifier Not Saveable",
|
||||
[NVME_SC_FEATURE_NOT_CHANGEABLE] = "Feature Not Changeable",
|
||||
[NVME_SC_FEATURE_NOT_PER_NS] = "Feature Not Namespace Specific",
|
||||
[NVME_SC_FW_NEEDS_SUBSYS_RESET] = "Firmware Activation Requires NVM Subsystem Reset",
|
||||
[NVME_SC_FW_NEEDS_RESET] = "Firmware Activation Requires Reset",
|
||||
[NVME_SC_FW_NEEDS_MAX_TIME] = "Firmware Activation Requires Maximum Time Violation",
|
||||
[NVME_SC_FW_ACTIVATE_PROHIBITED] = "Firmware Activation Prohibited",
|
||||
[NVME_SC_OVERLAPPING_RANGE] = "Overlapping Range",
|
||||
[NVME_SC_NS_INSUFFICIENT_CAP] = "Namespace Insufficient Capacity",
|
||||
[NVME_SC_NS_ID_UNAVAILABLE] = "Namespace Identifier Unavailable",
|
||||
[NVME_SC_NS_ALREADY_ATTACHED] = "Namespace Already Attached",
|
||||
[NVME_SC_NS_IS_PRIVATE] = "Namespace Is Private",
|
||||
[NVME_SC_NS_NOT_ATTACHED] = "Namespace Not Attached",
|
||||
[NVME_SC_THIN_PROV_NOT_SUPP] = "Thin Provisioning Not Supported",
|
||||
[NVME_SC_CTRL_LIST_INVALID] = "Controller List Invalid",
|
||||
[NVME_SC_SELT_TEST_IN_PROGRESS] = "Device Self-test In Progress",
|
||||
[NVME_SC_BP_WRITE_PROHIBITED] = "Boot Partition Write Prohibited",
|
||||
[NVME_SC_CTRL_ID_INVALID] = "Invalid Controller Identifier",
|
||||
[NVME_SC_SEC_CTRL_STATE_INVALID] = "Invalid Secondary Controller State",
|
||||
[NVME_SC_CTRL_RES_NUM_INVALID] = "Invalid Number of Controller Resources",
|
||||
[NVME_SC_RES_ID_INVALID] = "Invalid Resource Identifier",
|
||||
[NVME_SC_PMR_SAN_PROHIBITED] = "Sanitize Prohibited",
|
||||
[NVME_SC_ANA_GROUP_ID_INVALID] = "ANA Group Identifier Invalid",
|
||||
[NVME_SC_ANA_ATTACH_FAILED] = "ANA Attach Failed",
|
||||
[NVME_SC_BAD_ATTRIBUTES] = "Conflicting Attributes",
|
||||
[NVME_SC_INVALID_PI] = "Invalid Protection Information",
|
||||
[NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range",
|
||||
[NVME_SC_ONCS_NOT_SUPPORTED] = "ONCS Not Supported",
|
||||
[NVME_SC_ZONE_BOUNDARY_ERROR] = "Zoned Boundary Error",
|
||||
[NVME_SC_ZONE_FULL] = "Zone Is Full",
|
||||
[NVME_SC_ZONE_READ_ONLY] = "Zone Is Read Only",
|
||||
[NVME_SC_ZONE_OFFLINE] = "Zone Is Offline",
|
||||
[NVME_SC_ZONE_INVALID_WRITE] = "Zone Invalid Write",
|
||||
[NVME_SC_ZONE_TOO_MANY_ACTIVE] = "Too Many Active Zones",
|
||||
[NVME_SC_ZONE_TOO_MANY_OPEN] = "Too Many Open Zones",
|
||||
[NVME_SC_ZONE_INVALID_TRANSITION] = "Invalid Zone State Transition",
|
||||
[NVME_SC_WRITE_FAULT] = "Write Fault",
|
||||
[NVME_SC_READ_ERROR] = "Unrecovered Read Error",
|
||||
[NVME_SC_GUARD_CHECK] = "End-to-end Guard Check Error",
|
||||
[NVME_SC_APPTAG_CHECK] = "End-to-end Application Tag Check Error",
|
||||
[NVME_SC_REFTAG_CHECK] = "End-to-end Reference Tag Check Error",
|
||||
[NVME_SC_COMPARE_FAILED] = "Compare Failure",
|
||||
[NVME_SC_ACCESS_DENIED] = "Access Denied",
|
||||
[NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
|
||||
[NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
|
||||
[NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
|
||||
[NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
|
||||
[NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
|
||||
};
|
||||
|
||||
/*
 * Translate an NVMe completion status into a descriptive string.
 *
 * @status: status value; only the bits selected by 0x7ff are used for the
 *          lookup, anything above them is ignored.
 *
 * Returns the matching nvme_statuses[] entry, or "Unknown" when the masked
 * value lies beyond the table or its slot is empty.
 */
const unsigned char *nvme_get_error_status_str(u16 status)
{
	const u16 idx = status & 0x7ff;

	if (idx >= ARRAY_SIZE(nvme_statuses) || !nvme_statuses[idx])
		return "Unknown";

	return nvme_statuses[idx];
}
|
||||
|
||||
/*
 * Translate an I/O command opcode into its name.
 *
 * @opcode: opcode to look up in nvme_ops[].
 *
 * Returns the table entry, or "Unknown" when the opcode is past the end of
 * the table or has no name assigned.
 */
const unsigned char *nvme_get_opcode_str(u8 opcode)
{
	if (opcode >= ARRAY_SIZE(nvme_ops) || !nvme_ops[opcode])
		return "Unknown";

	return nvme_ops[opcode];
}
|
||||
|
||||
/*
 * Translate an admin command opcode into its name.
 *
 * @opcode: opcode to look up in nvme_admin_ops[].
 *
 * Returns the table entry, or "Unknown" when the opcode is past the end of
 * the table or has no name assigned.
 */
const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
{
	if (opcode >= ARRAY_SIZE(nvme_admin_ops) || !nvme_admin_ops[opcode])
		return "Unknown";

	return nvme_admin_ops[opcode];
}
|
||||
#endif /* CONFIG_NVME_VERBOSE_ERRORS */
|
@ -299,6 +299,37 @@ static void nvme_retry_req(struct request *req)
|
||||
blk_mq_delay_kick_requeue_list(req->q, delay);
|
||||
}
|
||||
|
||||
static void nvme_log_error(struct request *req)
|
||||
{
|
||||
struct nvme_ns *ns = req->q->queuedata;
|
||||
struct nvme_request *nr = nvme_req(req);
|
||||
|
||||
if (ns) {
|
||||
pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
|
||||
ns->disk ? ns->disk->disk_name : "?",
|
||||
nvme_get_opcode_str(nr->cmd->common.opcode),
|
||||
nr->cmd->common.opcode,
|
||||
(unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)),
|
||||
(unsigned long long)blk_rq_bytes(req) >> ns->lba_shift,
|
||||
nvme_get_error_status_str(nr->status),
|
||||
nr->status >> 8 & 7, /* Status Code Type */
|
||||
nr->status & 0xff, /* Status Code */
|
||||
nr->status & NVME_SC_MORE ? "MORE " : "",
|
||||
nr->status & NVME_SC_DNR ? "DNR " : "");
|
||||
return;
|
||||
}
|
||||
|
||||
pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s\n",
|
||||
dev_name(nr->ctrl->device),
|
||||
nvme_get_admin_opcode_str(nr->cmd->common.opcode),
|
||||
nr->cmd->common.opcode,
|
||||
nvme_get_error_status_str(nr->status),
|
||||
nr->status >> 8 & 7, /* Status Code Type */
|
||||
nr->status & 0xff, /* Status Code */
|
||||
nr->status & NVME_SC_MORE ? "MORE " : "",
|
||||
nr->status & NVME_SC_DNR ? "DNR " : "");
|
||||
}
|
||||
|
||||
enum nvme_disposition {
|
||||
COMPLETE,
|
||||
RETRY,
|
||||
@ -339,6 +370,8 @@ static inline void nvme_end_req(struct request *req)
|
||||
{
|
||||
blk_status_t status = nvme_error_status(nvme_req(req)->status);
|
||||
|
||||
if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS))
|
||||
nvme_log_error(req);
|
||||
nvme_end_req_zoned(req);
|
||||
nvme_trace_bio_complete(req);
|
||||
blk_mq_end_request(req, status);
|
||||
|
@ -938,4 +938,23 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
|
||||
return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVME_VERBOSE_ERRORS
|
||||
const unsigned char *nvme_get_error_status_str(u16 status);
|
||||
const unsigned char *nvme_get_opcode_str(u8 opcode);
|
||||
const unsigned char *nvme_get_admin_opcode_str(u8 opcode);
|
||||
#else /* CONFIG_NVME_VERBOSE_ERRORS */
|
||||
static inline const unsigned char *nvme_get_error_status_str(u16 status)
|
||||
{
|
||||
return "I/O Error";
|
||||
}
|
||||
static inline const unsigned char *nvme_get_opcode_str(u8 opcode)
|
||||
{
|
||||
return "I/O Cmd";
|
||||
}
|
||||
static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
|
||||
{
|
||||
return "Admin Cmd";
|
||||
}
|
||||
#endif /* CONFIG_NVME_VERBOSE_ERRORS */
|
||||
|
||||
#endif /* _NVME_H */
|
||||
|
@ -1636,6 +1636,7 @@ enum {
|
||||
NVME_SC_HOST_ABORTED_CMD = 0x371,
|
||||
|
||||
NVME_SC_CRD = 0x1800,
|
||||
NVME_SC_MORE = 0x2000,
|
||||
NVME_SC_DNR = 0x4000,
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user