scsi: mpt3sas: Added support for nvme encapsulated request message.

* Mpt3sas driver uses the NVMe Encapsulated Request message to send an
  NVMe command to an NVMe device attached to the IOC.

* Normal I/O commands like reads and writes are passed to the
  controller as SCSI commands and the controller has the ability to
  translate the commands to NVMe equivalent.

* This encapsulated NVMe command is used by applications to send
  direct NVMe commands to NVMe drives.

Signed-off-by: Chaitra P B <chaitra.basappa@broadcom.com>
Signed-off-by: Suganath Prabu S <suganath-prabu.subramani@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Suganath Prabu Subramani 2017-10-31 18:02:29 +05:30 committed by Martin K. Petersen
parent 016d5c35e2
commit aff39e6121
3 changed files with 342 additions and 7 deletions

View File

@ -557,6 +557,11 @@ _base_sas_ioc_info(struct MPT3SAS_ADAPTER *ioc, MPI2DefaultReply_t *mpi_reply,
frame_sz = sizeof(Mpi2SmpPassthroughRequest_t) + ioc->sge_size;
func_str = "smp_passthru";
break;
case MPI2_FUNCTION_NVME_ENCAPSULATED:
frame_sz = sizeof(Mpi26NVMeEncapsulatedRequest_t) +
ioc->sge_size;
func_str = "nvme_encapsulated";
break;
default:
frame_sz = 32;
func_str = "unknown";
@ -985,7 +990,9 @@ _base_interrupt(int irq, void *bus_id)
if (request_desript_type ==
MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS ||
request_desript_type ==
MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS) {
MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS ||
request_desript_type ==
MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS) {
cb_idx = _base_get_cb_idx(ioc, smid);
if ((likely(cb_idx < MPT_MAX_CALLBACKS)) &&
(likely(mpt_callbacks[cb_idx] != NULL))) {
@ -1345,6 +1352,225 @@ _base_build_sg(struct MPT3SAS_ADAPTER *ioc, void *psge,
}
}
/* IEEE format sgls */
/**
* _base_build_nvme_prp - This function is called for NVMe end devices to build
* a native SGL (NVMe PRP). The native SGL is built starting in the first PRP
* entry of the NVMe message (PRP1). If the data buffer is small enough to be
* described entirely using PRP1, then PRP2 is not used. If needed, PRP2 is
* used to describe a larger data buffer. If the data buffer is too large to
* describe using the two PRP entriess inside the NVMe message, then PRP1
* describes the first data memory segment, and PRP2 contains a pointer to a PRP
* list located elsewhere in memory to describe the remaining data memory
* segments. The PRP list will be contiguous.
* The native SGL for NVMe devices is a Physical Region Page (PRP). A PRP
* consists of a list of PRP entries to describe a number of noncontigous
* physical memory segments as a single memory buffer, just as a SGL does. Note
* however, that this function is only used by the IOCTL call, so the memory
* given will be guaranteed to be contiguous. There is no need to translate
* non-contiguous SGL into a PRP in this case. All PRPs will describe
* contiguous space that is one page size each.
*
* Each NVMe message contains two PRP entries. The first (PRP1) either contains
* a PRP list pointer or a PRP element, depending upon the command. PRP2
* contains the second PRP element if the memory being described fits within 2
* PRP entries, or a PRP list pointer if the PRP spans more than two entries.
*
* A PRP list pointer contains the address of a PRP list, structured as a linear
* array of PRP entries. Each PRP entry in this list describes a segment of
* physical memory.
*
* Each 64-bit PRP entry comprises an address and an offset field. The address
* always points at the beginning of a 4KB physical memory page, and the offset
* describes where within that 4KB page the memory segment begins. Only the
* first element in a PRP list may contain a non-zero offest, implying that all
* memory segments following the first begin at the start of a 4KB page.
*
* Each PRP element normally describes 4KB of physical memory, with exceptions
* for the first and last elements in the list. If the memory being described
* by the list begins at a non-zero offset within the first 4KB page, then the
* first PRP element will contain a non-zero offset indicating where the region
* begins within the 4KB page. The last memory segment may end before the end
* of the 4KB segment, depending upon the overall size of the memory being
* described by the PRP list.
*
* Since PRP entries lack any indication of size, the overall data buffer length
* is used to determine where the end of the data memory buffer is located, and
* how many PRP entries are required to describe it.
*
* @ioc: per adapter object
* @smid: system request message index for getting asscociated SGL
* @nvme_encap_request: the NVMe request msg frame pointer
* @data_out_dma: physical address for WRITES
* @data_out_sz: data xfer size for WRITES
* @data_in_dma: physical address for READS
* @data_in_sz: data xfer size for READS
*
* Returns nothing.
*/
static void
_base_build_nvme_prp(struct MPT3SAS_ADAPTER *ioc, u16 smid,
Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request,
dma_addr_t data_out_dma, size_t data_out_sz, dma_addr_t data_in_dma,
size_t data_in_sz)
{
int prp_size = NVME_PRP_SIZE;
u64 *prp_entry, *prp1_entry, *prp2_entry, *prp_entry_phys;
u64 *prp_page, *prp_page_phys;
u32 offset, entry_len;
u32 page_mask_result, page_mask;
dma_addr_t paddr;
size_t length;
/*
* Not all commands require a data transfer. If no data, just return
* without constructing any PRP.
*/
if (!data_in_sz && !data_out_sz)
return;
/*
* Set pointers to PRP1 and PRP2, which are in the NVMe command.
* PRP1 is located at a 24 byte offset from the start of the NVMe
* command. Then set the current PRP entry pointer to PRP1.
*/
prp1_entry = (u64 *)(nvme_encap_request->NVMe_Command +
NVME_CMD_PRP1_OFFSET);
prp2_entry = (u64 *)(nvme_encap_request->NVMe_Command +
NVME_CMD_PRP2_OFFSET);
prp_entry = prp1_entry;
/*
* For the PRP entries, use the specially allocated buffer of
* contiguous memory.
*/
prp_page = (u64 *)mpt3sas_base_get_pcie_sgl(ioc, smid);
prp_page_phys = (u64 *)mpt3sas_base_get_pcie_sgl_dma(ioc, smid);
/*
* Check if we are within 1 entry of a page boundary we don't
* want our first entry to be a PRP List entry.
*/
page_mask = ioc->page_size - 1;
page_mask_result = (uintptr_t)((u8 *)prp_page + prp_size) & page_mask;
if (!page_mask_result) {
/* Bump up to next page boundary. */
prp_page = (u64 *)((u8 *)prp_page + prp_size);
prp_page_phys = (u64 *)((u8 *)prp_page_phys + prp_size);
}
/*
* Set PRP physical pointer, which initially points to the current PRP
* DMA memory page.
*/
prp_entry_phys = prp_page_phys;
/* Get physical address and length of the data buffer. */
if (data_in_sz) {
paddr = data_in_dma;
length = data_in_sz;
} else {
paddr = data_out_dma;
length = data_out_sz;
}
/* Loop while the length is not zero. */
while (length) {
/*
* Check if we need to put a list pointer here if we are at
* page boundary - prp_size (8 bytes).
*/
page_mask_result =
(uintptr_t)((u8 *)prp_entry_phys + prp_size) & page_mask;
if (!page_mask_result) {
/*
* This is the last entry in a PRP List, so we need to
* put a PRP list pointer here. What this does is:
* - bump the current memory pointer to the next
* address, which will be the next full page.
* - set the PRP Entry to point to that page. This
* is now the PRP List pointer.
* - bump the PRP Entry pointer the start of the
* next page. Since all of this PRP memory is
* contiguous, no need to get a new page - it's
* just the next address.
*/
prp_entry_phys++;
*prp_entry = cpu_to_le64((uintptr_t)prp_entry_phys);
prp_entry++;
}
/* Need to handle if entry will be part of a page. */
offset = (u32)paddr & page_mask;
entry_len = ioc->page_size - offset;
if (prp_entry == prp1_entry) {
/*
* Must fill in the first PRP pointer (PRP1) before
* moving on.
*/
*prp1_entry = cpu_to_le64((u64)paddr);
/*
* Now point to the second PRP entry within the
* command (PRP2).
*/
prp_entry = prp2_entry;
} else if (prp_entry == prp2_entry) {
/*
* Should the PRP2 entry be a PRP List pointer or just
* a regular PRP pointer? If there is more than one
* more page of data, must use a PRP List pointer.
*/
if (length > ioc->page_size) {
/*
* PRP2 will contain a PRP List pointer because
* more PRP's are needed with this command. The
* list will start at the beginning of the
* contiguous buffer.
*/
*prp2_entry =
cpu_to_le64((uintptr_t)prp_entry_phys);
/*
* The next PRP Entry will be the start of the
* first PRP List.
*/
prp_entry = prp_page;
} else {
/*
* After this, the PRP Entries are complete.
* This command uses 2 PRP's and no PRP list.
*/
*prp2_entry = cpu_to_le64((u64)paddr);
}
} else {
/*
* Put entry in list and bump the addresses.
*
* After PRP1 and PRP2 are filled in, this will fill in
* all remaining PRP entries in a PRP List, one per
* each time through the loop.
*/
*prp_entry = cpu_to_le64((u64)paddr);
prp_entry++;
prp_entry_phys++;
}
/*
* Bump the phys address of the command's data buffer by the
* entry_len.
*/
paddr += entry_len;
/* Decrement length accounting for last partial page. */
if (entry_len > length)
length = 0;
else
length -= entry_len;
}
}
/**
* base_make_prp_nvme -
* Prepare PRPs(Physical Region Page)- SGLs specific to NVMe drives only
@ -2793,6 +3019,30 @@ _base_put_smid_hi_priority(struct MPT3SAS_ADAPTER *ioc, u16 smid,
&ioc->scsi_lookup_lock);
}
/**
* _base_put_smid_nvme_encap - send NVMe encapsulated request to
* firmware
* @ioc: per adapter object
* @smid: system request message index
*
* Return nothing.
*/
static void
_base_put_smid_nvme_encap(struct MPT3SAS_ADAPTER *ioc, u16 smid)
{
Mpi2RequestDescriptorUnion_t descriptor;
u64 *request = (u64 *)&descriptor;
descriptor.Default.RequestFlags =
MPI26_REQ_DESCRIPT_FLAGS_PCIE_ENCAPSULATED;
descriptor.Default.MSIxIndex = _base_get_msix_index(ioc);
descriptor.Default.SMID = cpu_to_le16(smid);
descriptor.Default.LMID = 0;
descriptor.Default.DescriptorTypeDependent = 0;
_base_writeq(*request, &ioc->chip->RequestDescriptorPostLow,
&ioc->scsi_lookup_lock);
}
/**
* _base_put_smid_default - Default, primarily used for config pages
* @ioc: per adapter object
@ -2883,6 +3133,27 @@ _base_put_smid_hi_priority_atomic(struct MPT3SAS_ADAPTER *ioc, u16 smid,
writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost);
}
/**
* _base_put_smid_nvme_encap_atomic - send NVMe encapsulated request to
* firmware using Atomic Request Descriptor
* @ioc: per adapter object
* @smid: system request message index
*
* Return nothing.
*/
static void
_base_put_smid_nvme_encap_atomic(struct MPT3SAS_ADAPTER *ioc, u16 smid)
{
Mpi26AtomicRequestDescriptor_t descriptor;
u32 *request = (u32 *)&descriptor;
descriptor.RequestFlags = MPI26_REQ_DESCRIPT_FLAGS_PCIE_ENCAPSULATED;
descriptor.MSIxIndex = _base_get_msix_index(ioc);
descriptor.SMID = cpu_to_le16(smid);
writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost);
}
/**
* _base_put_smid_default - Default, primarily used for config pages
* use Atomic Request Descriptor
@ -5707,6 +5978,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
*/
ioc->build_sg_scmd = &_base_build_sg_scmd_ieee;
ioc->build_sg = &_base_build_sg_ieee;
ioc->build_nvme_prp = &_base_build_nvme_prp;
ioc->build_zero_len_sge = &_base_build_zero_len_sge_ieee;
ioc->sge_size_ieee = sizeof(Mpi2IeeeSgeSimple64_t);
@ -5718,11 +5990,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
ioc->put_smid_scsi_io = &_base_put_smid_scsi_io_atomic;
ioc->put_smid_fast_path = &_base_put_smid_fast_path_atomic;
ioc->put_smid_hi_priority = &_base_put_smid_hi_priority_atomic;
ioc->put_smid_nvme_encap = &_base_put_smid_nvme_encap_atomic;
} else {
ioc->put_smid_default = &_base_put_smid_default;
ioc->put_smid_scsi_io = &_base_put_smid_scsi_io;
ioc->put_smid_fast_path = &_base_put_smid_fast_path;
ioc->put_smid_hi_priority = &_base_put_smid_hi_priority;
ioc->put_smid_nvme_encap = &_base_put_smid_nvme_encap;
}

View File

@ -1184,6 +1184,9 @@ struct MPT3SAS_ADAPTER {
MPT_BUILD_SG build_sg_mpi;
MPT_BUILD_ZERO_LEN_SGE build_zero_len_sge_mpi;
/* function ptr for NVMe PRP elements only */
NVME_BUILD_PRP build_nvme_prp;
/* event log */
u32 event_type[MPI2_EVENT_NOTIFY_EVENTMASK_WORDS];
u32 event_context;
@ -1354,6 +1357,7 @@ struct MPT3SAS_ADAPTER {
PUT_SMID_IO_FP_HIP put_smid_fast_path;
PUT_SMID_IO_FP_HIP put_smid_hi_priority;
PUT_SMID_DEFAULT put_smid_default;
PUT_SMID_DEFAULT put_smid_nvme_encap;
};

View File

@ -272,6 +272,7 @@ mpt3sas_ctl_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
{
MPI2DefaultReply_t *mpi_reply;
Mpi2SCSIIOReply_t *scsiio_reply;
Mpi26NVMeEncapsulatedErrorReply_t *nvme_error_reply;
const void *sense_data;
u32 sz;
@ -298,6 +299,18 @@ mpt3sas_ctl_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
memcpy(ioc->ctl_cmds.sense, sense_data, sz);
}
}
/*
* Get Error Response data for NVMe device. The ctl_cmds.sense
* buffer is used to store the Error Response data.
*/
if (mpi_reply->Function == MPI2_FUNCTION_NVME_ENCAPSULATED) {
nvme_error_reply =
(Mpi26NVMeEncapsulatedErrorReply_t *)mpi_reply;
sz = min_t(u32, NVME_ERROR_RESPONSE_SIZE,
le32_to_cpu(nvme_error_reply->ErrorResponseCount));
sense_data = mpt3sas_base_get_sense_buffer(ioc, smid);
memcpy(ioc->ctl_cmds.sense, sense_data, sz);
}
}
_ctl_display_some_debug(ioc, smid, "ctl_done", mpi_reply);
@ -641,11 +654,12 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
{
MPI2RequestHeader_t *mpi_request = NULL, *request;
MPI2DefaultReply_t *mpi_reply;
Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request = NULL;
u32 ioc_state;
u16 smid;
unsigned long timeout;
u8 issue_reset;
u32 sz;
u32 sz, sz_arg;
void *psge;
void *data_out = NULL;
dma_addr_t data_out_dma = 0;
@ -742,7 +756,8 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
if (mpi_request->Function == MPI2_FUNCTION_SCSI_IO_REQUEST ||
mpi_request->Function == MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH ||
mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT ||
mpi_request->Function == MPI2_FUNCTION_SATA_PASSTHROUGH) {
mpi_request->Function == MPI2_FUNCTION_SATA_PASSTHROUGH ||
mpi_request->Function == MPI2_FUNCTION_NVME_ENCAPSULATED) {
device_handle = le16_to_cpu(mpi_request->FunctionDependent1);
if (!device_handle || (device_handle >
@ -793,6 +808,38 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
init_completion(&ioc->ctl_cmds.done);
switch (mpi_request->Function) {
case MPI2_FUNCTION_NVME_ENCAPSULATED:
{
nvme_encap_request = (Mpi26NVMeEncapsulatedRequest_t *)request;
/*
* Get the Physical Address of the sense buffer.
* Use Error Response buffer address field to hold the sense
* buffer address.
* Clear the internal sense buffer, which will potentially hold
* the Completion Queue Entry on return, or 0 if no Entry.
* Build the PRPs and set direction bits.
* Send the request.
*/
nvme_encap_request->ErrorResponseBaseAddress = ioc->sense_dma &
0xFFFFFFFF00000000;
nvme_encap_request->ErrorResponseBaseAddress |=
(U64)mpt3sas_base_get_sense_buffer_dma(ioc, smid);
nvme_encap_request->ErrorResponseAllocationLength =
NVME_ERROR_RESPONSE_SIZE;
memset(ioc->ctl_cmds.sense, 0, NVME_ERROR_RESPONSE_SIZE);
ioc->build_nvme_prp(ioc, smid, nvme_encap_request,
data_out_dma, data_out_sz, data_in_dma, data_in_sz);
if (test_bit(device_handle, ioc->device_remove_in_progress)) {
dtmprintk(ioc, pr_info(MPT3SAS_FMT "handle(0x%04x) :"
"ioctl failed due to device removal in progress\n",
ioc->name, device_handle));
mpt3sas_base_free_smid(ioc, smid);
ret = -EINVAL;
goto out;
}
ioc->put_smid_nvme_encap(ioc, smid);
break;
}
case MPI2_FUNCTION_SCSI_IO_REQUEST:
case MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH:
{
@ -1008,15 +1055,25 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
}
}
/* copy out sense to user */
/* copy out sense/NVMe Error Response to user */
if (karg.max_sense_bytes && (mpi_request->Function ==
MPI2_FUNCTION_SCSI_IO_REQUEST || mpi_request->Function ==
MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) {
sz = min_t(u32, karg.max_sense_bytes, SCSI_SENSE_BUFFERSIZE);
MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH || mpi_request->Function ==
MPI2_FUNCTION_NVME_ENCAPSULATED)) {
if (karg.sense_data_ptr == NULL) {
pr_info(MPT3SAS_FMT "Response buffer provided"
" by application is NULL; Response data will"
" not be returned.\n", ioc->name);
goto out;
}
sz_arg = (mpi_request->Function ==
MPI2_FUNCTION_NVME_ENCAPSULATED) ? NVME_ERROR_RESPONSE_SIZE :
SCSI_SENSE_BUFFERSIZE;
sz = min_t(u32, karg.max_sense_bytes, sz_arg);
if (copy_to_user(karg.sense_data_ptr, ioc->ctl_cmds.sense,
sz)) {
pr_err("failure at %s:%d/%s()!\n", __FILE__,
__LINE__, __func__);
__LINE__, __func__);
ret = -ENODATA;
goto out;
}