2005-04-17 16:05:31 -05:00
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
2005-06-25 10:34:39 -04:00
* Fibre Channel Host Bus Adapters . *
2022-02-24 18:23:08 -08:00
* Copyright ( C ) 2017 - 2022 Broadcom . All Rights Reserved . The term *
2018-06-26 08:24:31 -07:00
* “ Broadcom ” refers to Broadcom Inc . and / or its subsidiaries . *
2016-03-31 14:12:34 -07:00
* Copyright ( C ) 2004 - 2016 Emulex . All rights reserved . *
2005-06-25 10:34:39 -04:00
* EMULEX and SLI are trademarks of Emulex . *
2017-02-12 13:52:39 -08:00
* www . broadcom . com *
2005-06-25 10:34:39 -04:00
* Portions Copyright ( C ) 2004 - 2005 Christoph Hellwig *
2005-04-17 16:05:31 -05:00
* *
* This program is free software ; you can redistribute it and / or *
2005-06-25 10:34:39 -04:00
* modify it under the terms of version 2 of the GNU General *
* Public License as published by the Free Software Foundation . *
* This program is distributed in the hope that it will be useful . *
* ALL EXPRESS OR IMPLIED CONDITIONS , REPRESENTATIONS AND *
* WARRANTIES , INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY , *
* FITNESS FOR A PARTICULAR PURPOSE , OR NON - INFRINGEMENT , ARE *
* DISCLAIMED , EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
* TO BE LEGALLY INVALID . See the GNU General Public License for *
* more details , a copy of which can be found in the file COPYING *
* included with this package . *
2005-04-17 16:05:31 -05:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2007-06-17 19:56:36 -05:00
# include <scsi/scsi_host.h>
2021-09-20 14:33:23 +02:00
# include <linux/hashtable.h>
2017-02-12 13:52:30 -08:00
# include <linux/ktime.h>
2017-09-29 17:34:34 -07:00
# include <linux/workqueue.h>
2011-07-22 18:36:33 -04:00
/*
 * Define CONFIG_SCSI_LPFC_DEBUG_FS whenever CONFIG_DEBUG_FS is defined,
 * unless it has already been defined.
 */
#ifdef CONFIG_DEBUG_FS
#ifndef CONFIG_SCSI_LPFC_DEBUG_FS
#define CONFIG_SCSI_LPFC_DEBUG_FS
#endif
#endif
2005-04-17 16:05:31 -05:00
struct lpfc_sli2_slim ;
2012-09-29 11:30:06 -04:00
# define ELX_MODEL_NAME_SIZE 80
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
/* LPFC_PCI_DEV_* codes */
#define LPFC_PCI_DEV_LP		0x1
#define LPFC_PCI_DEV_OC		0x2

/* SLI revision numbers */
#define LPFC_SLI_REV2		2
#define LPFC_SLI_REV3		3
#define LPFC_SLI_REV4		4
2008-04-07 10:16:05 -04:00
# define LPFC_MAX_TARGET 4096 /* max number of targets supported */
2006-07-06 15:49:25 -04:00
#define LPFC_MAX_DISC_THREADS	64	/* max outstanding discovery els
					   requests */
#define LPFC_MAX_NS_RETRY	3	/* Number of retry attempts to contact
					   the NameServer before giving up. */
2005-11-28 11:42:38 -05:00
# define LPFC_CMD_PER_LUN 3 /* max outstanding cmds per lun */
2008-12-04 22:39:46 -05:00
# define LPFC_DEFAULT_SG_SEG_CNT 64 /* sg element count per scsi cmnd */
2010-02-26 14:15:00 -05:00
/*
 * sg element count per scsi cmnd for menlo: firmware downloads using bsg
 * need nearly twice the default count.
 */
#define LPFC_DEFAULT_MENLO_SG_SEG_CNT	128
2013-04-17 20:16:15 -04:00
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware.
Typical SLI-4 hardware supports up to two 4KB pages to be registered per XRI
to contain the exchange's Scatter/Gather List. This caps the number of SGL
elements that can be in the SGL. There are no extensions to extend the
list beyond the 2 pages.
The G7 hardware adds a SGE type that allows the SGL to be vectored to a
different scatter/gather list segment. And that segment can contain a SGE
to go to another segment and so on. The initial segment must still be
pre-registered for the XRI, but it can be a much smaller amount (256Bytes)
as it can now be dynamically grown. This much smaller allocation can
handle the SG list for most normal I/O, and the dynamic aspect allows it to
support many MB's if needed.
The implementation creates a pool which contains "segments" and which is
initially sized to hold the initial small segment per xri. If an I/O
requires additional segments, they are allocated from the pool. If the
pool has no more segments, the pool is grown based on what is now
needed. After the I/O completes, the additional segments are returned to
the pool for use by other I/Os. Once allocated, the additional segments are
not released under the assumption of "if needed once, it will be needed
again". Pools are kept on a per-hardware queue basis, which is typically
1:1 per cpu, but may be shared by multiple cpus.
The switch to the smaller initial allocation significantly reduces the
memory footprint of the driver (which only grows if large ios are
issued). Based on the several K of XRIs for the adapter, the 8KB->256B
reduction can conserve 32MBs or more.
It has been observed with per-cpu resource pools that allocating a resource
on CPU A, may be put back on CPU B. While the get routines are distributed
evenly, only a limited subset of CPUs may be handling the put routines.
This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because
all the resources are being put on a limited subset of CPUs.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-14 16:57:09 -07:00
/* NOTE(review): 256 presumably is the small initial SGL segment size in bytes - confirm */
#define LPFC_DEFAULT_XPSGL_SIZE		256
#define LPFC_MAX_SG_TABLESIZE		0xffff
2013-04-17 20:16:15 -04:00
/* Scatter/gather sizing limits: buffer size and element counts per command. */
# define LPFC_MIN_SG_SLI4_BUF_SZ 0x800 /* based on LPFC_DEFAULT_SG_SEG_CNT */
2018-09-10 10:30:42 -07:00
# define LPFC_MAX_BG_SLI4_SEG_CNT_DIF 128 /* sg element count for BlockGuard */
2013-04-17 20:16:15 -04:00
# define LPFC_MAX_SG_SEG_CNT_DIF 512 /* sg element count per scsi cmnd (DIF variant, going by the name) */
2008-12-04 22:39:46 -05:00
# define LPFC_MAX_SG_SEG_CNT 4096 /* max sg element count per scsi cmnd */
2017-11-20 16:00:43 -08:00
# define LPFC_MIN_SG_SEG_CNT 32 /* min sg element count per scsi cmnd */
2013-04-17 20:16:05 -04:00
# define LPFC_MAX_SGL_SEG_CNT 512 /* max SGL element count per scsi cmnd */
# define LPFC_MAX_BPL_SEG_CNT 4096 /* max BPL element count per scsi cmnd */
2017-11-20 16:00:33 -08:00
# define LPFC_MAX_NVME_SEG_CNT 256 /* max SGL element cnt per NVME cmnd */
2013-04-17 20:16:05 -04:00
2011-05-24 11:40:48 -04:00
# define LPFC_MAX_SGE_SIZE 0x80000000 /* Maximum data allowed in a SGE */
2005-04-17 16:05:31 -05:00
# define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */
2005-11-28 11:42:38 -05:00
# define LPFC_Q_RAMP_UP_INTERVAL 120 /* lun q_depth ramp up interval (units not stated here) */
2008-06-14 22:52:59 -04:00
# define LPFC_VNAME_LEN 100 /* vport symbolic name length */
2008-09-07 11:52:04 -04:00
# define LPFC_TGTQ_RAMPUP_PCENT 5 /* Target queue rampup in percentage */
2010-07-14 15:32:10 -04:00
# define LPFC_MIN_TGT_QDEPTH 10 /* presumably the lower bound for the target queue depth - confirm */
2008-09-07 11:52:04 -04:00
# define LPFC_MAX_TGT_QDEPTH 0xFFFF /* presumably the upper bound for the target queue depth - confirm */
2005-04-17 16:05:31 -05:00
2008-09-07 11:52:10 -04:00
#define LPFC_MAX_BUCKET_COUNT	20	/* Maximum no. of buckets for stat data
					   collection. */
2007-06-17 19:56:38 -05:00
/*
 * The following time intervals are used for adjusting SCSI device
 * queue depths when there are driver resource errors or firmware
 * resource errors.
 */
2013-04-17 20:14:58 -04:00
/* Queue ramp-down interval: 1 second (1000 ms), converted to jiffies */
# define QUEUE_RAMP_DOWN_INTERVAL (msecs_to_jiffies(1000 * 1))
2007-06-17 19:56:38 -05:00
/* Number of exchanges reserved for discovery to complete */
# define LPFC_DISC_IOCB_BUFF_COUNT 20
2007-06-17 19:56:39 -05:00
# define LPFC_HB_MBOX_INTERVAL 5 /* Heart beat interval in seconds. */
2007-08-02 11:10:37 -04:00
# define LPFC_HB_MBOX_TIMEOUT 30 /* Heart beat timeout in seconds. */
2007-06-17 19:56:39 -05:00
2008-08-24 21:50:30 -04:00
/* Error Attention event polling interval */
# define LPFC_ERATT_POLL_INTERVAL 5 /* ERATT poll interval in seconds */
2005-04-17 16:05:31 -05:00
/*
 * Define macros for 64 bit support: split an address into its low and
 * high 32-bit words, and rebuild an address from the two words.
 */
#define putPaddrLow(addr)    ((uint32_t) (0xffffffff & (u64)(addr)))
#define putPaddrHigh(addr)   ((uint32_t) (0xffffffff & (((u64)(addr))>>32)))
/* The high word is shifted in two 16-bit steps, for a total shift of 32. */
#define getPaddr(high, low)  ((dma_addr_t)( \
			     (((u64)(high) << 16) << 16) | ((u64)(low))))
/* Maximum configuration definitions. */
#define LPFC_DRVR_TIMEOUT	16	/* driver iocb timeout value in sec */
#define FC_MAX_ADPTMSG		64
#define MAX_HBAEVT		32
scsi: lpfc: Fix eh_deadline setting for sli3 adapters.
A previous change unilaterally removed the hba reset entry point
from the sli3 host template. This was done to keep tape devices that
are being used for backup from being removed. Why was this done?
When there was non-responding device on the fabric, the error
escalation policy would escalate to the reset handler. When the
reset handler was called, it would reset the adapter, dropping
link, thus logging out and terminating all i/o's - on any target.
If there was a tape device on the same adapter that wasn't in
error, it would kill the tape i/o's, effectively killing the
tape device state. With the reset point removed, the adapter
reset avoided the fabric logout, allowing the other devices to
continue to operate unaffected. A hack - yes. Hint: we really
need a transport I_T nexus reset callback added to the eh process
(in between the SCSI target reset and hba reset points), so a
fc logout could occur to the one bad target only and stop the error
escalation process.
This patch commonizes the approach so it can be used for sli3 and sli4
adapters, but mandates the admin, via module parameter, specifically
identify which adapters the resets are to be removed for. Additionally,
bus_reset, which sends Target Reset TMFs to all targets, is also removed
from the template as it too has the same effect as the adapter reset.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Tested-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-03-04 09:30:31 -08:00
# define MAX_HBAS_NO_RESET 16
2005-04-17 16:05:31 -05:00
2008-08-24 21:50:30 -04:00
/* Number of MSI-X vectors the driver uses */
# define LPFC_MSIX_VECTORS 2
2008-06-14 22:52:53 -04:00
/* lpfc wait event data ready flag */
2017-03-04 09:30:38 -08:00
# define LPFC_DATA_READY 0 /* bit 0 */
2008-06-14 22:52:53 -04:00
2012-05-09 21:19:25 -04:00
/* queue dump line buffer size */
# define LPFC_LBUF_SZ 128
2012-06-12 13:54:36 -04:00
/* Options for mailbox system shutdown */
#define LPFC_MBX_NO_WAIT	0
#define LPFC_MBX_WAIT		1
2021-08-16 09:28:52 -07:00
#define LPFC_CFG_PARAM_MAGIC_NUM	0xFEAA0005
#define LPFC_PORT_CFG_NAME		"/cfg/port.cfg"

/*
 * lpfc_rangecheck - true when min <= val <= max.
 * The lower-bound comparison is done on the operands converted to uint;
 * the upper-bound comparison uses the operands as given.
 */
#define lpfc_rangecheck(val, min, max) \
	((uint)(val) >= (uint)(min) && (val) <= (max))
[SCSI] lpfc 8.1.1 : Add polled-mode support
- Add functionality to run in polled mode only. Includes run time
attribute to enable mode.
- Enable runtime writable hba settings for coalescing and delay parameters
Customers have requested a mode in the driver to run strictly polled.
This is generally to support an environment where the server is extremely
loaded and is looking to reclaim some cpu cycles from adapter interrupt
handling.
This patch adds a new "poll" attribute, and the following behavior:
if value is 0 (default):
The driver uses the normal method for i/o completion. It uses the
firmware feature of interrupt coalescing. The firmware allows a
minimum number of i/o completions before an interrupt, or a maximum
time delay between interrupts. By default, the driver sets these
to no delay (disabled) or 1 i/o - meaning coalescing is disabled.
Attributes were provided to change the coalescing values, but it was
a module-load time only and global across all adapters.
This patch allows them to be writable on a per-adapter basis.
if value is 1 :
Interrupts are left enabled, expecting that the user has tuned the
interrupt coalescing values. When this setting is enabled, the driver
will attempt to service completed i/o whenever new i/o is submitted
to the adapter. If the coalescing values are large, and the i/o
generation rate steady, an interrupt will be avoided by servicing
completed i/o prior to the coalescing thresholds kicking in. However,
if the i/o completion load is high enough or i/o generation slow, the
coalescing values will ensure that completed i/o is serviced in a timely
fashion.
if value is 3 :
Turns off FCP i/o interrupts altogether. The coalescing values now have
no effect. A new attribute "poll_tmo" (default 10ms) exists to set
the polling interval for i/o completion. When this setting is enabled,
the driver will attempt to service completed i/o and restart the
interval timer whenever new i/o is submitted. This behavior allows for
servicing of completed i/o sooner than the interval timer, but ensures
that if no i/o is being issued, then the interval timer will kick in
to service the outstanding i/o.
Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
2005-11-29 16:32:13 -05:00
/* Bit flags for FCP ring polling; the two bits may be combined. */
enum lpfc_polling_flags {
	ENABLE_FCP_RING_POLLING	= 1 << 0,
	DISABLE_FCP_RING_INT	= 1 << 1,
};
2017-02-12 13:52:30 -08:00
/* Four parallel tables of 40 16-bit entries each. */
struct perf_prof {
	uint16_t	cmd_cpu[40];
	uint16_t	rsp_cpu[40];
	uint16_t	qh_cpu[40];
	uint16_t	wqidx[40];
};
2017-02-12 13:52:32 -08:00
/*
 * Provide for FC4 TYPE x28 - NVME. FCP (type x08) and NVME (type x28)
 * use the identical bit mask - bit 8, i.e. 0x100 - within their
 * respective 32-bit words, because the two type codes are 32 bit
 * positions apart.
 */
2017-02-12 13:52:31 -08:00
# define LPFC_FC4_TYPE_BITMASK 0x00000100
2005-04-17 16:05:31 -05:00
/* Provide DMA memory definitions the driver uses per port instance. */
/*
 * struct lpfc_dmabuf - list-linkable descriptor holding a buffer's
 * virtual address, its mapped address and a tag.
 */
struct lpfc_dmabuf {
struct list_head list ; /* list linkage */
void * virt ; /* virtual address ptr */
dma_addr_t phys ; /* mapped address */
2007-10-27 13:38:00 -04:00
uint32_t buffer_tag ; /* used for tagged queue ring */
2005-04-17 16:05:31 -05:00
} ;
2017-05-15 15:20:45 -07:00
/*
 * struct lpfc_nvmet_ctxbuf - list-linkable bundle of pointers to an async
 * exchange context, an iocbq and an sglq, plus an embedded work item.
 * NOTE(review): presumably one context buffer for NVME target mode,
 * going by the name - confirm against the users of this struct.
 */
struct lpfc_nvmet_ctxbuf {
struct list_head list ; /* list linkage */
2020-03-31 09:50:03 -07:00
struct lpfc_async_xchg_ctx * context ;
2017-05-15 15:20:45 -07:00
struct lpfc_iocbq * iocbq ;
struct lpfc_sglq * sglq ;
2019-01-28 11:14:39 -08:00
struct work_struct defer_work ; /* work item; presumably for deferred handling - confirm */
2017-05-15 15:20:45 -07:00
} ;
2005-04-17 16:05:31 -05:00
/* A pool of lpfc_dmabuf elements with a capacity and a fill level. */
struct lpfc_dma_pool {
	struct lpfc_dmabuf	*elements;
	uint32_t		max_count;
	uint32_t		current_count;
};
2007-06-17 19:56:37 -05:00
/*
 * struct hbq_dmabuf - a pair of DMA buffers (hbuf/dbuf) with size
 * accounting, a tag, a completion-queue event and a time stamp.
 */
struct hbq_dmabuf {
2009-05-22 14:51:39 -04:00
struct lpfc_dmabuf hbuf ; /* presumably the header buffer - confirm */
2007-06-17 19:56:37 -05:00
struct lpfc_dmabuf dbuf ; /* presumably the data buffer - confirm */
2017-02-12 13:52:30 -08:00
uint16_t total_size ;
uint16_t bytes_recv ;
2007-06-17 19:56:37 -05:00
uint32_t tag ;
2009-10-02 15:16:39 -04:00
struct lpfc_cq_event cq_event ;
2009-10-02 15:17:02 -04:00
unsigned long time_stamp ;
2017-02-12 13:52:30 -08:00
void * context ; /* opaque pointer; meaning is set by the user of the buffer */
} ;
/*
 * struct rqb_dmabuf - a pair of DMA buffers (hbuf/dbuf) with size
 * accounting, an index, and pointers to the associated header and data
 * receive queues.
 */
struct rqb_dmabuf {
struct lpfc_dmabuf hbuf ; /* presumably the header buffer - confirm */
struct lpfc_dmabuf dbuf ; /* presumably the data buffer - confirm */
uint16_t total_size ;
uint16_t bytes_recv ;
2017-05-15 15:20:46 -07:00
uint16_t idx ;
2017-02-12 13:52:30 -08:00
struct lpfc_queue * hrq ; /* ptr to associated Header RQ */
struct lpfc_queue * drq ; /* ptr to associated Data RQ */
2007-06-17 19:56:37 -05:00
} ;
2005-04-17 16:05:31 -05:00
/* Priority bit: set to exceed the low water mark in lpfc_mem. */
#define MEM_PRI		0x100
/****************************************************************************/
/* Device VPD save area */
/****************************************************************************/
/*
 * lpfc_vpd_t holds a status/length pair, a block of revision numbers and
 * firmware names (rev), and a 32-bit word of SLI-3 feature bits (sli3Feat).
 */
typedef struct lpfc_vpd {
uint32_t status ; /* vpd status value */
uint32_t length ; /* number of bytes actually returned */
struct {
uint32_t rsvd1 ; /* Revision numbers */
uint32_t biuRev ;
uint32_t smRev ;
uint32_t smFwRev ;
uint32_t endecRev ;
uint16_t rBit ;
uint8_t fcphHigh ;
uint8_t fcphLow ;
uint8_t feaLevelHigh ;
uint8_t feaLevelLow ;
uint32_t postKernRev ;
uint32_t opFwRev ;
uint8_t opFwName [ 16 ] ;
uint32_t sli1FwRev ;
uint8_t sli1FwName [ 16 ] ;
uint32_t sli2FwRev ;
uint8_t sli2FwName [ 16 ] ;
} rev ;
2007-06-17 19:56:38 -05:00
/*
 * Feature bits, 32 bits in total. The two preprocessor branches list the
 * same fields in opposite order, selected by bitfield endianness.
 */
struct {
# ifdef __BIG_ENDIAN_BITFIELD
2020-03-22 11:13:03 -07:00
uint32_t rsvd3 : 20 ; /* Reserved */
2009-05-22 14:51:39 -04:00
uint32_t rsvd2 : 3 ; /* Reserved */
uint32_t cbg : 1 ; /* Configure BlockGuard */
2007-06-17 19:56:38 -05:00
uint32_t cmv : 1 ; /* Configure Max VPIs */
uint32_t ccrp : 1 ; /* Config Command Ring Polling */
uint32_t csah : 1 ; /* Configure Synchronous Abort Handling */
uint32_t chbs : 1 ; /* Configure Host Backing store */
uint32_t cinb : 1 ; /* Enable Interrupt Notification Block */
uint32_t cerbm : 1 ; /* Configure Enhanced Receive Buf Mgmt */
uint32_t cmx : 1 ; /* Configure Max XRIs */
uint32_t cmr : 1 ; /* Configure Max RPIs */
# else /* __LITTLE_ENDIAN */
uint32_t cmr : 1 ; /* Configure Max RPIs */
uint32_t cmx : 1 ; /* Configure Max XRIs */
uint32_t cerbm : 1 ; /* Configure Enhanced Receive Buf Mgmt */
uint32_t cinb : 1 ; /* Enable Interrupt Notification Block */
uint32_t chbs : 1 ; /* Configure Host Backing store */
uint32_t csah : 1 ; /* Configure Synchronous Abort Handling */
uint32_t ccrp : 1 ; /* Config Command Ring Polling */
uint32_t cmv : 1 ; /* Configure Max VPIs */
2009-05-22 14:51:39 -04:00
uint32_t cbg : 1 ; /* Configure BlockGuard */
uint32_t rsvd2 : 3 ; /* Reserved */
2020-03-22 11:13:03 -07:00
uint32_t rsvd3 : 20 ; /* Reserved */
2007-06-17 19:56:38 -05:00
# endif
} sli3Feat ;
2005-04-17 16:05:31 -05:00
} lpfc_vpd_t ;
/*
 * lpfc stat counters
 *
 * Every member is a 32-bit counter. Going by the name prefixes, elsRcv*
 * and elsXmit* count received and transmitted ELS commands, and the
 * remaining members count frame, exchange, link and FCP events.
 */
struct lpfc_stats {
/* Statistics for ELS commands */
uint32_t elsLogiCol ;
uint32_t elsRetryExceeded ;
uint32_t elsXmitRetry ;
uint32_t elsDelayRetry ;
/* ELS receive counters (elsRcv*) */
uint32_t elsRcvDrop ;
uint32_t elsRcvFrame ;
uint32_t elsRcvRSCN ;
uint32_t elsRcvRNID ;
uint32_t elsRcvFARP ;
uint32_t elsRcvFARPR ;
uint32_t elsRcvFLOGI ;
uint32_t elsRcvPLOGI ;
uint32_t elsRcvADISC ;
uint32_t elsRcvPDISC ;
uint32_t elsRcvFAN ;
uint32_t elsRcvLOGO ;
uint32_t elsRcvPRLO ;
uint32_t elsRcvPRLI ;
2006-02-28 19:25:15 -05:00
uint32_t elsRcvLIRR ;
2010-10-22 11:05:53 -04:00
uint32_t elsRcvRLS ;
2006-02-28 19:25:15 -05:00
uint32_t elsRcvRPL ;
2009-11-18 15:39:44 -05:00
uint32_t elsRcvRRQ ;
2010-10-22 11:05:53 -04:00
uint32_t elsRcvRTV ;
uint32_t elsRcvECHO ;
2015-05-21 13:55:18 -04:00
uint32_t elsRcvLCB ;
2015-05-21 13:55:21 -04:00
uint32_t elsRcvRDP ;
2021-05-14 12:55:58 -07:00
uint32_t elsRcvRDF ;
2005-04-17 16:05:31 -05:00
/* ELS transmit counters (elsXmit*) */
uint32_t elsXmitFLOGI ;
2007-06-17 19:56:38 -05:00
uint32_t elsXmitFDISC ;
2005-04-17 16:05:31 -05:00
uint32_t elsXmitPLOGI ;
uint32_t elsXmitPRLI ;
uint32_t elsXmitADISC ;
uint32_t elsXmitLOGO ;
uint32_t elsXmitSCR ;
2019-05-14 14:58:05 -07:00
uint32_t elsXmitRSCN ;
2005-04-17 16:05:31 -05:00
uint32_t elsXmitRNID ;
uint32_t elsXmitFARP ;
uint32_t elsXmitFARPR ;
uint32_t elsXmitACC ;
uint32_t elsXmitLSRJT ;
/* Frame, exchange (xri), link and FCP counters */
uint32_t frameRcvBcast ;
uint32_t frameRcvMulti ;
uint32_t strayXmitCmpl ;
uint32_t frameXmitDelay ;
uint32_t xriCmdCmpl ;
uint32_t xriStatErr ;
uint32_t LinkUp ;
uint32_t LinkDown ;
uint32_t LinkMultiEvent ;
uint32_t NoRcvBuf ;
uint32_t fcpCmd ;
uint32_t fcpCmpl ;
uint32_t fcpRspErr ;
uint32_t fcpRemoteStop ;
uint32_t fcpPortRjt ;
uint32_t fcpPortBusy ;
uint32_t fcpError ;
uint32_t fcpLocalErr ;
} ;
2007-06-17 19:56:36 -05:00
struct lpfc_hba ;
2007-06-17 19:56:38 -05:00
2021-06-08 10:05:47 +05:30
#define LPFC_VMID_TIMER			300	/* timer interval in seconds */
#define LPFC_MAX_VMID_SIZE		256
#define LPFC_COMPRESS_VMID_SIZE		16
/* The VMID tag of an I/O: either an application id or a priority tag. */
union lpfc_vmid_io_tag {
	u32	app_id;		/* App Id vmid */
	u8	cs_ctl_vmid;	/* Priority tag vmid */
};
/* Number of jiffies in one hour: HZ * 60 * 60. */
#define JIFFIES_PER_HR	(HZ * 60 * 60)
/*
 * One VMID entry: state flag, the host VMID string with its length, the
 * I/O tag, read/write counters, hash-list linkage and a per-cpu
 * last-I/O time.
 */
struct lpfc_vmid {
	u8 flag;			/* one or more LPFC_VMID_* values below */
#define LPFC_VMID_SLOT_FREE		0x0
#define LPFC_VMID_SLOT_USED		0x1
#define LPFC_VMID_REQ_REGISTER		0x2
#define LPFC_VMID_REGISTERED		0x4
#define LPFC_VMID_DE_REGISTER		0x8
	char host_vmid[LPFC_MAX_VMID_SIZE];
	union lpfc_vmid_io_tag un;
	struct hlist_node hnode;
	u64 io_rd_cnt;
	u64 io_wr_cnt;
	u8 vmid_len;
	u8 delete_inactive;	/* Delete if inactive flag 0 = no, 1 = yes */
	u32 hash_index;
	u64 __percpu *last_io_time;
};
/*
 * lpfc_vmid_is_type_priority_tag - 1 when the vport's
 * vmid_priority_tagging member is non-zero, otherwise 0.
 * @vport: pointer to an object with a vmid_priority_tagging member
 */
#define lpfc_vmid_is_type_priority_tag(vport)\
	((vport)->vmid_priority_tagging ? 1 : 0)

#define LPFC_VMID_HASH_SIZE	256
#define LPFC_VMID_HASH_MASK	255
#define LPFC_VMID_HASH_SHIFT	6
/* Pairs a VMID entry with a node and records an "instantiated" flag. */
struct lpfc_vmid_context {
	struct lpfc_vmid	*vmp;
	struct lpfc_nodelist	*nlp;
	bool			instantiated;
};
/* A low/high pair of priority values together with a qos value. */
struct lpfc_vmid_priority_range {
	u8	low;
	u8	high;
	u8	qos;
};
/* A descriptor count and a pointer to priority range entries. */
struct lpfc_vmid_priority_info {
	u32				num_descriptors;
	struct lpfc_vmid_priority_range	*vmid_range;
};
/* QFPA selectors; QFPA_EVEN_ODD has both of the other two bits set. */
#define QFPA_EVEN_ONLY	0x01
#define QFPA_ODD_ONLY	0x02
#define QFPA_EVEN_ODD	0x03
2007-06-17 19:56:36 -05:00
/*
 * Discovery states of a vport. The numbering is sparse: values 2-5 and
 * 14-31 are not assigned here.
 */
enum discovery_state {
2007-06-17 19:56:38 -05:00
LPFC_VPORT_UNKNOWN = 0 , /* vport state is unknown */
LPFC_VPORT_FAILED = 1 , /* vport has failed */
LPFC_LOCAL_CFG_LINK = 6 , /* local NPORT Id configured */
LPFC_FLOGI = 7 , /* FLOGI sent to Fabric */
LPFC_FDISC = 8 , /* FDISC sent for vport */
LPFC_FABRIC_CFG_LINK = 9 , /* Fabric assigned NPORT Id
* configured */
LPFC_NS_REG = 10 , /* Register with NameServer */
LPFC_NS_QRY = 11 , /* Query NameServer for NPort ID list */
LPFC_BUILD_DISC_LIST = 12 , /* Build ADISC and PLOGI lists for
* device authentication / discovery */
LPFC_DISC_AUTH = 13 , /* Processing ADISC list */
LPFC_VPORT_READY = 32 , /* presumably: discovery finished, vport usable - confirm */
2007-06-17 19:56:36 -05:00
} ;
/*
 * Link/initialization states of the HBA. LPFC_HBA_ERROR is the only
 * negative value.
 */
enum hba_state {
LPFC_LINK_UNKNOWN = 0 , /* HBA state is unknown */
LPFC_WARM_START = 1 , /* HBA state after selective reset */
LPFC_INIT_START = 2 , /* Initial state after board reset */
LPFC_INIT_MBX_CMDS = 3 , /* Initialize HBA with mbox commands */
LPFC_LINK_DOWN = 4 , /* HBA initialized, link is down */
LPFC_LINK_UP = 5 , /* Link is up - issue READ_LA */
2007-06-17 19:56:38 -05:00
LPFC_CLEAR_LA = 6 , /* authentication cmplt - issue
2007-06-17 19:56:36 -05:00
* CLEAR_LA */
2007-06-17 19:56:38 -05:00
LPFC_HBA_READY = 32 , /* presumably: HBA fully operational - confirm */
2007-06-17 19:56:36 -05:00
LPFC_HBA_ERROR = - 1 /* presumably: HBA is in an error state - confirm */
} ;
2018-10-23 13:41:11 -07:00
/* State and fault code of a single trunk link. */
struct lpfc_trunk_link_state {
	enum hba_state	state;
	uint8_t		fault;
};
/* Per-link state for the four trunk links, link0 through link3. */
struct lpfc_trunk_link {
	struct lpfc_trunk_link_state link0;
	struct lpfc_trunk_link_state link1;
	struct lpfc_trunk_link_state link2;
	struct lpfc_trunk_link_state link3;
};
2021-08-16 09:28:52 -07:00
/*
 * Layout of the congestion module parameters: a 32-bit magic number
 * followed by twelve single-byte fields.
 */
struct lpfc_cgn_param {
	uint32_t	cgn_param_magic;
	uint8_t		cgn_param_version;	/* version 1 */
	uint8_t		cgn_param_mode;		/* 0=off 1=managed 2=monitor only */
#define LPFC_CFG_OFF		0
#define LPFC_CFG_MANAGED	1
#define LPFC_CFG_MONITOR	2
	uint8_t		cgn_rsvd1;
	uint8_t		cgn_rsvd2;
	uint8_t		cgn_param_level0;
	uint8_t		cgn_param_level1;
	uint8_t		cgn_param_level2;
	uint8_t		byte11;
	uint8_t		byte12;
	uint8_t		byte13;
	uint8_t		byte14;
	uint8_t		byte15;
};
2021-08-16 09:28:51 -07:00
/* Maximum number of days of congestion data */
#define LPFC_MAX_CGN_DAYS	10
/* Format of congestion buffer info
* This structure defines memory that's allocated and registered with
* the HBA firmware . When adding or removing fields from this structure
* the alignment must match the HBA firmware .
*
* Layout: a header, a start time, minute / hour / day indices, per-minute
* ( 60 ) , per-hour ( 24 ) and per-day ( LPFC_MAX_CGN_DAYS ) tables for each
* statistic, the cgn_stat group, and a trailing CRC word .
*/
struct lpfc_cgn_info {
/* Header */
__le16 cgn_info_size ; /* is sizeof(struct lpfc_cgn_info) */
uint8_t cgn_info_version ; /* represents format of structure */
# define LPFC_CGN_INFO_V1 1
# define LPFC_CGN_INFO_V2 2
# define LPFC_CGN_INFO_V3 3
uint8_t cgn_info_mode ; /* 0=off 1=managed 2=monitor only */
uint8_t cgn_info_detect ;
uint8_t cgn_info_action ;
uint8_t cgn_info_level0 ;
uint8_t cgn_info_level1 ;
uint8_t cgn_info_level2 ;
/* Start Time */
uint8_t cgn_info_month ;
uint8_t cgn_info_day ;
uint8_t cgn_info_year ;
uint8_t cgn_info_hour ;
uint8_t cgn_info_minute ;
uint8_t cgn_info_second ;
/* minute / hours / daily indices */
uint8_t cgn_index_minute ;
uint8_t cgn_index_hour ;
uint8_t cgn_index_day ;
__le16 cgn_warn_freq ;
__le16 cgn_alarm_freq ;
__le16 cgn_lunq ;
uint8_t cgn_pad1 [ 8 ] ;
/* Driver Information */
__le16 cgn_drvr_min [ 60 ] ;
__le32 cgn_drvr_hr [ 24 ] ;
__le32 cgn_drvr_day [ LPFC_MAX_CGN_DAYS ] ;
/* Congestion Warnings */
__le16 cgn_warn_min [ 60 ] ;
__le32 cgn_warn_hr [ 24 ] ;
__le32 cgn_warn_day [ LPFC_MAX_CGN_DAYS ] ;
/* Latency Information */
__le32 cgn_latency_min [ 60 ] ;
__le32 cgn_latency_hr [ 24 ] ;
__le32 cgn_latency_day [ LPFC_MAX_CGN_DAYS ] ;
/* Bandwidth Information */
__le16 cgn_bw_min [ 60 ] ;
__le16 cgn_bw_hr [ 24 ] ;
__le16 cgn_bw_day [ LPFC_MAX_CGN_DAYS ] ;
/* Congestion Alarms */
__le16 cgn_alarm_min [ 60 ] ;
__le32 cgn_alarm_hr [ 24 ] ;
__le32 cgn_alarm_day [ LPFC_MAX_CGN_DAYS ] ;
2021-12-08 11:59:57 -08:00
/* Statistics fields wrapped in struct_group() under the name cgn_stat */
struct_group ( cgn_stat ,
uint8_t cgn_stat_npm ; /* Notifications per minute */
/* Start Time */
uint8_t cgn_stat_month ;
uint8_t cgn_stat_day ;
uint8_t cgn_stat_year ;
uint8_t cgn_stat_hour ;
uint8_t cgn_stat_minute ;
uint8_t cgn_pad2 [ 2 ] ;
__le32 cgn_notification ;
__le32 cgn_peer_notification ;
__le32 link_integ_notification ;
__le32 delivery_notification ;
uint8_t cgn_stat_cgn_month ; /* Last congestion notification FPIN */
uint8_t cgn_stat_cgn_day ;
uint8_t cgn_stat_cgn_year ;
uint8_t cgn_stat_cgn_hour ;
uint8_t cgn_stat_cgn_min ;
uint8_t cgn_stat_cgn_sec ;
uint8_t cgn_stat_peer_month ; /* Last peer congestion FPIN */
uint8_t cgn_stat_peer_day ;
uint8_t cgn_stat_peer_year ;
uint8_t cgn_stat_peer_hour ;
uint8_t cgn_stat_peer_min ;
uint8_t cgn_stat_peer_sec ;
uint8_t cgn_stat_lnk_month ; /* Last link integrity FPIN */
uint8_t cgn_stat_lnk_day ;
uint8_t cgn_stat_lnk_year ;
uint8_t cgn_stat_lnk_hour ;
uint8_t cgn_stat_lnk_min ;
uint8_t cgn_stat_lnk_sec ;
uint8_t cgn_stat_del_month ; /* Last delivery notification FPIN */
uint8_t cgn_stat_del_day ;
uint8_t cgn_stat_del_year ;
uint8_t cgn_stat_del_hour ;
uint8_t cgn_stat_del_min ;
uint8_t cgn_stat_del_sec ;
) ;
2021-08-16 09:28:51 -07:00
__le32 cgn_info_crc ; /* last member; presumably a CRC over the preceding bytes - confirm */
# define LPFC_CGN_CRC32_MAGIC_NUMBER 0x1EDC6F41
# define LPFC_CGN_CRC32_SEED 0xFFFFFFFF
} ;
/*
 * Size of struct lpfc_cgn_info less one 32-bit word.
 * NOTE(review): presumably the excluded word is the trailing
 * cgn_info_crc member - confirm.
 */
#define LPFC_CGN_INFO_SZ \
	(sizeof(struct lpfc_cgn_info) - sizeof(uint32_t))
2021-08-16 09:28:54 -07:00
/* Atomic byte, latency and I/O-count accumulators. */
struct lpfc_cgn_stat {
	atomic64_t	total_bytes;
	atomic64_t	rcv_bytes;
	atomic64_t	rx_latency;
#define LPFC_CGN_NOT_SENT	0xFFFFFFFFFFFFFFFFLL
	atomic_t	rx_io_cnt;
};
2021-08-16 09:28:50 -07:00
/* Atomic counters for alarm and warn events. */
struct lpfc_cgn_acqe_stat {
	atomic64_t	alarm;
	atomic64_t	warn;
};
2007-06-17 19:56:36 -05:00
struct lpfc_vport {
struct lpfc_hba * phba ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
struct list_head listentry ;
2007-06-17 19:56:36 -05:00
uint8_t port_type ;
# define LPFC_PHYSICAL_PORT 1
# define LPFC_NPIV_PORT 2
# define LPFC_FABRIC_PORT 3
enum discovery_state port_state ;
2007-06-17 19:56:38 -05:00
uint16_t vpi ;
2009-05-22 14:51:39 -04:00
uint16_t vfi ;
2009-11-18 15:39:16 -05:00
uint8_t vpi_state ;
# define LPFC_VPI_REGISTERED 0x1
2007-06-17 19:56:36 -05:00
uint32_t fc_flag ; /* FC flags */
/* Several of these flags are HBA centric and should be moved to
* phba - > link_flag ( e . g . FC_PTP , FC_PUBLIC_LOOP )
*/
2007-06-17 19:56:38 -05:00
# define FC_PT2PT 0x1 /* pt2pt with no fabric */
# define FC_PT2PT_PLOGI 0x2 /* pt2pt initiate PLOGI */
# define FC_DISC_TMO 0x4 /* Discovery timer running */
# define FC_PUBLIC_LOOP 0x8 /* Public loop */
# define FC_LBIT 0x10 /* LOGIN bit in loopinit set */
# define FC_RSCN_MODE 0x20 /* RSCN cmd rcv'ed */
# define FC_NLP_MORE 0x40 /* More node to process in node tbl */
# define FC_OFFLINE_MODE 0x80 /* Interface is offline for diag */
# define FC_FABRIC 0x100 /* We are fabric attached */
2010-03-15 11:25:44 -04:00
# define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */
2007-06-17 19:56:38 -05:00
# define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */
2010-03-15 11:25:44 -04:00
# define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/
2007-06-17 19:56:38 -05:00
# define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */
# define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */
# define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */
# define FC_BYPASSED_MODE 0x20000 /* NPort is in bypassed mode */
# define FC_VPORT_NEEDS_REG_VPI 0x80000 /* Needs to have its vpi registered */
# define FC_RSCN_DEFERRED 0x100000 /* A deferred RSCN being processed */
2009-07-19 10:01:26 -04:00
# define FC_VPORT_NEEDS_INIT_VPI 0x200000 /* Need to INIT_VPI before FDISC */
2010-01-26 23:08:03 -05:00
# define FC_VPORT_CVL_RCVD 0x400000 /* VLink failed due to CVL */
# define FC_VFI_REGISTERED 0x800000 /* VFI is registered */
# define FC_FDISC_COMPLETED 0x1000000 /* FDISC completed */
2011-02-16 12:39:44 -05:00
# define FC_DISC_DELAYED 0x2000000 /* Delay NPort discovery */
2007-06-17 19:56:36 -05:00
2007-10-27 13:37:17 -04:00
uint32_t ct_flags ;
# define FC_CT_RFF_ID 0x1 /* RFF_ID accepted by switch */
# define FC_CT_RNN_ID 0x2 /* RNN_ID accepted by switch */
# define FC_CT_RSNN_NN 0x4 /* RSNN_NN accepted by switch */
# define FC_CT_RSPN_ID 0x8 /* RSPN_ID accepted by switch */
# define FC_CT_RFT_ID 0x10 /* RFT_ID accepted by switch */
2007-06-17 19:56:36 -05:00
struct list_head fc_nodes ;
/* Keep counters for the number of entries in each list. */
uint16_t fc_plogi_cnt ;
uint16_t fc_adisc_cnt ;
uint16_t fc_reglogin_cnt ;
uint16_t fc_prli_cnt ;
uint16_t fc_unmap_cnt ;
uint16_t fc_map_cnt ;
uint16_t fc_npr_cnt ;
uint16_t fc_unused_cnt ;
struct serv_parm fc_sparam ; /* buffer for our service parameters */
uint32_t fc_myDID ; /* fibre channel S_ID */
uint32_t fc_prevDID ; /* previous fibre channel S_ID */
2011-02-16 12:39:44 -05:00
struct lpfc_name fabric_portname ;
struct lpfc_name fabric_nodename ;
2007-06-17 19:56:36 -05:00
int32_t stopped ; /* HBA has not been restarted since last ERATT */
uint8_t fc_linkspeed ; /* Link speed after last READ_LA */
2017-02-12 13:52:31 -08:00
uint32_t num_disc_nodes ; /* in addition to hba_state */
uint32_t gidft_inp ; /* cnt of outstanding GID_FTs */
2007-06-17 19:56:36 -05:00
uint32_t fc_nlp_cnt ; /* outstanding NODELIST requests */
uint32_t fc_rscn_id_cnt ; /* count of RSCNs payloads in list */
2008-02-08 18:50:14 -05:00
uint32_t fc_rscn_flush ; /* flag use of fc_rscn_id_list */
2007-06-17 19:56:36 -05:00
struct lpfc_dmabuf * fc_rscn_id_list [ FC_MAX_HOLD_RSCN ] ;
struct lpfc_name fc_nodename ; /* fc nodename */
struct lpfc_name fc_portname ; /* fc portname */
struct lpfc_work_evt disc_timeout_evt ;
struct timer_list fc_disctmo ; /* Discovery rescue timer */
uint8_t fc_ns_retry ; /* retries for fabric nameserver */
uint32_t fc_prli_sent ; /* cntr for outstanding PRLIs */
spinlock_t work_port_lock ;
uint32_t work_port_events ; /* Timeout to be handled */
2007-06-17 19:56:39 -05:00
# define WORKER_DISC_TMO 0x1 /* vport: Discovery timeout */
# define WORKER_ELS_TMO 0x2 /* vport: ELS timeout */
2011-02-16 12:39:44 -05:00
# define WORKER_DELAYED_DISC_TMO 0x8 /* vport: delayed discovery */
2007-06-17 19:56:39 -05:00
# define WORKER_MBOX_TMO 0x100 /* hba: MBOX timeout */
# define WORKER_HB_TMO 0x200 /* hba: Heart beat timeout */
2008-02-03 17:28:22 +02:00
# define WORKER_FABRIC_BLOCK_TMO 0x400 /* hba: fabric block timeout */
2007-06-17 19:56:39 -05:00
# define WORKER_RAMP_DOWN_QUEUE 0x800 /* hba: Decrease Q depth */
# define WORKER_RAMP_UP_QUEUE 0x1000 /* hba: Increase Q depth */
2010-06-07 15:24:45 -04:00
# define WORKER_SERVICE_TXQ 0x2000 /* hba: IOCBs on the txq */
2021-06-08 10:05:47 +05:30
# define WORKER_CHECK_INACTIVE_VMID 0x4000 /* hba: check inactive vmids */
# define WORKER_CHECK_VMID_ISSUE_QFPA 0x8000 / * vport: Check if qfpa needs
* to be issued */
2007-06-17 19:56:36 -05:00
struct timer_list els_tmofunc ;
2011-02-16 12:39:44 -05:00
struct timer_list delayed_disc_tmo ;
2007-06-17 19:56:36 -05:00
uint8_t load_flag ;
# define FC_LOADING 0x1 /* HBA in process of loading drvr */
# define FC_UNLOADING 0x2 /* HBA in process of unloading drvr */
2015-12-16 18:11:58 -05:00
# define FC_ALLOW_FDMI 0x4 /* port is ready for FDMI requests */
2021-06-08 10:05:47 +05:30
# define FC_ALLOW_VMID 0x8 /* Allow VMID I/Os */
# define FC_DEREGISTER_ALL_APP_ID 0x10 /* Deregister all VMIDs */
2007-08-02 11:09:59 -04:00
/* Vport Config Parameters */
uint32_t cfg_scan_down ;
uint32_t cfg_lun_queue_depth ;
uint32_t cfg_nodev_tmo ;
uint32_t cfg_devloss_tmo ;
uint32_t cfg_restrict_login ;
uint32_t cfg_peer_port_login ;
uint32_t cfg_fcp_class ;
uint32_t cfg_use_adisc ;
uint32_t cfg_discovery_threads ;
2007-08-02 11:10:09 -04:00
uint32_t cfg_log_verbose ;
2019-01-28 11:14:38 -08:00
uint32_t cfg_enable_fc4_type ;
2007-08-02 11:09:59 -04:00
uint32_t cfg_max_luns ;
2007-10-27 13:37:17 -04:00
uint32_t cfg_enable_da_id ;
2008-09-07 11:52:04 -04:00
uint32_t cfg_max_scsicmpl_time ;
2010-07-14 15:32:10 -04:00
uint32_t cfg_tgt_queue_depth ;
2013-07-15 18:35:04 -04:00
uint32_t cfg_first_burst_size ;
2007-08-02 11:09:59 -04:00
uint32_t dev_loss_tmo_changed ;
2021-06-08 10:05:47 +05:30
/* VMID parameters */
u8 lpfc_vmid_host_uuid [ LPFC_COMPRESS_VMID_SIZE ] ;
u32 max_vmid ; /* maximum VMIDs allowed per port */
u32 cur_vmid_cnt ; /* Current VMID count */
# define LPFC_MIN_VMID 4
# define LPFC_MAX_VMID 255
u32 vmid_inactivity_timeout ; /* Time after which the VMID */
/* deregisters from switch */
u32 vmid_priority_tagging ;
# define LPFC_VMID_PRIO_TAG_DISABLE 0 /* Disable */
# define LPFC_VMID_PRIO_TAG_SUP_TARGETS 1 /* Allow supported targets only */
# define LPFC_VMID_PRIO_TAG_ALL_TARGETS 2 /* Allow all targets */
unsigned long * vmid_priority_range ;
# define LPFC_VMID_MAX_PRIORITY_RANGE 256
# define LPFC_VMID_PRIORITY_BITMAP_SIZE 32
u8 vmid_flag ;
# define LPFC_VMID_IN_USE 0x1
# define LPFC_VMID_ISSUE_QFPA 0x2
# define LPFC_VMID_QFPA_CMPL 0x4
# define LPFC_VMID_QOS_ENABLED 0x8
# define LPFC_VMID_TIMER_ENBLD 0x10
struct fc_qfpa_res * qfpa_res ;
2007-08-02 11:10:31 -04:00
struct fc_vport * fc_vport ;
2021-06-08 10:05:47 +05:30
struct lpfc_vmid * vmid ;
DECLARE_HASHTABLE ( hash_table , 8 ) ;
rwlock_t vmid_lock ;
struct lpfc_vmid_priority_info vmid_priority ;
2008-12-04 22:40:07 -05:00
# ifdef CONFIG_SCSI_LPFC_DEBUG_FS
2007-08-02 11:10:31 -04:00
struct dentry * debug_disc_trc ;
struct dentry * debug_nodelist ;
2017-02-12 13:52:33 -08:00
struct dentry * debug_nvmestat ;
2019-01-28 11:14:25 -08:00
struct dentry * debug_scsistat ;
2020-03-22 11:13:02 -07:00
struct dentry * debug_ioktime ;
2020-03-22 11:13:00 -07:00
struct dentry * debug_hdwqstat ;
2007-08-02 11:10:31 -04:00
struct dentry * vport_debugfs_root ;
struct lpfc_debugfs_trc * disc_trc ;
atomic_t disc_trc_cnt ;
# endif
2008-09-07 11:52:10 -04:00
uint8_t stat_data_enabled ;
uint8_t stat_data_blocked ;
2009-05-22 14:51:39 -04:00
struct list_head rcv_buffer_list ;
2009-10-02 15:17:02 -04:00
unsigned long rcv_buffer_time_stamp ;
2009-05-22 14:51:39 -04:00
uint32_t vport_flag ;
# define STATIC_VPORT 1
2017-04-21 16:05:02 -07:00
# define FAWWPN_SET 2
# define FAWWPN_PARAM_CHG 4
2015-12-16 18:11:58 -05:00
uint16_t fdmi_num_disc ;
uint32_t fdmi_hba_mask ;
uint32_t fdmi_port_mask ;
2017-02-12 13:52:30 -08:00
/* There is a single nvme instance per vport. */
struct nvme_fc_local_port * localport ;
uint8_t nvmei_support ; /* driver supports NVME Initiator */
uint32_t last_fcp_wqidx ;
2018-10-23 13:41:08 -07:00
uint32_t rcv_flogi_cnt ; /* How many unsol FLOGIs ACK'd. */
2007-06-17 19:56:36 -05:00
} ;
2007-06-17 19:56:37 -05:00
/*
 * Driver-side bookkeeping for a single HBQ: slot and buffer counts, the
 * put/get indices, the queue's virtual address, the list of buffers
 * assigned to it, and the callbacks used to allocate and free buffers.
 */
struct hbq_s {
uint16_t entry_count ; /* Current number of HBQ slots */
2007-10-27 13:37:53 -04:00
uint16_t buffer_count ; /* Current number of buffers posted */
2007-06-17 19:56:37 -05:00
uint32_t next_hbqPutIdx ; /* Index of the next HBQ slot to use */
uint32_t hbqPutIdx ; /* HBQ slot to use */
uint32_t local_hbqGetIdx ; /* Local copy of Get index from Port */
2007-08-02 11:10:31 -04:00
void * hbq_virt ; /* Virtual ptr to this hbq */
struct list_head hbq_buffer_list ; /* buffers assigned to this HBQ */
/* Callback for HBQ buffer allocation; yields a struct hbq_dmabuf pointer */
struct hbq_dmabuf * ( * hbq_alloc_buffer ) ( struct lpfc_hba * ) ;
/* Callback for HBQ buffer free; receives the struct hbq_dmabuf pointer */
void ( * hbq_free_buffer ) ( struct lpfc_hba * ,
struct hbq_dmabuf * ) ;
2007-06-17 19:56:37 -05:00
} ;
2007-08-02 11:10:31 -04:00
/* HBQ index; this matches the position in the lpfc_hbq_defs array. */
2007-06-17 19:56:38 -05:00
# define LPFC_ELS_HBQ 0
2017-02-12 13:52:30 -08:00
# define LPFC_MAX_HBQS 1 /* presumably the number of HBQs - TODO confirm */
2007-06-17 19:56:37 -05:00
2007-10-27 13:38:11 -04:00
/* HBA temperature states; the normal state is the zero value. */
enum hba_temp_state {
	HBA_NORMAL_TEMP = 0,
	HBA_OVER_TEMP = 1
};
2008-02-08 18:49:51 -05:00
/* Interrupt types; NONE is the zero value. */
enum intr_type_t {
	NONE = 0,
	INTx = 1,
	MSI = 2,
	MSIX = 3,
};
2013-01-03 15:43:37 -05:00
/* presumably the maximum number of unsol_rcv_ct_ctx entries - TODO confirm */
# define LPFC_CT_CTX_MAX 64
2009-07-19 10:01:32 -04:00
/*
 * Context record holding a context id, a SID, a validity marker and the
 * OX_ID/RX_ID pair. Judging by the name it describes an unsolicited
 * received CT request - TODO confirm against the users of this struct.
 */
struct unsol_rcv_ct_ctx {
uint32_t ctxt_id ;
uint32_t SID ;
2013-01-03 15:43:37 -05:00
uint32_t valid ; /* presumably UNSOL_INVALID or UNSOL_VALID - confirm */
# define UNSOL_INVALID 0
# define UNSOL_VALID 1
2011-07-22 18:36:52 -04:00
uint16_t oxid ;
uint16_t rxid ;
2009-07-19 10:01:32 -04:00
} ;
2010-11-20 23:11:48 -05:00
/*
 * User link speed settings. Each non-zero value equals the speed in
 * Gigabaud; 0 selects auto negotiation (the default).
 */
# define LPFC_USER_LINK_SPEED_AUTO 0 /* auto select (default)*/
# define LPFC_USER_LINK_SPEED_1G 1 /* 1 Gigabaud */
# define LPFC_USER_LINK_SPEED_2G 2 /* 2 Gigabaud */
# define LPFC_USER_LINK_SPEED_4G 4 /* 4 Gigabaud */
# define LPFC_USER_LINK_SPEED_8G 8 /* 8 Gigabaud */
# define LPFC_USER_LINK_SPEED_10G 10 /* 10 Gigabaud */
# define LPFC_USER_LINK_SPEED_16G 16 /* 16 Gigabaud */
2015-08-31 16:48:17 -04:00
# define LPFC_USER_LINK_SPEED_32G 32 /* 32 Gigabaud */
2018-02-22 08:18:45 -08:00
# define LPFC_USER_LINK_SPEED_64G 64 /* 64 Gigabaud */
/* Highest setting defined above */
# define LPFC_USER_LINK_SPEED_MAX LPFC_USER_LINK_SPEED_64G
/* The same set of values as text; keep in sync with the defines above */
# define LPFC_LINK_SPEED_STRING "0, 1, 2, 4, 8, 10, 16, 32, 64"
2010-11-20 23:11:48 -05:00
2011-05-24 11:44:28 -04:00
/* nemb type selector; numbering starts at 1, so 0 is not a member. */
enum nemb_type {
	nemb_mse = 1,
	nemb_hbd = 2
};
/* mbox type selector; numbering starts at 1, so 0 is not a member. */
enum mbox_type {
	mbox_rd = 1,
	mbox_wr = 2
};
/* dma type selector; numbering starts at 1, so 0 is not a member. */
enum dma_type {
	dma_mbox = 1,
	dma_ebuf = 2
};
/* sta type selector; numbering starts at 1, so 0 is not a member. */
enum sta_type {
	sta_pre_addr = 1,
	sta_pos_addr = 2
};
/*
 * Context for a mailbox command that uses external buffers: a state
 * word, the nemb and mailbox type selectors, counters/tags, the mailbox
 * DMA buffer and the list of external DMA buffers.
 */
struct lpfc_mbox_ext_buf_ctx {
uint32_t state ; /* presumably one of LPFC_BSG_MBOX_* below - confirm */
# define LPFC_BSG_MBOX_IDLE 0
# define LPFC_BSG_MBOX_HOST 1
# define LPFC_BSG_MBOX_PORT 2
# define LPFC_BSG_MBOX_DONE 3
# define LPFC_BSG_MBOX_ABTS 4
enum nemb_type nembType ; /* nemb_mse or nemb_hbd */
enum mbox_type mboxType ; /* mbox_rd or mbox_wr */
uint32_t numBuf ; /* presumably the external buffer count - confirm */
uint32_t mbxTag ;
uint32_t seqNum ;
struct lpfc_dmabuf * mbx_dmabuf ; /* DMA buffer for the mailbox itself */
struct list_head ext_dmabuf_list ; /* list head for the external DMA buffers */
} ;
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
/*
 * Expedite pool: a list head, a count of entries and the spinlock that
 * guards the pool.
 */
struct lpfc_epd_pool {
/* Expedite pool */
struct list_head list ; /* list head for the pool */
u32 count ; /* presumably the number of entries on list - confirm */
spinlock_t lock ; /* lock for expedite pool */
} ;
2019-10-18 14:18:27 -07:00
/* RAS logging running state; INACTIVE is the zero value. */
enum ras_state {
	INACTIVE = 0,
	REG_INPROGRESS = 1,
	ACTIVE = 2
};
2018-09-10 10:30:50 -07:00
/*
 * RAS firmware-log bookkeeping: the log buffer and its posted size, the
 * configured log level, DMA buffers, hardware-support/enable flags and
 * the current running state.
 */
struct lpfc_ras_fwlog {
uint8_t * fwlog_buff ;
uint32_t fw_buffcount ; /* Buffer size posted to FW */
# define LPFC_RAS_BUFF_ENTERIES 16 /* Each entry can hold max of 64k */
# define LPFC_RAS_MAX_ENTRY_SIZE (64 * 1024)
# define LPFC_RAS_MIN_BUFF_POST_SIZE (256 * 1024)
# define LPFC_RAS_MAX_BUFF_POST_SIZE (1024 * 1024)
uint32_t fw_loglevel ; /* Log level set */
struct lpfc_dmabuf lwpd ;
struct list_head fwlog_buff_list ;
/* RAS support status on adapter */
bool ras_hwsupport ; /* RAS Support available on HW or not */
bool ras_enabled ; /* Ras Enabled for the function */
# define LPFC_RAS_DISABLE_LOGGING 0x00
# define LPFC_RAS_ENABLE_LOGGING 0x01
2019-10-18 14:18:27 -07:00
enum ras_state state ; /* RAS logging running state */
2018-09-10 10:30:50 -07:00
} ;
2020-06-30 14:50:00 -07:00
# define DBG_LOG_STR_SZ 256 /* size in bytes of dbg_log_ent.log */
# define DBG_LOG_SZ 256 /* presumably the number of log entries kept - confirm */
/* One debug log entry: a fixed-size text buffer plus a 64-bit t_ns value. */
struct dbg_log_ent {
char log [ DBG_LOG_STR_SZ ] ;
u64 t_ns ; /* presumably a timestamp in nanoseconds - confirm */
} ;
2020-05-01 14:43:06 -07:00
/* Policy for choosing which cpus are assigned IRQs. */
enum lpfc_irq_chann_mode {
	/* IRQs go to every possible cpu that has hardware queues */
	NORMAL_MODE = 0,
	/* IRQs go only to cpus on the HBA's own numa node */
	NUMA_MODE = 1,
	/*
	 * Same as NORMAL_MODE, except that IRQs are assigned only on
	 * physical (non-hyperthreaded) cpus.
	 */
	NHT_MODE = 2,
};
2005-04-17 16:05:31 -05:00
struct lpfc_hba {
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
/* SCSI interface function jump table entries */
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
struct lpfc_io_buf * ( * lpfc_get_scsi_buf )
2019-01-28 11:14:27 -08:00
( struct lpfc_hba * phba , struct lpfc_nodelist * ndlp ,
struct scsi_cmnd * cmnd ) ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
int ( * lpfc_scsi_prep_dma_buf )
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
( struct lpfc_hba * , struct lpfc_io_buf * ) ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
void ( * lpfc_scsi_unprep_dma_buf )
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
( struct lpfc_hba * , struct lpfc_io_buf * ) ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
void ( * lpfc_release_scsi_buf )
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
( struct lpfc_hba * , struct lpfc_io_buf * ) ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
void ( * lpfc_rampdown_queue_depth )
( struct lpfc_hba * ) ;
void ( * lpfc_scsi_prep_cmnd )
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
( struct lpfc_vport * , struct lpfc_io_buf * ,
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
struct lpfc_nodelist * ) ;
2020-11-15 11:26:42 -08:00
int ( * lpfc_scsi_prep_cmnd_buf )
( struct lpfc_vport * vport ,
struct lpfc_io_buf * lpfc_cmd ,
uint8_t tmo ) ;
2022-02-24 18:23:04 -08:00
int ( * lpfc_scsi_prep_task_mgmt_cmd )
( struct lpfc_vport * vport ,
struct lpfc_io_buf * lpfc_cmd ,
u64 lun , u8 task_mgmt_cmd ) ;
2012-01-18 16:25:09 -05:00
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
/* IOCB interface function jump table entries */
int ( * __lpfc_sli_issue_iocb )
( struct lpfc_hba * , uint32_t ,
struct lpfc_iocbq * , uint32_t ) ;
2020-11-15 11:26:41 -08:00
int ( * __lpfc_sli_issue_fcp_io )
( struct lpfc_hba * phba , uint32_t ring_number ,
struct lpfc_iocbq * piocb , uint32_t flag ) ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
void ( * __lpfc_sli_release_iocbq ) ( struct lpfc_hba * ,
struct lpfc_iocbq * ) ;
int ( * lpfc_hba_down_post ) ( struct lpfc_hba * phba ) ;
void ( * lpfc_scsi_cmd_iocb_cmpl )
( struct lpfc_hba * , struct lpfc_iocbq * , struct lpfc_iocbq * ) ;
/* MBOX interface function jump table entries */
int ( * lpfc_sli_issue_mbox )
( struct lpfc_hba * , LPFC_MBOXQ_t * , uint32_t ) ;
2012-01-18 16:25:09 -05:00
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
/* Slow-path IOCB process function jump table entries */
void ( * lpfc_sli_handle_slow_ring_event )
( struct lpfc_hba * phba , struct lpfc_sli_ring * pring ,
uint32_t mask ) ;
2012-01-18 16:25:09 -05:00
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
/* INIT device interface function jump table entries */
int ( * lpfc_sli_hbq_to_firmware )
( struct lpfc_hba * , uint32_t , struct hbq_dmabuf * ) ;
int ( * lpfc_sli_brdrestart )
( struct lpfc_hba * ) ;
int ( * lpfc_sli_brdready )
( struct lpfc_hba * , uint32_t ) ;
void ( * lpfc_handle_eratt )
( struct lpfc_hba * ) ;
void ( * lpfc_stop_port )
( struct lpfc_hba * ) ;
2010-02-12 14:42:33 -05:00
int ( * lpfc_hba_init_link )
2010-06-07 15:23:35 -04:00
( struct lpfc_hba * , uint32_t ) ;
2010-02-12 14:42:33 -05:00
int ( * lpfc_hba_down_link )
2010-06-07 15:23:35 -04:00
( struct lpfc_hba * , uint32_t ) ;
2011-03-11 16:05:52 -05:00
int ( * lpfc_selective_reset )
( struct lpfc_hba * ) ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
2012-01-18 16:25:09 -05:00
int ( * lpfc_bg_scsi_prep_dma_buf )
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
( struct lpfc_hba * , struct lpfc_io_buf * ) ;
2022-02-24 18:22:55 -08:00
/* Prep SLI WQE/IOCB jump table entries */
void ( * __lpfc_sli_prep_els_req_rsp ) ( struct lpfc_iocbq * cmdiocbq ,
struct lpfc_vport * vport ,
struct lpfc_dmabuf * bmp ,
u16 cmd_size , u32 did , u32 elscmd ,
u8 tmo , u8 expect_rsp ) ;
2022-02-24 18:23:03 -08:00
void ( * __lpfc_sli_prep_gen_req ) ( struct lpfc_iocbq * cmdiocbq ,
struct lpfc_dmabuf * bmp , u16 rpi ,
u32 num_entry , u8 tmo ) ;
void ( * __lpfc_sli_prep_xmit_seq64 ) ( struct lpfc_iocbq * cmdiocbq ,
struct lpfc_dmabuf * bmp , u16 rpi ,
u16 ox_id , u32 num_entry , u8 rctl ,
u8 last_seq , u8 cr_cx_cmd ) ;
2022-02-24 18:23:05 -08:00
void ( * __lpfc_sli_prep_abort_xri ) ( struct lpfc_iocbq * cmdiocbq ,
u16 ulp_context , u16 iotag ,
u8 ulp_class , u16 cqid , bool ia ) ;
2012-01-18 16:25:09 -05:00
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
/* expedite pool */
struct lpfc_epd_pool epd_pool ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
/* SLI4 specific HBA data structure */
struct lpfc_sli4_hba sli4_hba ;
2017-09-29 17:34:34 -07:00
struct workqueue_struct * wq ;
scsi: lpfc: Rework EQ/CQ processing to address interrupt coalescing
When driving high iop counts, auto_imax coalescing kicks in and drives the
performance to extremely small iops levels.
There are two issues:
1) auto_imax is enabled by default. The auto algorithm, when iops gets
high, divides the iops by the hdwq count and uses that value to
calculate EQ_Delay. The EQ_Delay is set uniformly on all EQs whether
they have load or not. The EQ_delay is only manipulated every 5s (a
long time). Thus there were large 5s swings of no interrupt delay
followed by large/maximum delay, before repeating.
2) When processing a CQ, the driver got mixed up on the rate of when
to ring the doorbell to keep the chip apprised of the eqe or cqe
consumption as well as how long to sit in the thread and
process queue entries. Currently, the driver capped its work at
64 entries (very small) and exited/rearmed the CQ. Thus, on heavy
loads, additional overheads were taken to exit and re-enter the
interrupt handler. Worse, if in the large/maximum coalescing
windows, it could be a while before getting back to servicing.
The issues are corrected by the following:
- A change in defaults. Auto_imax is turned OFF and fcp_imax is set
to 0. Thus all interrupts are immediate.
- Cleanup of field names and their meanings. Existing names were
non-intuitive or used for duplicate things.
- Added max_proc_limit field, to control the length of time the
handlers would service completions.
- Reworked EQ handling:
Added common routine that walks eq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after eqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Moved lpfc_sli4_eq_flush(), which does similar action, to same area.
Replaced the 2 individual loops that walk an eq with a call to the
common routine.
Slightly revised lpfc_sli4_hba_handle_eqe() calling syntax.
Added per-cpu counters to detect interrupt rates and scale
interrupt coalescing values.
- Reworked CQ handling:
Added common routine that walks cq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after cqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Replaced the 3 individual loops that walk a cq with a call to the
common routine.
Redefined lpfc_sli4_sp_handle_mcqe() to common handler definition with
queue reference. Add increment for mbox completion to handler.
- Added a new module/sysfs attribute: lpfc_cq_max_proc_limit To allow
dynamic changing of the CQ max_proc_limit value being used.
Although this leaves an EQ as an immediate interrupt, that interrupt will
only occur if a CQ bound to it is in an armed state and has cqe's to
process. By staying in the cq processing routine longer, high loads will
avoid generating more interrupts as they will only rearm as the processing
thread exits. The immediate interrupt is also beneficial to idle or
lower-processing CQ's as they get serviced immediately without being
penalized by sharing an EQ with a more loaded CQ.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:33 -08:00
struct delayed_work eq_delay_work ;
2017-09-29 17:34:34 -07:00
2020-06-30 14:49:59 -07:00
# define LPFC_IDLE_STAT_DELAY 1000
struct delayed_work idle_stat_delay_work ;
2005-04-17 16:05:31 -05:00
struct lpfc_sli sli ;
[SCSI] lpfc 8.3.2 : Reorganization for SLI4
Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.
For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2009-05-22 14:50:54 -04:00
uint8_t pci_dev_grp ; /* lpfc PCI dev group: 0x0, 0x1, 0x2,... */
uint32_t sli_rev ; /* SLI2, SLI3, or SLI4 */
2007-06-17 19:56:37 -05:00
uint32_t sli3_options ; /* Mask of enabled SLI3 options */
2008-08-24 21:49:55 -04:00
# define LPFC_SLI3_HBQ_ENABLED 0x01
# define LPFC_SLI3_NPIV_ENABLED 0x02
# define LPFC_SLI3_VPORT_TEARDOWN 0x04
# define LPFC_SLI3_CRP_ENABLED 0x08
2008-12-04 22:39:46 -05:00
# define LPFC_SLI3_BG_ENABLED 0x20
2009-05-22 14:51:39 -04:00
# define LPFC_SLI3_DSS_ENABLED 0x40
2011-02-16 12:39:24 -05:00
# define LPFC_SLI4_PERFH_ENABLED 0x80
# define LPFC_SLI4_PHWQ_ENABLED 0x100
2007-06-17 19:56:37 -05:00
uint32_t iocb_cmd_size ;
uint32_t iocb_rsp_size ;
2007-06-17 19:56:36 -05:00
2018-10-23 13:41:11 -07:00
struct lpfc_trunk_link trunk_link ;
2007-06-17 19:56:36 -05:00
enum hba_state link_state ;
uint32_t link_flag ; /* link state flags */
2007-08-02 11:10:37 -04:00
# define LS_LOOPBACK_MODE 0x1 /* NPort is in Loopback mode */
2007-06-17 19:56:36 -05:00
/* This flag is set while issuing */
/* INIT_LINK mailbox command */
2007-06-17 19:56:38 -05:00
# define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */
2008-02-08 18:49:39 -05:00
# define LS_IGNORE_ERATT 0x4 /* intr handler should ignore ERATT */
2017-05-15 15:20:51 -07:00
# define LS_MDS_LINK_DOWN 0x8 /* MDS Diagnostics Link Down */
2020-10-20 13:27:17 -07:00
# define LS_MDS_LOOPBACK 0x10 /* MDS Diagnostics Link Up (Loopback) */
# define LS_CT_VEN_RPA 0x20 /* Vendor RPA sent to switch */
2007-06-17 19:56:36 -05:00
2008-08-24 21:50:30 -04:00
uint32_t hba_flag ; /* hba generic flags */
# define HBA_ERATT_HANDLED 0x1 /* This flag is set when eratt handled */
2009-05-22 14:51:39 -04:00
# define DEFER_ERATT 0x2 /* Deferred error attention in progress */
2010-11-20 23:11:48 -05:00
# define HBA_FCOE_MODE 0x4 /* HBA function in FCoE Mode */
2009-10-02 15:17:02 -04:00
# define HBA_SP_QUEUE_EVT 0x8 /* Slow-path qevt posted to worker thread*/
2009-05-22 14:51:39 -04:00
# define HBA_POST_RECEIVE_BUFFER 0x10 /* Rcv buffers need to be posted */
2019-10-18 14:18:30 -07:00
# define HBA_PERSISTENT_TOPO 0x20 /* Persistent topology support in hba */
2020-10-20 13:27:12 -07:00
# define ELS_XRI_ABORT_EVENT 0x40 /* ELS_XRI abort event was queued */
2009-05-22 14:51:39 -04:00
# define ASYNC_EVENT 0x80
2009-07-19 10:01:10 -04:00
# define LINK_DISABLED 0x100 /* Link disabled by user */
2010-10-22 11:06:08 -04:00
# define FCF_TS_INPROG 0x200 /* FCF table scan in progress */
# define FCF_RR_INPROG 0x400 /* FCF roundrobin flogi in progress */
# define HBA_FIP_SUPPORT 0x800 /* FIP support in HBA */
# define HBA_AER_ENABLED 0x1000 /* AER enabled with HBA */
# define HBA_DEVLOSS_TMO 0x2000 /* HBA in devloss timeout */
2010-11-20 23:11:55 -05:00
# define HBA_RRQ_ACTIVE 0x4000 /* process the rrq active list */
2019-08-14 16:57:11 -07:00
# define HBA_IOQ_FLUSH 0x8000 /* FCP/NVME I/O queues being flushed */
2016-07-06 12:35:56 -07:00
# define HBA_RECOVERABLE_UE 0x20000 /* Firmware supports recoverable UE */
2016-10-13 15:06:16 -07:00
# define HBA_FORCED_LINK_SPEED 0x40000 /*
* Firmware supports Forced Link Speed
* capability
*/
scsi: lpfc: Fix EEH support for NVMe I/O
Injecting errors on the PCI slot while the driver is handling NVMe I/O will
cause crashes and hangs.
There are several rather difficult scenarios occurring. The main issue is
that the adapter can report a PCI error before or simultaneously to the PCI
subsystem reporting the error. Both paths have different entry points and
currently there is no interlock between them. Thus multiple teardown paths
are competing and all heck breaks loose.
Complicating things is the NVMe path. To a large degree, I/O was able to be
shutdown for a full FC port on the SCSI stack. But on NVMe, there isn't a
similar call. At best, it works on a per-controller basis, but even at the
controller level, it's a controller "reset" call. All of which means I/O is
still flowing on different CPUs with reset paths expecting hw access
(mailbox commands) to execute properly.
The following modifications are made:
- A new flag is set in PCI error entrypoints so the driver can track being
called by that path.
- An interlock is added in the SLI hw error path and the PCI error path
such that only one of the paths proceeds with the teardown logic.
- RPI cleanup is patched such that RPIs are marked unregistered w/o mbx
cmds in cases of hw error.
- If entering the SLI port re-init calls, a case where SLI error teardown
was quick and beat the PCI calls now reporting error, check whether the
SLI port is still live on the PCI bus.
- In the PCI reset code to bring the adapter back, recheck the IRQ
settings. Different checks for SLI3 vs SLI4.
- In I/O completions, that may be called as part of the cleanup or
underway just before the hw error, check the state of the adapter. If
in error, shortcut handling that would expect further adapter
completions as the hw error won't be sending them.
- In routines waiting on I/O completions, which may have been in progress
prior to the hw error, detect the device is being torn down and abort
from their waits and just give up. This points to a larger issue in the
driver on ref-counting for data structures, as it doesn't have
ref-counting on q and port structures. We'll do this fix for now as it
would be a major rework to be done differently.
- Fix the NVMe cleanup to simulate NVMe I/O completions if I/O is being
failed back due to hw error.
- In I/O buf allocation, done at the start of new I/Os, check hw state and
fail if hw error.
Link: https://lore.kernel.org/r/20210910233159.115896-10-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2021-09-10 16:31:54 -07:00
# define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */
scsi: lpfc: Defer LS_ACC to FLOGI on point to point logins
In the current discovery state machine, the driver treated FLOGI oddly. When
point to point, an FLOGI is to be exchanged by the two ports, with the port
with the most significant WWN then proceeding with PLOGI. The
implementation in the driver was keyed too closely with "what have I sent",
not with what has happened between the two endpoints. Thus, it blatantly
would ACC an FLOGI, but reject PLOGI's until it had its FLOGI ACC'd. The
problem is - the sending of FLOGI may be delayed for some reason, or the
response to FLOGI held off by the other side. In the failing situation the
other side sent an FLOGI, which was ACC'd, then sent PLOGIs which were then
rjt'd until the retry count for the PLOGIs were exceeded and the port gave
up. The FLOGI may have been very late in transmit, or the response held off
until the PLOGIs failed. Given the other port had the higher WWN, no PLOGIs
would occur and communication stopped.
Correct the situation by changing the FLOGI handling. Defer any response to
an FLOGI until the driver has sent its FLOGI as well. Then, upon either
completion of the sent FLOGI, or upon sending an ACC to a received FLOGI
(which may be received before or just after FLOGI was sent), the driver
will act on who has the higher WWN. If the other port does, the driver will
noop any handling of an FLOGI response (if outstanding) and wait for PLOGI.
If the local port does, the driver will transition to sending PLOGI and
will noop any action on responding to an FLOGI (if not yet received).
Fortunately, to implement this, it only took another state flag and
deferring any FLOGI response if the FLOGI has yet to be transmitted. All
subsequent actions were already in place.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2018-11-29 16:09:36 -08:00
# define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */
2021-12-03 16:26:42 -08:00
# define HBA_SHORT_CMF 0x200000 /* shorter CMF timer routine */
2021-08-16 09:28:54 -07:00
# define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */
2020-01-27 16:23:03 -08:00
# define HBA_DEFER_FLOGI 0x800000 /* Defer FLOGI till read_sparm cmpl */
2021-08-16 09:28:54 -07:00
# define HBA_SETUP 0x1000000 /* Signifies HBA setup is completed */
2021-01-04 10:02:27 -08:00
# define HBA_NEEDS_CFG_PORT 0x2000000 /* SLI3 - needs a CONFIG_PORT mbox */
scsi: lpfc: Implement health checking when aborting I/O
Several errors have occurred where the adapter stops or fails but does not
raise the register values for the driver to detect failure. Thus driver is
unaware of the failure. The failure typically results in I/O timeouts, the
I/O timeout handler failing (after several seconds), and the error handler
escalating recovery policy and resulting in more errors. Eventually, the
driver is in a position where things have spiraled and it can't do recovery
because other recovery ops are still outstanding and it becomes unusable.
Resolve the situation by having the I/O timeout handler (actually a els,
SCSI I/O, NVMe ls, or NVMe I/O timeout), in addition to aborting the I/O,
perform a mailbox command and look for a response from the hardware. If
the mailbox command fails, it will mark the adapter offline and then invoke
the adapter reset handler to clean up.
The new I/O timeout test will be limited to a test every 5s. If there are
multiple I/O timeouts concurrently, only the 1st I/O timeout will generate
the mailbox command. Further testing will only occur once a timeout occurs
after a 5s delay from the last mailbox command has expired.
Link: https://lore.kernel.org/r/20210104180240.46824-14-jsmart2021@gmail.com
Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2021-01-04 10:02:38 -08:00
# define HBA_HBEAT_INP 0x4000000 /* mbox HBEAT is in progress */
# define HBA_HBEAT_TMO 0x8000000 /* HBEAT initiated after timeout */
2021-03-01 09:18:11 -08:00
# define HBA_FLOGI_OUTSTANDING 0x10000000 /* FLOGI is outstanding */
2017-02-12 13:52:30 -08:00
2021-12-03 16:26:40 -08:00
struct completion * fw_dump_cmpl ; /* cmpl event tracker for fw_dump */
2009-10-02 15:17:02 -04:00
uint32_t fcp_ring_in_use ; /* When polling test if intr-hndlr active*/
2008-08-24 21:49:55 -04:00
struct lpfc_dmabuf slim2p ;
MAILBOX_t * mbox ;
2010-03-15 11:25:20 -04:00
uint32_t * mbox_ext ;
2011-05-24 11:44:28 -04:00
struct lpfc_mbox_ext_buf_ctx mbox_ext_buf_ctx ;
2008-08-24 21:50:30 -04:00
uint32_t ha_copy ;
2008-08-24 21:49:55 -04:00
struct _PCB * pcb ;
struct _IOCB * IOCBs ;
2007-06-17 19:56:36 -05:00
2008-08-24 21:49:55 -04:00
struct lpfc_dmabuf hbqslimp ;
2007-06-17 19:56:36 -05:00
2005-04-17 16:05:31 -05:00
uint16_t pci_cfg_value ;
uint8_t fc_linkspeed ; /* Link speed after last READ_LA */
uint32_t fc_eventTag ; /* event tag for link attention */
2009-10-02 15:16:39 -04:00
uint32_t link_events ;
2005-04-17 16:05:31 -05:00
/* These fields used to be binfo */
uint32_t fc_pref_DID ; /* preferred D_ID */
2007-06-17 19:56:38 -05:00
uint8_t fc_pref_ALPA ; /* preferred AL_PA */
2010-10-22 11:05:53 -04:00
uint32_t fc_edtovResol ; /* E_D_TOV timer resolution */
2005-04-17 16:05:31 -05:00
uint32_t fc_edtov ; /* E_D_TOV timer value */
uint32_t fc_arbtov ; /* ARB_TOV timer value */
uint32_t fc_ratov ; /* R_A_TOV timer value */
uint32_t fc_rttov ; /* R_T_TOV timer value */
uint32_t fc_altov ; /* AL_TOV timer value */
uint32_t fc_crtov ; /* C_R_TOV timer value */
struct serv_parm fc_fabparam ; /* fabric service parameters buffer */
uint8_t alpa_map [ 128 ] ; /* AL_PA map from READ_LA */
uint32_t lmt ;
uint32_t fc_topology ; /* link topology, from LINK INIT */
2013-04-17 20:15:19 -04:00
uint32_t fc_topology_changed ; /* link topology, from LINK INIT */
2005-04-17 16:05:31 -05:00
struct lpfc_stats fc_stat ;
struct lpfc_nodelist fc_fcpnodev ; /* nodelist entry for no device */
uint32_t nport_event_cnt ; /* timestamp for nlplist entry */
2007-06-17 19:56:36 -05:00
uint8_t wwnn [ 8 ] ;
uint8_t wwpn [ 8 ] ;
2005-04-17 16:05:31 -05:00
uint32_t RandomData [ 7 ] ;
2016-07-06 12:36:00 -07:00
uint8_t fcp_embed_io ;
2017-02-12 13:52:30 -08:00
uint8_t nvmet_support ; /* driver supports NVMET */
2017-02-12 13:52:34 -08:00
# define LPFC_NVMET_MAX_PORTS 32
2016-07-06 12:36:00 -07:00
uint8_t mds_diags_support ;
2017-08-23 16:55:47 -07:00
uint8_t bbcredit_support ;
2018-01-30 15:58:46 -08:00
uint8_t enab_exp_wqcq_pages ;
2019-08-14 16:57:10 -07:00
u8 nsler ; /* Firmware supports FC-NVMe-2 SLER */
2005-04-17 16:05:31 -05:00
2007-08-02 11:09:59 -04:00
/* HBA Config Parameters */
2005-04-17 16:05:31 -05:00
uint32_t cfg_ack0 ;
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
uint32_t cfg_xri_rebalancing ;
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware.
Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI
to contain the exchange's Scatter/Gather List. This caps the number of SGL
elements that can be in the SGL. There are no extensions to extend the
list out of the 2 pages.
The G7 hardware adds a SGE type that allows the SGL to be vectored to a
different scatter/gather list segment. And that segment can contain a SGE
to go to another segment and so on. The initial segment must still be
pre-registered for the XRI, but it can be a much smaller amount (256Bytes)
as it can now be dynamically grown. This much smaller allocation can
handle the SG list for most normal I/O, and the dynamic aspect allows it to
support many MB's if needed.
The implementation creates a pool which contains "segments" and which is
initially sized to hold the initial small segment per xri. If an I/O
requires additional segments, they are allocated from the pool. If the
pool has no more segments, the pool is grown based on what is now
needed. After the I/O completes, the additional segments are returned to
the pool for use by other I/Os. Once allocated, the additional segments are
not released under the assumption of "if needed once, it will be needed
again". Pools are kept on a per-hardware queue basis, which is typically
1:1 per cpu, but may be shared by multiple cpus.
The switch to the smaller initial allocation significantly reduces the
memory footprint of the driver (which only grows if large ios are
issued). Based on the several K of XRIs for the adapter, the 8KB->256B
reduction can conserve 32MBs or more.
It has been observed with per-cpu resource pools that allocating a resource
on CPU A, may be put back on CPU B. While the get routines are distributed
evenly, only a limited subset of CPUs may be handling the put routines.
This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because
all the resources are being put on a limited subset of CPUs.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-14 16:57:09 -07:00
uint32_t cfg_xpsgl ;
2007-08-02 11:10:21 -04:00
uint32_t cfg_enable_npiv ;
2010-11-20 23:11:55 -05:00
uint32_t cfg_enable_rrq ;
2005-04-17 16:05:31 -05:00
uint32_t cfg_topology ;
uint32_t cfg_link_speed ;
2011-07-22 18:37:52 -04:00
# define LPFC_FCF_FOV 1 /* Fast fcf failover */
# define LPFC_FCF_PRIORITY 2 /* Priority fcf failover */
uint32_t cfg_fcf_failover_policy ;
2012-08-03 12:36:42 -04:00
uint32_t cfg_fcp_io_sched ;
2018-10-23 13:41:10 -07:00
uint32_t cfg_ns_query ;
2012-10-31 14:44:42 -04:00
uint32_t cfg_fcp2_no_tgt_reset ;
2005-04-17 16:05:31 -05:00
uint32_t cfg_cr_delay ;
uint32_t cfg_cr_count ;
2006-02-28 22:33:08 -05:00
uint32_t cfg_multi_ring_support ;
2006-12-02 13:34:16 -05:00
uint32_t cfg_multi_ring_rctl ;
uint32_t cfg_multi_ring_type ;
[SCSI] lpfc 8.1.1 : Add polled-mode support
- Add functionality to run in polled mode only. Includes run time
attribute to enable mode.
- Enable runtime writable hba settings for coalescing and delay parameters
Customers have requested a mode in the driver to run strictly polled.
This is generally to support an environment where the server is extremely
loaded and is looking to reclaim some cpu cycles from adapter interrupt
handling.
This patch adds a new "poll" attribute, and the following behavior:
if value is 0 (default):
The driver uses the normal method for i/o completion. It uses the
firmware feature of interrupt coalescing. The firmware allows a
minimum number of i/o completions before an interrupt, or a maximum
time delay between interrupts. By default, the driver sets these
to no delay (disabled) or 1 i/o - meaning coalescing is disabled.
Attributes were provided to change the coalescing values, but it was
a module-load time only and global across all adapters.
This patch allows them to be writable on a per-adapter basis.
if value is 1 :
Interrupts are left enabled, expecting that the user has tuned the
interrupt coalescing values. When this setting is enabled, the driver
will attempt to service completed i/o whenever new i/o is submitted
to the adapter. If the coalescing values are large, and the i/o
generation rate steady, an interrupt will be avoided by servicing
completed i/o prior to the coalescing thresholds kicking in. However,
if the i/o completion load is high enough or i/o generation slow, the
coalescing values will ensure that completed i/o is serviced in a timely
fashion.
if value is 3 :
Turns off FCP i/o interrupts altogether. The coalescing values now have
no effect. A new attribute "poll_tmo" (default 10ms) exists to set
the polling interval for i/o completion. When this setting is enabled,
the driver will attempt to service completed i/o and restart the
interval timer whenever new i/o is submitted. This behavior allows for
servicing of completed i/o sooner than the interval timer, but ensures
that if no i/o is being issued, then the interval timer will kick in
to service the outstanding i/o.
Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
2005-11-29 16:32:13 -05:00
uint32_t cfg_poll ;
uint32_t cfg_poll_tmo ;
2013-09-06 12:22:46 -04:00
uint32_t cfg_task_mgmt_tmo ;
2006-12-02 13:34:56 -05:00
uint32_t cfg_use_msi ;
2017-06-01 21:07:10 -07:00
uint32_t cfg_auto_imax ;
2009-05-22 14:51:39 -04:00
uint32_t cfg_fcp_imax ;
2019-05-14 14:58:08 -07:00
uint32_t cfg_force_rscn ;
scsi: lpfc: Rework EQ/CQ processing to address interrupt coalescing
When driving high iop counts, auto_imax coalescing kicks in and drives the
performance to extremely small iops levels.
There are two issues:
1) auto_imax is enabled by default. The auto algorithm, when iops gets
high, divides the iops by the hdwq count and uses that value to
calculate EQ_Delay. The EQ_Delay is set uniformly on all EQs whether
they have load or not. The EQ_delay is only manipulated every 5s (a
long time). Thus there were large 5s swings of no interrupt delay
followed by large/maximum delay, before repeating.
2) When processing a CQ, the driver got mixed up on the rate of when
to ring the doorbell to keep the chip apprised of the eqe or cqe
consumption as well as how long to sit in the thread and
process queue entries. Currently, the driver capped its work at
64 entries (very small) and exited/rearmed the CQ. Thus, on heavy
loads, additional overheads were taken to exit and re-enter the
interrupt handler. Worse, if in the large/maximum coalescing
windows, it could be a while before getting back to servicing.
The issues are corrected by the following:
- A change in defaults. Auto_imax is turned OFF and fcp_imax is set
to 0. Thus all interrupts are immediate.
- Cleanup of field names and their meanings. Existing names were
non-intuitive or used for duplicate things.
- Added max_proc_limit field, to control the length of time the
handlers would service completions.
- Reworked EQ handling:
Added common routine that walks eq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after eqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Moved lpfc_sli4_eq_flush(), which does similar action, to same area.
Replaced the 2 individual loops that walk an eq with a call to the
common routine.
Slightly revised lpfc_sli4_hba_handle_eqe() calling syntax.
Added per-cpu counters to detect interrupt rates and scale
interrupt coalescing values.
- Reworked CQ handling:
Added common routine that walks cq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after cqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Replaced the 3 individual loops that walk a cq with a call to the
common routine.
Redefined lpfc_sli4_sp_handle_mcqe() to common handler definition with
queue reference. Add increment for mbox completion to handler.
- Added a new module/sysfs attribute: lpfc_cq_max_proc_limit To allow
dynamic changing of the CQ max_proc_limit value being used.
Although this leaves an EQ as an immediate interrupt, that interrupt will
only occur if a CQ bound to it is in an armed state and has cqe's to
process. By staying in the cq processing routine longer, high loads will
avoid generating more interrupts as they will only rearm as the processing
thread exits. The immediate interrupt is also beneficial to idle or
lower-processing CQ's as they get serviced immediately without being
penalized by sharing an EQ with a more loaded CQ.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:33 -08:00
uint32_t cfg_cq_poll_threshold ;
uint32_t cfg_cq_max_proc_limit ;
2013-04-17 20:19:16 -04:00
uint32_t cfg_fcp_cpu_map ;
scsi: lpfc: Mitigate high memory pre-allocation by SCSI-MQ
When SCSI-MQ is enabled, the SCSI-MQ layers will do pre-allocation of MQ
resources based on shost values set by the driver. In newer cases of the
driver, which attempts to set nr_hw_queues to the cpu count, the
multipliers become excessive, with a single shost having SCSI-MQ
pre-allocation reaching into the multiple GBytes range. NPIV, which
creates additional shosts, only multiplies this overhead. On lower-memory
systems, this can exhaust system memory very quickly, resulting in a system
crash or failures in the driver or elsewhere due to low memory conditions.
After testing several scenarios, the situation can be mitigated by limiting
the value set in shost->nr_hw_queues to 4. Although the shost values were
changed, the driver still had per-cpu hardware queues of its own that
allowed parallelization per-cpu. Testing revealed that even with the
smallish number for nr_hw_queues for SCSI-MQ, performance levels remained
near maximum with the within-driver affinitization.
A module parameter was created to allow the value set for the nr_hw_queues
to be tunable.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-15 19:36:49 -07:00
uint32_t cfg_fcp_mq_threshold ;
scsi: lpfc: Replace io_channels for nvme and fcp with general hdw_queues per cpu
Currently, both nvme and fcp each have their own concept of an io_channel,
which is a combination wq/cq and associated msix. Different cpus would
share an io_channel.
The driver is now moving to per-cpu wq/cq pairs and msix vectors. The
driver will still use separate wq/cq pairs per protocol on each cpu, but
the protocols will share the msix vector.
Given the elimination of the nvme and fcp io channels, the module
parameters will be removed. A new parameter, lpfc_hdw_queue is added which
allows the wq/cq pair allocation per cpu to be overridden and allocated to
lesser value. If lpfc_hdw_queue is zero, the number of pairs allocated will
be based on the number of cpus. If non-zero, the parameter specifies the
number of queues to allocate. At this time, the maximum non-zero value is
64.
To manage this new paradigm, a new hardware queue structure is created to
track queue activity and relationships.
As MSIX vector allocation must be known before setting up the
relationships, msix allocation now occurs before queue datastructures are
allocated. If the number of vectors allocated is less than the desired
hardware queues, the hardware queue counts will be reduced to the number of
vectors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:21 -08:00
uint32_t cfg_hdw_queue ;
2019-01-28 11:14:31 -08:00
uint32_t cfg_irq_chann ;
2017-02-12 13:52:34 -08:00
uint32_t cfg_suppress_rsp ;
2017-02-12 13:52:30 -08:00
uint32_t cfg_nvme_oas ;
2018-02-22 08:18:50 -08:00
uint32_t cfg_nvme_embed_cmd ;
2018-04-09 14:24:24 -07:00
uint32_t cfg_nvmet_mrq_post ;
2017-02-12 13:52:35 -08:00
uint32_t cfg_nvmet_mrq ;
2017-02-12 13:52:34 -08:00
uint32_t cfg_enable_nvmet ;
2017-02-12 13:52:30 -08:00
uint32_t cfg_nvme_enable_fb ;
2017-02-12 13:52:35 -08:00
uint32_t cfg_nvmet_fb_size ;
2013-04-17 20:16:15 -04:00
uint32_t cfg_total_seg_cnt ;
2005-04-17 16:05:31 -05:00
uint32_t cfg_sg_seg_cnt ;
2017-04-21 16:05:01 -07:00
uint32_t cfg_nvme_seg_cnt ;
2018-09-10 10:30:42 -07:00
uint32_t cfg_scsi_seg_cnt ;
2005-04-17 16:05:31 -05:00
uint32_t cfg_sg_dma_buf_size ;
2007-08-02 11:09:59 -04:00
uint32_t cfg_hba_queue_depth ;
2008-01-11 01:52:48 -05:00
uint32_t cfg_enable_hba_reset ;
uint32_t cfg_enable_hba_heartbeat ;
2014-02-20 09:56:45 -05:00
uint32_t cfg_fof ;
uint32_t cfg_EnableXLane ;
uint8_t cfg_oas_tgt_wwpn [ 8 ] ;
uint8_t cfg_oas_vpt_wwpn [ 8 ] ;
uint32_t cfg_oas_lun_state ;
# define OAS_LUN_ENABLE 1
# define OAS_LUN_DISABLE 0
uint32_t cfg_oas_lun_status ;
# define OAS_LUN_STATUS_EXISTS 0x01
uint32_t cfg_oas_flags ;
# define OAS_FIND_ANY_VPORT 0x01
# define OAS_FIND_ANY_TARGET 0x02
# define OAS_LUN_VALID 0x04
2016-07-06 12:36:05 -07:00
uint32_t cfg_oas_priority ;
2014-02-20 09:56:45 -05:00
uint32_t cfg_XLanePriority ;
2008-12-04 22:39:46 -05:00
uint32_t cfg_enable_bg ;
2016-10-13 15:06:06 -07:00
uint32_t cfg_prot_mask ;
uint32_t cfg_prot_guard ;
2010-03-15 11:25:20 -04:00
uint32_t cfg_hostmem_hgp ;
2009-05-22 14:51:39 -04:00
uint32_t cfg_log_verbose ;
2019-01-28 11:14:38 -08:00
uint32_t cfg_enable_fc4_type ;
2022-02-07 10:05:16 -08:00
# define LPFC_ENABLE_FCP 1
# define LPFC_ENABLE_NVME 2
# define LPFC_ENABLE_BOTH 3
# if (IS_ENABLED(CONFIG_NVME_FC))
# define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_BOTH
# define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_BOTH
# else
# define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_FCP
# define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_FCP
# endif
2009-10-02 15:16:56 -04:00
uint32_t cfg_aer_support ;
2011-05-24 11:42:11 -04:00
uint32_t cfg_sriov_nr_virtfn ;
2012-10-31 14:44:33 -04:00
uint32_t cfg_request_firmware_upgrade ;
2010-02-12 14:42:33 -05:00
uint32_t cfg_suppress_link_up ;
2013-12-17 20:29:47 -05:00
uint32_t cfg_rrq_xri_bitmap_sz ;
2021-05-14 12:55:57 -07:00
u32 cfg_fcp_wait_abts_rsp ;
2016-07-06 12:36:08 -07:00
uint32_t cfg_delay_discovery ;
2016-07-06 12:36:09 -07:00
uint32_t cfg_sli_mode ;
2010-02-26 14:13:54 -05:00
# define LPFC_INITIALIZE_LINK 0 /* do normal init_link mbox */
# define LPFC_DELAY_INIT_LINK 1 /* layered driver hold off */
# define LPFC_DELAY_INIT_LINK_INDEFINITELY 2 /* wait, manual intervention */
2015-12-16 18:11:58 -05:00
uint32_t cfg_fdmi_on ;
# define LPFC_FDMI_NO_SUPPORT 0 /* FDMI not supported */
# define LPFC_FDMI_SUPPORT 1 /* FDMI supported? */
uint32_t cfg_enable_SmartSAN ;
2016-07-06 12:36:00 -07:00
uint32_t cfg_enable_mds_diags ;
2018-09-10 10:30:50 -07:00
uint32_t cfg_ras_fwlog_level ;
uint32_t cfg_ras_fwlog_buffsize ;
uint32_t cfg_ras_fwlog_func ;
2018-02-22 08:18:43 -08:00
uint32_t cfg_enable_bbcr ; /* Enable BB Credit Recovery */
uint32_t cfg_enable_dpp ; /* Enable Direct Packet Push */
2018-06-26 08:24:26 -07:00
uint32_t cfg_enable_pbde ;
2020-10-20 13:27:17 -07:00
uint32_t cfg_enable_mi ;
2017-02-12 13:52:34 -08:00
struct nvmet_fc_target_port * targetport ;
2005-04-17 16:05:31 -05:00
lpfc_vpd_t vpd ; /* vital product data */
2021-06-08 10:05:47 +05:30
u32 cfg_max_vmid ; /* maximum VMIDs allowed per port */
u32 cfg_vmid_app_header ;
# define LPFC_VMID_APP_HEADER_DISABLE 0
# define LPFC_VMID_APP_HEADER_ENABLE 1
u32 cfg_vmid_priority_tagging ;
u32 cfg_vmid_inactivity_timeout ; /* Time after which the VMID */
/* deregisters from switch */
2005-04-17 16:05:31 -05:00
struct pci_dev * pcidev ;
struct list_head work_list ;
uint32_t work_ha ; /* Host Attention Bits for WT */
uint32_t work_ha_mask ; /* HA Bits owned by WT */
uint32_t work_hs ; /* HS stored in case of ERRAT */
uint32_t work_status [ 2 ] ; /* Extra status from SLIM */
2008-06-14 22:52:53 -04:00
wait_queue_head_t work_waitq ;
2005-04-17 16:05:31 -05:00
struct task_struct * worker_thread ;
2008-08-24 21:50:00 -04:00
unsigned long data_flags ;
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware.
Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI
to contain the exchange's Scatter/Gather List. This caps the number of SGL
elements that can be in the SGL. There are no extensions to extend the
list out of the 2 pages.
The G7 hardware adds a SGE type that allows the SGL to be vectored to a
different scatter/gather list segment. And that segment can contain a SGE
to go to another segment and so on. The initial segment must still be
pre-registered for the XRI, but it can be a much smaller amount (256Bytes)
as it can now be dynamically grown. This much smaller allocation can
handle the SG list for most normal I/O, and the dynamic aspect allows it to
support many MB's if needed.
The implementation creates a pool which contains "segments" and which is
initially sized to hold the initial small segment per xri. If an I/O
requires additional segments, they are allocated from the pool. If the
pool has no more segments, the pool is grown based on what is now
needed. After the I/O completes, the additional segments are returned to
the pool for use by other I/Os. Once allocated, the additional segments are
not released under the assumption of "if needed once, it will be needed
again". Pools are kept on a per-hardware queue basis, which is typically
1:1 per cpu, but may be shared by multiple cpus.
The switch to the smaller initial allocation significantly reduces the
memory footprint of the driver (which only grows if large ios are
issued). Based on the several K of XRIs for the adapter, the 8KB->256B
reduction can conserve 32MBs or more.
It has been observed with per-cpu resource pools that allocating a resource
on CPU A, may be put back on CPU B. While the get routines are distributed
evenly, only a limited subset of CPUs may be handling the put routines.
This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because
all the resources are being put on a limited subset of CPUs.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-14 16:57:09 -07:00
uint32_t border_sge_num ;
2005-04-17 16:05:31 -05:00
2008-02-08 18:50:25 -05:00
uint32_t hbq_in_use ; /* HBQs in use flag */
2007-06-17 19:56:37 -05:00
uint32_t hbq_count ; /* Count of configured HBQs */
2007-06-17 19:56:38 -05:00
struct hbq_s hbqs [ LPFC_MAX_HBQS ] ; /* local copy of hbq indicies */
2007-06-17 19:56:37 -05:00
2017-02-12 13:52:30 -08:00
atomic_t fcp_qidx ; /* next FCP WQ (RR Policy) */
atomic_t nvme_qidx ; /* next NVME WQ (RR Policy) */
2009-07-19 10:01:03 -04:00
2016-07-06 12:36:11 -07:00
phys_addr_t pci_bar0_map ; /* Physical address for PCI BAR0 */
phys_addr_t pci_bar1_map ; /* Physical address for PCI BAR1 */
phys_addr_t pci_bar2_map ; /* Physical address for PCI BAR2 */
2005-04-17 16:05:31 -05:00
void __iomem * slim_memmap_p ; /* Kernel memory mapped address for
PCI BAR0 */
void __iomem * ctrl_regs_memmap_p ; /* Kernel memory mapped address for
PCI BAR2 */
2013-01-03 15:44:00 -05:00
void __iomem * pci_bar0_memmap_p ; /* Kernel memory mapped address for
PCI BAR0 with dual - ULP support */
void __iomem * pci_bar2_memmap_p ; /* Kernel memory mapped address for
PCI BAR2 with dual - ULP support */
void __iomem * pci_bar4_memmap_p ; /* Kernel memory mapped address for
PCI BAR4 with dual - ULP support */
# define PCI_64BIT_BAR0 0
# define PCI_64BIT_BAR2 2
# define PCI_64BIT_BAR4 4
2005-04-17 16:05:31 -05:00
void __iomem * MBslimaddr ; /* virtual address for mbox cmds */
void __iomem * HAregaddr ; /* virtual address for host attn reg */
void __iomem * CAregaddr ; /* virtual address for chip attn reg */
void __iomem * HSregaddr ; /* virtual address for host status
reg */
void __iomem * HCregaddr ; /* virtual address for host ctl reg */
2007-06-17 19:56:37 -05:00
struct lpfc_hgp __iomem * host_gp ; /* Host side get/put pointers */
2008-08-24 21:49:55 -04:00
struct lpfc_pgp * port_gp ;
2007-06-17 19:56:37 -05:00
uint32_t __iomem * hbq_put ; /* Address in SLIM to HBQ put ptrs */
2007-06-17 19:56:38 -05:00
uint32_t * hbq_get ; /* Host mem address of HBQ get ptrs */
2007-06-17 19:56:37 -05:00
2005-04-17 16:05:31 -05:00
int brd_no ; /* FC board number */
char SerialNumber [ 32 ] ; /* adapter Serial Number */
char OptionROMVersion [ 32 ] ; /* adapter BIOS / Fcode version */
2019-03-12 16:30:23 -07:00
char BIOSVersion [ 16 ] ; /* Boot BIOS version */
2005-04-17 16:05:31 -05:00
char ModelDesc [ 256 ] ; /* Model Description */
char ModelName [ 80 ] ; /* Model Name */
char ProgramType [ 256 ] ; /* Program Type */
char Port [ 20 ] ; /* Port No */
uint8_t vpd_flag ; /* VPD data flag */
# define VPD_MODEL_DESC 0x1 /* valid vpd model description */
# define VPD_MODEL_NAME 0x2 /* valid vpd model name */
# define VPD_PROGRAM_TYPE 0x4 /* valid vpd program type */
# define VPD_PORT 0x8 /* valid vpd port data */
# define VPD_MASK 0xf /* mask for any vpd data */
2016-12-30 06:57:47 -08:00
[SCSI] lpfc 8.1.1 : Add polled-mode support
- Add functionality to run in polled mode only. Includes run time
attribute to enable mode.
- Enable runtime writable hba settings for coalescing and delay parameters
Customers have requested a mode in the driver to run strictly polled.
This is generally to support an environment where the server is extremely
loaded and is looking to reclaim some cpu cycles from adapter interrupt
handling.
This patch adds a new "poll" attribute, and the following behavior:
if value is 0 (default):
The driver uses the normal method for i/o completion. It uses the
firmware feature of interrupt coalescing. The firmware allows a
minimum number of i/o completions before an interrupt, or a maximum
time delay between interrupts. By default, the driver sets these
to no delay (disabled) or 1 i/o - meaning coalescing is disabled.
Attributes were provided to change the coalescing values, but it was
a module-load time only and global across all adapters.
This patch allows them to be writable on a per-adapter basis.
if value is 1 :
Interrupts are left enabled, expecting that the user has tuned the
interrupt coalescing values. When this setting is enabled, the driver
will attempt to service completed i/o whenever new i/o is submitted
to the adapter. If the coalescing values are large, and the i/o
generation rate steady, an interrupt will be avoided by servicing
completed i/o prior to the coalescing thresholds kicking in. However,
if the i/o completion load is high enough or i/o generation slow, the
coalescing values will ensure that completed i/o is serviced in a timely
fashion.
if value is 3 :
Turns off FCP i/o interrupts altogether. The coalescing values now have
no effect. A new attribute "poll_tmo" (default 10ms) exists to set
the polling interval for i/o completion. When this setting is enabled,
the driver will attempt to service completed i/o and restart the
interval timer whenever new i/o is submitted. This behavior allows for
servicing of completed i/o sooner than the interval timer, but ensures
that if no i/o is being issued, then the interval timer will kick in
to service the outstanding i/o.
Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
2005-11-29 16:32:13 -05:00
struct timer_list fcp_poll_timer ;
2008-08-24 21:50:30 -04:00
struct timer_list eratt_poll ;
2016-07-06 12:35:56 -07:00
uint32_t eratt_poll_interval ;
[SCSI] lpfc 8.1.1 : Add polled-mode support
- Add functionality to run in polled mode only. Includes run time
attribute to enable mode.
- Enable runtime writable hba settings for coalescing and delay parameters
Customers have requested a mode in the driver to run strictly polled.
This is generally to support an environment where the server is extremely
loaded and is looking to reclaim some cpu cycles from adapter interrupt
handling.
This patch adds a new "poll" attribute, and the following behavior:
if value is 0 (default):
The driver uses the normal method for i/o completion. It uses the
firmware feature of interrupt coalescing. The firmware allows a
minimum number of i/o completions before an interrupt, or a maximum
time delay between interrupts. By default, the driver sets these
to no delay (disabled) or 1 i/o - meaning coalescing is disabled.
Attributes were provided to change the coalescing values, but it was
a module-load time only and global across all adapters.
This patch allows them to be writable on a per-adapter basis.
if value is 1 :
Interrupts are left enabled, expecting that the user has tuned the
interrupt coalescing values. When this setting is enabled, the driver
will attempt to service completed i/o whenever new i/o is submitted
to the adapter. If the coalescing values are large, and the i/o
generation rate steady, an interrupt will be avoided by servicing
completed i/o prior to the coalescing thresholds kicking in. However,
if the i/o completion load is high enough or i/o generation slow, the
coalescing values will ensure that completed i/o is serviced in a timely
fashion.
if value is 3 :
Turns off FCP i/o interrupts altogether. The coalescing values now have
no effect. A new attribute "poll_tmo" (default 10ms) exists to set
the polling interval for i/o completion. When this setting is enabled,
the driver will attempt to service completed i/o and restart the
interval timer whenever new i/o is submitted. This behavior allows for
servicing of completed i/o sooner than the interval timer, but ensures
that if no i/o is being issued, then the interval timer will kick in
to service the outstanding i/o.
Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
2005-11-29 16:32:13 -05:00
2008-12-04 22:39:46 -05:00
uint64_t bg_guard_err_cnt ;
uint64_t bg_apptag_err_cnt ;
uint64_t bg_reftag_err_cnt ;
2005-04-17 16:05:31 -05:00
/* fastpath list. */
2013-04-17 20:17:40 -04:00
spinlock_t scsi_buf_list_get_lock ; /* SCSI buf alloc list lock */
spinlock_t scsi_buf_list_put_lock ; /* SCSI buf free list lock */
struct list_head lpfc_scsi_buf_list_get ;
struct list_head lpfc_scsi_buf_list_put ;
2005-04-17 16:05:31 -05:00
uint32_t total_scsi_bufs ;
struct list_head lpfc_iocb_list ;
uint32_t total_iocbq_bufs ;
2010-11-20 23:11:55 -05:00
struct list_head active_rrq_list ;
2007-06-17 19:56:36 -05:00
spinlock_t hbalock ;
2021-08-16 09:28:54 -07:00
struct work_struct unblock_request_work ; /* SCSI layer unblock IOs */
2005-04-17 16:05:31 -05:00
2017-07-06 10:13:05 +02:00
/* dma_mem_pools */
struct dma_pool * lpfc_sg_dma_buf_pool ;
struct dma_pool * lpfc_mbuf_pool ;
struct dma_pool * lpfc_hrb_pool ; /* header receive buffer pool */
struct dma_pool * lpfc_drb_pool ; /* data receive buffer pool */
struct dma_pool * lpfc_nvmet_drb_pool ; /* data receive buffer pool */
struct dma_pool * lpfc_hbq_pool ; /* SLI3 hbq buffer pool */
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware.
Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI
to contain the exchange's Scatter/Gather List. This caps the number of SGL
elements that can be in the SGL. There are no extensions to extend the
list out of the 2 pages.
The G7 hardware adds a SGE type that allows the SGL to be vectored to a
different scatter/gather list segment. And that segment can contain a SGE
to go to another segment and so on. The initial segment must still be
pre-registered for the XRI, but it can be a much smaller amount (256Bytes)
as it can now be dynamically grown. This much smaller allocation can
handle the SG list for most normal I/O, and the dynamic aspect allows it to
support many MB's if needed.
The implementation creates a pool which contains "segments" and which is
initially sized to hold the initial small segment per xri. If an I/O
requires additional segments, they are allocated from the pool. If the
pool has no more segments, the pool is grown based on what is now
needed. After the I/O completes, the additional segments are returned to
the pool for use by other I/Os. Once allocated, the additional segments are
not released under the assumption of "if needed once, it will be needed
again". Pools are kept on a per-hardware queue basis, which is typically
1:1 per cpu, but may be shared by multiple cpus.
The switch to the smaller initial allocation significantly reduces the
memory footprint of the driver (which only grows if large ios are
issued). Based on the several K of XRIs for the adapter, the 8KB->256B
reduction can conserve 32MBs or more.
It has been observed with per-cpu resource pools that allocating a resource
on CPU A, may be put back on CPU B. While the get routines are distributed
evenly, only a limited subset of CPUs may be handling the put routines.
This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because
all the resources are being put on a limited subset of CPUs.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-14 16:57:09 -07:00
struct dma_pool * lpfc_cmd_rsp_buf_pool ;
2005-04-17 16:05:31 -05:00
struct lpfc_dma_pool lpfc_mbuf_safety_pool ;
mempool_t * mbox_mem_pool ;
mempool_t * nlp_mem_pool ;
2010-11-20 23:11:55 -05:00
mempool_t * rrq_pool ;
2013-12-17 20:29:47 -05:00
mempool_t * active_rrq_pool ;
2005-08-10 15:03:01 -04:00
struct fc_host_statistics link_stats ;
2020-05-01 14:43:06 -07:00
enum lpfc_irq_chann_mode irq_chann_mode ;
2008-02-08 18:49:51 -05:00
enum intr_type_t intr_type ;
2008-12-04 22:39:35 -05:00
uint32_t intr_mode ;
# define LPFC_INTR_ERROR 0xFFFFFFFF
2007-06-17 19:56:36 -05:00
struct list_head port_list ;
2018-09-10 10:30:46 -07:00
spinlock_t port_list_lock ; /* lock for port_list mutations */
2007-08-02 11:09:51 -04:00
struct lpfc_vport * pport ; /* physical lpfc_vport pointer */
uint16_t max_vpi ; /* Maximum virtual nports */
2018-11-29 16:09:33 -08:00
# define LPFC_MAX_VPI 0xFF /* Max number VPI supported 0 - 0xff */
# define LPFC_MAX_VPORTS 0x100 /* Max vports per port, with pport */
2009-05-22 14:51:39 -04:00
uint16_t max_vports ; /*
* For IOV HBAs max_vpi can change
* after a reset . max_vports is max
* number of vports present . This can
* be greater than max_vpi .
*/
uint16_t vpi_base ;
uint16_t vfi_base ;
2007-08-02 11:09:51 -04:00
unsigned long * vpi_bmask ; /* vpi allocation table */
2011-05-24 11:44:12 -04:00
uint16_t * vpi_ids ;
uint16_t vpi_count ;
struct list_head lpfc_vpi_blk_list ;
2007-06-17 19:56:38 -05:00
/* Data structure used by fabric iocb scheduler */
struct list_head fabric_iocb_list ;
atomic_t fabric_iocb_count ;
struct timer_list fabric_block_timer ;
unsigned long bit_flags ;
# define FABRIC_COMANDS_BLOCKED 0
atomic_t num_rsrc_err ;
atomic_t num_cmd_success ;
unsigned long last_rsrc_error_time ;
unsigned long last_ramp_down_time ;
2008-12-04 22:40:07 -05:00
# ifdef CONFIG_SCSI_LPFC_DEBUG_FS
2007-06-17 19:56:39 -05:00
struct dentry * hba_debugfs_root ;
atomic_t debugfs_vport_count ;
scsi: lpfc: Adapt partitioned XRI lists to efficient sharing
The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.
Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool. The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there are many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a randomizing effect that minimizes the
number of cpu's that will contend with each other when they steal XRI's from
another cpu's global pool.
On io completion, a cpu will push the XRI back on to its private pool. A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.
On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.
Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:28 -08:00
struct dentry * debug_multixri_pools ;
2007-08-02 11:10:21 -04:00
struct dentry * debug_hbqinfo ;
2008-01-11 01:53:23 -05:00
struct dentry * debug_dumpHostSlim ;
struct dentry * debug_dumpHBASlim ;
2011-10-10 21:34:11 -04:00
struct dentry * debug_InjErrLBA ; /* LBA to inject errors at */
2012-03-01 22:38:29 -05:00
struct dentry * debug_InjErrNPortID ; /* NPortID to inject errors at */
struct dentry * debug_InjErrWWPN ; /* WWPN to inject errors at */
2011-10-10 21:34:11 -04:00
struct dentry * debug_writeGuard ; /* inject write guard_tag errors */
struct dentry * debug_writeApp ; /* inject write app_tag errors */
struct dentry * debug_writeRef ; /* inject write ref_tag errors */
2012-01-18 16:25:09 -05:00
struct dentry * debug_readGuard ; /* inject read guard_tag errors */
2011-10-10 21:34:11 -04:00
struct dentry * debug_readApp ; /* inject read app_tag errors */
struct dentry * debug_readRef ; /* inject read ref_tag errors */
2017-02-12 13:52:33 -08:00
struct dentry * debug_nvmeio_trc ;
struct lpfc_debugfs_nvmeio_trc * nvmeio_trc ;
2019-01-28 11:14:22 -08:00
struct dentry * debug_hdwqinfo ;
2019-01-28 11:14:31 -08:00
# ifdef LPFC_HDWQ_LOCK_STAT
struct dentry * debug_lockstat ;
# endif
2021-08-16 09:28:57 -07:00
struct dentry * debug_cgn_buffer ;
struct dentry * debug_rx_monitor ;
2019-10-18 14:18:27 -07:00
struct dentry * debug_ras_log ;
2017-02-12 13:52:33 -08:00
atomic_t nvmeio_trc_cnt ;
uint32_t nvmeio_trc_size ;
uint32_t nvmeio_trc_output_idx ;
2011-10-10 21:34:11 -04:00
/* T10 DIF error injection */
uint32_t lpfc_injerr_wgrd_cnt ;
uint32_t lpfc_injerr_wapp_cnt ;
uint32_t lpfc_injerr_wref_cnt ;
2012-01-18 16:25:09 -05:00
uint32_t lpfc_injerr_rgrd_cnt ;
2011-10-10 21:34:11 -04:00
uint32_t lpfc_injerr_rapp_cnt ;
uint32_t lpfc_injerr_rref_cnt ;
2012-03-01 22:38:29 -05:00
uint32_t lpfc_injerr_nportid ;
struct lpfc_name lpfc_injerr_wwpn ;
2011-10-10 21:34:11 -04:00
sector_t lpfc_injerr_lba ;
2012-01-18 16:25:09 -05:00
# define LPFC_INJERR_LBA_OFF (sector_t)(-1)
2011-10-10 21:34:11 -04:00
2007-08-02 11:09:43 -04:00
struct dentry * debug_slow_ring_trc ;
struct lpfc_debugfs_trc * slow_ring_trc ;
atomic_t slow_ring_trc_cnt ;
2011-02-16 12:40:06 -05:00
/* iDiag debugfs sub-directory */
struct dentry * idiag_root ;
struct dentry * idiag_pci_cfg ;
2011-07-22 18:37:42 -04:00
struct dentry * idiag_bar_acc ;
2011-02-16 12:40:06 -05:00
struct dentry * idiag_que_info ;
2011-04-16 11:03:04 -04:00
struct dentry * idiag_que_acc ;
struct dentry * idiag_drb_acc ;
2011-07-22 18:37:42 -04:00
struct dentry * idiag_ctl_acc ;
struct dentry * idiag_mbx_acc ;
struct dentry * idiag_ext_acc ;
2017-02-12 13:52:28 -08:00
uint8_t lpfc_idiag_last_eq ;
2007-06-17 19:56:39 -05:00
# endif
2017-02-12 13:52:33 -08:00
uint16_t nvmeio_trc_on ;
2007-06-17 19:56:39 -05:00
2008-01-11 01:52:36 -05:00
/* Used for deferred freeing of ELS data buffers */
struct list_head elsbuf ;
int elsbuf_cnt ;
int elsbuf_prev_cnt ;
2007-10-27 13:37:05 -04:00
uint8_t temp_sensor_support ;
2007-06-17 19:56:39 -05:00
/* Fields used for heart beat. */
unsigned long last_completion_time ;
2010-08-04 16:11:18 -04:00
unsigned long skipped_hb ;
2007-06-17 19:56:39 -05:00
struct timer_list hb_tmofunc ;
2010-11-20 23:11:55 -05:00
struct timer_list rrq_tmr ;
2008-08-24 21:50:06 -04:00
enum hba_temp_state over_temp_state ;
2007-10-27 13:38:00 -04:00
/*
* Following bit will be set for all buffer tags which are not
* associated with any HBQ .
*/
# define QUE_BUFTAG_BIT (1<<31)
uint32_t buffer_tag_count ;
2008-08-24 21:50:06 -04:00
int wait_4_mlo_maint_flg ;
wait_queue_head_t wait_4_mlo_m_q ;
2008-09-07 11:52:10 -04:00
/* data structure used for latency data collection */
# define LPFC_NO_BUCKET 0
# define LPFC_LINEAR_BUCKET 1
# define LPFC_POWER2_BUCKET 2
uint8_t bucket_type ;
uint32_t bucket_base ;
uint32_t bucket_step ;
/* Maximum number of events that can be outstanding at any time*/
# define LPFC_MAX_EVT_COUNT 512
atomic_t fast_event_count ;
2009-07-19 10:01:21 -04:00
uint32_t fcoe_eventtag ;
uint32_t fcoe_eventtag_at_fcf_scan ;
2012-03-01 22:35:45 -05:00
uint32_t fcoe_cvl_eventtag ;
uint32_t fcoe_cvl_eventtag_attn ;
2009-05-22 14:51:39 -04:00
struct lpfc_fcf fcf ;
uint8_t fc_map [ 3 ] ;
uint8_t valid_vlan ;
uint16_t vlan_id ;
struct list_head fcf_conn_rec_list ;
2009-07-19 10:01:32 -04:00
scsi: lpfc: Defer LS_ACC to FLOGI on point to point logins
In the current discovery state machine, the driver treated FLOGI oddly. When
point to point, an FLOGI is to be exchanged by the two ports, with the port
with the most significant WWN then proceeding with PLOGI. The
implementation in the driver was keyed too closely with "what have I sent",
not with what has happened between the two endpoints. Thus, it blatantly
would ACC an FLOGI, but reject PLOGI's until it had its FLOGI ACC'd. The
problem is - the sending of FLOGI may be delayed for some reason, or the
response to FLOGI held off by the other side. In the failing situation the
other side sent an FLOGI, which was ACC'd, then sent PLOGIs which were then
rjt'd until the retry count for the PLOGIs was exceeded and the port gave
up. The FLOGI may have been very late in transmit, or the response held off
until the PLOGIs failed. Given the other port had the higher WWN, no PLOGIs
would occur and communication stopped.
Correct the situation by changing the FLOGI handling. Defer any response to
an FLOGI until the driver has sent its FLOGI as well. Then, upon either
completion of the sent FLOGI, or upon sending an ACC to a received FLOGI
(which may be received before or just after FLOGI was sent), the driver
will act on who has the higher WWN. If the other port does, the driver will
noop any handling of an FLOGI response (if outstanding) and wait for PLOGI.
If the local port does, the driver will transition to sending PLOGI and
will noop any action on responding to an FLOGI (if not yet received).
Fortunately, to implement this, it only took another state flag and
deferring any FLOGI response if the FLOGI has yet to be transmitted. All
subsequent actions were already in place.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2018-11-29 16:09:36 -08:00
bool defer_flogi_acc_flag ;
uint16_t defer_flogi_acc_rx_id ;
uint16_t defer_flogi_acc_ox_id ;
2010-01-26 23:08:55 -05:00
spinlock_t ct_ev_lock ; /* synchronize access to ct_ev_waiters */
2009-07-19 10:01:32 -04:00
struct list_head ct_ev_waiters ;
2013-01-03 15:43:37 -05:00
struct unsol_rcv_ct_ctx ct_ctx [ LPFC_CT_CTX_MAX ] ;
2009-07-19 10:01:32 -04:00
uint32_t ctx_idx ;
2021-06-08 10:05:47 +05:30
struct timer_list inactive_vmid_poll ;
2010-02-26 14:15:00 -05:00
2018-09-10 10:30:50 -07:00
/* RAS Support */
struct lpfc_ras_fwlog ras_fwlog ;
2010-02-26 14:15:00 -05:00
uint8_t menlo_flag ; /* menlo generic flags */
# define HBA_MENLO_SUPPORT 0x1 /* HBA supports menlo commands */
2010-06-07 15:24:45 -04:00
uint32_t iocb_cnt ;
uint32_t iocb_max ;
2010-06-08 18:31:54 -04:00
atomic_t sdev_cnt ;
2014-02-20 09:56:45 -05:00
spinlock_t devicelock ; /* lock for luns list */
mempool_t * device_data_mem_pool ;
struct list_head luns ;
2016-07-06 12:35:54 -07:00
# define LPFC_TRANSGRESSION_HIGH_TEMPERATURE 0x0080
# define LPFC_TRANSGRESSION_LOW_TEMPERATURE 0x0040
# define LPFC_TRANSGRESSION_HIGH_VOLTAGE 0x0020
# define LPFC_TRANSGRESSION_LOW_VOLTAGE 0x0010
# define LPFC_TRANSGRESSION_HIGH_TXBIAS 0x0008
# define LPFC_TRANSGRESSION_LOW_TXBIAS 0x0004
# define LPFC_TRANSGRESSION_HIGH_TXPOWER 0x0002
# define LPFC_TRANSGRESSION_LOW_TXPOWER 0x0001
# define LPFC_TRANSGRESSION_HIGH_RXPOWER 0x8000
# define LPFC_TRANSGRESSION_LOW_RXPOWER 0x4000
uint16_t sfp_alarm ;
uint16_t sfp_warning ;
2017-02-12 13:52:33 -08:00
# ifdef CONFIG_SCSI_LPFC_DEBUG_FS
2020-03-22 11:13:00 -07:00
uint16_t hdwqstat_on ;
2017-02-12 13:52:33 -08:00
# define LPFC_CHECK_OFF 0
# define LPFC_CHECK_NVME_IO 1
2020-03-22 11:13:00 -07:00
# define LPFC_CHECK_NVMET_IO 2
# define LPFC_CHECK_SCSI_IO 4
2017-02-12 13:52:33 -08:00
uint16_t ktime_on ;
uint64_t ktime_data_samples ;
uint64_t ktime_status_samples ;
uint64_t ktime_last_cmd ;
uint64_t ktime_seg1_total ;
uint64_t ktime_seg1_min ;
uint64_t ktime_seg1_max ;
uint64_t ktime_seg2_total ;
uint64_t ktime_seg2_min ;
uint64_t ktime_seg2_max ;
uint64_t ktime_seg3_total ;
uint64_t ktime_seg3_min ;
uint64_t ktime_seg3_max ;
uint64_t ktime_seg4_total ;
uint64_t ktime_seg4_min ;
uint64_t ktime_seg4_max ;
uint64_t ktime_seg5_total ;
uint64_t ktime_seg5_min ;
uint64_t ktime_seg5_max ;
uint64_t ktime_seg6_total ;
uint64_t ktime_seg6_min ;
uint64_t ktime_seg6_max ;
uint64_t ktime_seg7_total ;
uint64_t ktime_seg7_min ;
uint64_t ktime_seg7_max ;
uint64_t ktime_seg8_total ;
uint64_t ktime_seg8_min ;
uint64_t ktime_seg8_max ;
uint64_t ktime_seg9_total ;
uint64_t ktime_seg9_min ;
uint64_t ktime_seg9_max ;
uint64_t ktime_seg10_total ;
uint64_t ktime_seg10_min ;
uint64_t ktime_seg10_max ;
# endif
2021-08-16 09:28:50 -07:00
/* CMF objects */
2021-08-16 09:28:54 -07:00
struct lpfc_cgn_stat __percpu * cmf_stat ;
uint32_t cmf_interval_rate ; /* timer interval limit in ms */
uint32_t cmf_timer_cnt ;
2021-08-16 09:28:53 -07:00
# define LPFC_CMF_INTERVAL 90
2021-08-16 09:28:54 -07:00
uint64_t cmf_link_byte_count ;
uint64_t cmf_max_line_rate ;
uint64_t cmf_max_bytes_per_interval ;
uint64_t cmf_last_sync_bw ;
2021-08-16 09:28:53 -07:00
# define LPFC_CMF_BLK_SIZE 512
2021-08-16 09:28:54 -07:00
struct hrtimer cmf_timer ;
atomic_t cmf_bw_wait ;
atomic_t cmf_busy ;
atomic_t cmf_stop_io ; /* To block request and stop IO's */
uint32_t cmf_active_mode ;
uint32_t cmf_info_per_interval ;
2021-08-16 09:28:53 -07:00
# define LPFC_MAX_CMF_INFO 32
2021-08-16 09:28:54 -07:00
struct timespec64 cmf_latency ; /* Interval congestion timestamp */
uint32_t cmf_last_ts ; /* Interval congestion time (ms) */
uint32_t cmf_active_info ;
2021-08-16 09:28:53 -07:00
2021-08-16 09:28:50 -07:00
/* Signal / FPIN handling for Congestion Mgmt */
u8 cgn_reg_fpin ; /* Negotiated value from RDF */
u8 cgn_init_reg_fpin ; /* Initial value from READ_CONFIG */
# define LPFC_CGN_FPIN_NONE 0x0
# define LPFC_CGN_FPIN_WARN 0x1
# define LPFC_CGN_FPIN_ALARM 0x2
# define LPFC_CGN_FPIN_BOTH (LPFC_CGN_FPIN_WARN | LPFC_CGN_FPIN_ALARM)
u8 cgn_reg_signal ; /* Negotiated value from EDC */
u8 cgn_init_reg_signal ; /* Initial value from READ_CONFIG */
/* cgn_reg_signal and cgn_init_reg_signal use
* enum fc_edc_cg_signal_cap_types
*/
u16 cgn_fpin_frequency ;
# define LPFC_FPIN_INIT_FREQ 0xffff
u32 cgn_sig_freq ;
u32 cgn_acqe_cnt ;
2021-08-16 09:28:55 -07:00
/* RX monitor handling for CMF */
struct rxtable_entry * rxtable ; /* RX_monitor information */
atomic_t rxtable_idx_head ;
# define LPFC_RXMONITOR_TABLE_IN_USE (LPFC_MAX_RXMONITOR_ENTRY + 73)
atomic_t rxtable_idx_tail ;
atomic_t rx_max_read_cnt ; /* Maximum read bytes */
2021-08-16 09:28:54 -07:00
uint64_t rx_block_cnt ;
2021-08-16 09:28:52 -07:00
/* Congestion parameters from flash */
struct lpfc_cgn_param cgn_p ;
2021-08-16 09:28:50 -07:00
/* Statistics counter for ACQE cgn alarms and warnings */
struct lpfc_cgn_acqe_stat cgn_acqe_stat ;
/* Congestion buffer information */
2021-08-16 09:28:51 -07:00
struct lpfc_dmabuf * cgn_i ; /* Congestion Info buffer */
2021-08-16 09:28:50 -07:00
atomic_t cgn_fabric_warn_cnt ; /* Total warning cgn events for info */
atomic_t cgn_fabric_alarm_cnt ; /* Total alarm cgn events for info */
atomic_t cgn_sync_warn_cnt ; /* Total warning events for SYNC wqe */
atomic_t cgn_sync_alarm_cnt ; /* Total alarm events for SYNC wqe */
2021-08-16 09:28:51 -07:00
atomic_t cgn_driver_evt_cnt ; /* Total driver cgn events for fmw */
atomic_t cgn_latency_evt_cnt ;
struct timespec64 cgn_daily_ts ;
atomic64_t cgn_latency_evt ; /* Avg latency per minute */
unsigned long cgn_evt_timestamp ;
# define LPFC_CGN_TIMER_TO_MIN 60000 /* ms in a minute */
uint32_t cgn_evt_minute ;
# define LPFC_SEC_MIN 60
# define LPFC_MIN_HOUR 60
# define LPFC_HOUR_DAY 24
# define LPFC_MIN_DAY (LPFC_MIN_HOUR * LPFC_HOUR_DAY)
2019-11-04 16:57:05 -08:00
struct hlist_node cpuhp ; /* used for cpuhp per hba callback */
struct timer_list cpuhp_poll_timer ;
struct list_head poll_list ; /* slowpath eq polling list */
# define LPFC_POLL_HB 1 /* slowpath heartbeat */
# define LPFC_POLL_FASTPATH 0 /* called from fastpath */
# define LPFC_POLL_SLOWPATH 1 /* called from slowpath */
2019-12-18 15:58:02 -08:00
char os_host_name [ MAXHOSTNAMELEN ] ;
2020-03-22 11:12:56 -07:00
/* SCSI host template information - for physical port */
struct scsi_host_template port_template ;
/* SCSI host template information - for all vports */
struct scsi_host_template vport_template ;
2020-06-30 14:50:00 -07:00
atomic_t dbg_log_idx ;
atomic_t dbg_log_cnt ;
atomic_t dbg_log_dmping ;
struct dbg_log_ent dbg_log [ DBG_LOG_SZ ] ;
2005-04-17 16:05:31 -05:00
} ;
2021-08-16 09:28:55 -07:00
#define LPFC_MAX_RXMONITOR_ENTRY	800
#define LPFC_MAX_RXMONITOR_DUMP		32

/*
 * One record of RX monitor information; struct lpfc_hba refers to these
 * through its rxtable pointer.
 */
struct rxtable_entry {
	/* 64-bit byte counters and averages */
	uint64_t cmf_bytes;		/* Total no of read bytes for CMF_SYNC_WQE */
	uint64_t total_bytes;		/* Total no of read bytes requested */
	uint64_t rcv_bytes;		/* Total no of read bytes completed */
	uint64_t avg_io_size;
	uint64_t avg_io_latency;	/* Average io latency in microseconds */
	uint64_t max_read_cnt;		/* Maximum read bytes */
	uint64_t max_bytes_per_interval;

	/* 32-bit per-interval values */
	uint32_t cmf_busy;
	uint32_t cmf_info;		/* CMF_SYNC_WQE info */
	uint32_t io_cnt;
	uint32_t timer_utilization;
	uint32_t timer_interval;
};
2007-06-17 19:56:36 -05:00
/**
 * lpfc_shost_from_vport - Map a vport back to its enclosing Scsi_Host
 * @vport: Pointer to the virtual port object.
 *
 * Treats @vport as the address of the hostdata[0] member of a
 * struct Scsi_Host and returns the Scsi_Host that contains it.
 **/
static inline struct Scsi_Host *
lpfc_shost_from_vport(struct lpfc_vport *vport)
{
	void *priv = vport;

	return container_of(priv, struct Scsi_Host, hostdata[0]);
}
2007-04-25 09:52:49 -04:00
static inline void
2007-06-17 19:56:36 -05:00
lpfc_set_loopback_flag ( struct lpfc_hba * phba )
{
2007-04-25 09:52:49 -04:00
if ( phba - > cfg_topology = = FLAGS_LOCAL_LB )
2007-06-17 19:56:36 -05:00
phba - > link_flag | = LS_LOOPBACK_MODE ;
2007-04-25 09:52:49 -04:00
else
2007-06-17 19:56:36 -05:00
phba - > link_flag & = ~ LS_LOOPBACK_MODE ;
}
static inline int
lpfc_is_link_up ( struct lpfc_hba * phba )
{
return phba - > link_state = = LPFC_LINK_UP | |
2007-06-17 19:56:38 -05:00
phba - > link_state = = LPFC_CLEAR_LA | |
phba - > link_state = = LPFC_HBA_READY ;
2007-04-25 09:52:49 -04:00
}
2005-04-17 16:05:31 -05:00
2008-06-14 22:52:53 -04:00
static inline void
lpfc_worker_wake_up ( struct lpfc_hba * phba )
{
/* Set the lpfc data pending flag */
set_bit ( LPFC_DATA_READY , & phba - > data_flags ) ;
/* Wake up worker thread */
wake_up ( & phba - > work_waitq ) ;
return ;
}
2011-03-11 16:06:12 -05:00
/**
 * lpfc_readl - Read a 32-bit register and reject an all-ones result
 * @addr: I/O memory address to read.
 * @data: Where to store the value; written only on success.
 *
 * Return: 0 with *@data updated, or -EIO when the register reads back as
 * 0xffffffff (in which case *@data is left untouched).
 **/
static inline int
lpfc_readl(void __iomem *addr, uint32_t *data)
{
	uint32_t val = readl(addr);

	if (val != 0xffffffff) {
		*data = val;
		return 0;
	}

	return -EIO;
}
static inline int
2008-08-24 21:50:30 -04:00
lpfc_sli_read_hs ( struct lpfc_hba * phba )
{
/*
* There was a link / board error . Read the status register to retrieve
* the error event and process it .
*/
phba - > sli . slistat . err_attn_event + + ;
2011-03-11 16:06:12 -05:00
/* Save status info and check for unplug error */
if ( lpfc_readl ( phba - > HSregaddr , & phba - > work_hs ) | |
lpfc_readl ( phba - > MBslimaddr + 0xa8 , & phba - > work_status [ 0 ] ) | |
lpfc_readl ( phba - > MBslimaddr + 0xac , & phba - > work_status [ 1 ] ) ) {
return - EIO ;
}
2008-08-24 21:50:30 -04:00
/* Clear chip Host Attention error bit */
writel ( HA_ERATT , phba - > HAregaddr ) ;
readl ( phba - > HAregaddr ) ; /* flush */
phba - > pport - > stopped = 1 ;
2011-03-11 16:06:12 -05:00
return 0 ;
2008-08-24 21:50:30 -04:00
}
2017-02-12 13:52:30 -08:00
static inline struct lpfc_sli_ring *
lpfc_phba_elsring ( struct lpfc_hba * phba )
{
2018-11-29 16:09:32 -08:00
/* Return NULL if sli_rev has become invalid due to bad fw */
if ( phba - > sli_rev ! = LPFC_SLI_REV4 & &
phba - > sli_rev ! = LPFC_SLI_REV3 & &
phba - > sli_rev ! = LPFC_SLI_REV2 )
return NULL ;
2017-05-15 15:20:39 -07:00
if ( phba - > sli_rev = = LPFC_SLI_REV4 ) {
if ( phba - > sli4_hba . els_wq )
return phba - > sli4_hba . els_wq - > pring ;
else
return NULL ;
}
2017-02-12 13:52:30 -08:00
return & phba - > sli . sli3_ring [ LPFC_ELS_RING ] ;
}
scsi: lpfc: Rework EQ/CQ processing to address interrupt coalescing
When driving high iop counts, auto_imax coalescing kicks in and drives the
performance to extremely small iops levels.
There are two issues:
1) auto_imax is enabled by default. The auto algorithm, when iops gets
high, divides the iops by the hdwq count and uses that value to
calculate EQ_Delay. The EQ_Delay is set uniformly on all EQs whether
they have load or not. The EQ_delay is only manipulated every 5s (a
long time). Thus there were large 5s swings of no interrupt delay
followed by large/maximum delay, before repeating.
2) When processing a CQ, the driver got mixed up on the rate of when
to ring the doorbell to keep the chip apprised of the eqe or cqe
consumption as well as how long to sit in the thread and
process queue entries. Currently, the driver capped its work at
64 entries (very small) and exited/rearmed the CQ. Thus, on heavy
loads, additional overheads were taken to exit and re-enter the
interrupt handler. Worse, if in the large/maximum coalescing
windows, it could be a while before getting back to servicing.
The issues are corrected by the following:
- A change in defaults. Auto_imax is turned OFF and fcp_imax is set
to 0. Thus all interrupts are immediate.
- Cleanup of field names and their meanings. Existing names were
non-intuitive or used for duplicate things.
- Added max_proc_limit field, to control the length of time the
handlers would service completions.
- Reworked EQ handling:
Added common routine that walks eq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after eqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Moved lpfc_sli4_eq_flush(), which does similar action, to same area.
Replaced the 2 individual loops that walk an eq with a call to the
common routine.
Slightly revised lpfc_sli4_hba_handle_eqe() calling syntax.
Added per-cpu counters to detect interrupt rates and scale
interrupt coalescing values.
- Reworked CQ handling:
Added common routine that walks cq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after cqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Replaced the 3 individual loops that walk a cq with a call to the
common routine.
Redefined lpfc_sli4_sp_handle_mcqe() to the common handler definition with
a queue reference. Added increment for mbox completion to the handler.
- Added a new module/sysfs attribute: lpfc_cq_max_proc_limit To allow
dynamic changing of the CQ max_proc_limit value being used.
Although this leaves an EQ as an immediate interrupt, that interrupt will
only occur if a CQ bound to it is in an armed state and has cqe's to
process. By staying in the cq processing routine longer, high loads will
avoid generating more interrupts as they will only rearm as the processing
thread exits. The immediate interrupt is also beneficial to idle or
lower-processing CQ's as they get serviced immediately without being
penalized by sharing an EQ with a more loaded CQ.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:33 -08:00
scsi: lpfc: Change default IRQ model on AMD architectures
The current driver attempts to allocate an interrupt vector per cpu using
the systems managed IRQ allocator (flag PCI_IRQ_AFFINITY). The system IRQ
allocator will either provide the per-cpu vector, or return fewer
vectors. When fewer vectors, they are evenly spread between the numa nodes
on the system. When run on an AMD architecture, if interrupts occur to a
cpu that is not in the same numa node as the adapter generating the
interrupt, there are extreme costs and overheads in performance. Thus, if
1:1 vector allocation is used, or the "balanced" vectors in the other numa
nodes, performance can be hit significantly.
A much more performant model is to allocate interrupts only on the cpus
that are in the numa node where the adapter resides. I/O completion is
still performed by the cpu where the I/O was generated. Unfortunately,
there is no flag to request the managed IRQ subsystem allocate vectors only
for the CPUs in the numa node as the adapter.
On AMD architecture, revert the irq allocation to the normal style
(non-managed) and then use irq_set_affinity_hint() to set the cpu
affinity and disable user-space rebalancing.
Tie the support into CPU offline/online. If the cpu being offlined owns a
vector, the vector is re-affinitized to one of the other CPUs on the same
numa node. If there are no more CPUs on the numa node, the vector has all
affinity removed and lets the system determine where it's serviced.
Similarly, when the cpu that owned a vector comes online, the vector is
reaffinitized to the cpu.
Link: https://lore.kernel.org/r/20191105005708.7399-10-jsmart2021@gmail.com
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-11-04 16:57:06 -08:00
/**
2020-05-01 14:43:06 -07:00
* lpfc_next_online_cpu - Finds next online CPU on cpumask
* @ mask : Pointer to phba ' s cpumask member .
scsi: lpfc: Change default IRQ model on AMD architectures
The current driver attempts to allocate an interrupt vector per cpu using
the systems managed IRQ allocator (flag PCI_IRQ_AFFINITY). The system IRQ
allocator will either provide the per-cpu vector, or return fewer
vectors. When fewer vectors, they are evenly spread between the numa nodes
on the system. When run on an AMD architecture, if interrupts occur to a
cpu that is not in the same numa node as the adapter generating the
interrupt, there are extreme costs and overheads in performance. Thus, if
1:1 vector allocation is used, or the "balanced" vectors in the other numa
nodes, performance can be hit significantly.
A much more performant model is to allocate interrupts only on the cpus
that are in the numa node where the adapter resides. I/O completion is
still performed by the cpu where the I/O was generated. Unfortunately,
there is no flag to request the managed IRQ subsystem allocate vectors only
for the CPUs in the numa node as the adapter.
On AMD architecture, revert the irq allocation to the normal style
(non-managed) and then use irq_set_affinity_hint() to set the cpu
affinity and disable user-space rebalancing.
Tie the support into CPU offline/online. If the cpu being offlined owns a
vector, the vector is re-affinitized to one of the other CPUs on the same
numa node. If there are no more CPUs on the numa node, the vector has all
affinity removed and lets the system determine where it's serviced.
Similarly, when the cpu that owned a vector comes online, the vector is
reaffinitized to the cpu.
Link: https://lore.kernel.org/r/20191105005708.7399-10-jsmart2021@gmail.com
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-11-04 16:57:06 -08:00
* @ start : starting cpu index
*
* Note : If no valid cpu found , then nr_cpu_ids is returned .
*
* */
static inline unsigned int
2020-05-01 14:43:06 -07:00
lpfc_next_online_cpu ( const struct cpumask * mask , unsigned int start )
scsi: lpfc: Change default IRQ model on AMD architectures
The current driver attempts to allocate an interrupt vector per cpu using
the systems managed IRQ allocator (flag PCI_IRQ_AFFINITY). The system IRQ
allocator will either provide the per-cpu vector, or return fewer
vectors. When fewer vectors, they are evenly spread between the numa nodes
on the system. When run on an AMD architecture, if interrupts occur to a
cpu that is not in the same numa node as the adapter generating the
interrupt, there are extreme costs and overheads in performance. Thus, if
1:1 vector allocation is used, or the "balanced" vectors in the other numa
nodes, performance can be hit significantly.
A much more performant model is to allocate interrupts only on the cpus
that are in the numa node where the adapter resides. I/O completion is
still performed by the cpu where the I/O was generated. Unfortunately,
there is no flag to request the managed IRQ subsystem allocate vectors only
for the CPUs in the numa node as the adapter.
On AMD architecture, revert the irq allocation to the normal style
(non-managed) and then use irq_set_affinity_hint() to set the cpu
affinity and disable user-space rebalancing.
Tie the support into CPU offline/online. If the cpu being offlined owns a
vector, the vector is re-affinitized to one of the other CPUs on the same
numa node. If there are no more CPUs on the numa node, the vector has all
affinity removed and lets the system determine where it's serviced.
Similarly, when the cpu that owned a vector comes online, the vector is
reaffinitized to the cpu.
Link: https://lore.kernel.org/r/20191105005708.7399-10-jsmart2021@gmail.com
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-11-04 16:57:06 -08:00
{
unsigned int cpu_it ;
2020-05-01 14:43:06 -07:00
for_each_cpu_wrap ( cpu_it , mask , start ) {
scsi: lpfc: Change default IRQ model on AMD architectures
The current driver attempts to allocate an interrupt vector per cpu using
the systems managed IRQ allocator (flag PCI_IRQ_AFFINITY). The system IRQ
allocator will either provide the per-cpu vector, or return fewer
vectors. When fewer vectors, they are evenly spread between the numa nodes
on the system. When run on an AMD architecture, if interrupts occur to a
cpu that is not in the same numa node as the adapter generating the
interrupt, there are extreme costs and overheads in performance. Thus, if
1:1 vector allocation is used, or the "balanced" vectors in the other numa
nodes, performance can be hit significantly.
A much more performant model is to allocate interrupts only on the cpus
that are in the numa node where the adapter resides. I/O completion is
still performed by the cpu where the I/O was generated. Unfortunately,
there is no flag to request the managed IRQ subsystem allocate vectors only
for the CPUs in the numa node as the adapter.
On AMD architecture, revert the irq allocation to the normal style
(non-managed) and then use irq_set_affinity_hint() to set the cpu
affinity and disable user-space rebalancing.
Tie the support into CPU offline/online. If the cpu being offlined owns a
vector, the vector is re-affinitized to one of the other CPUs on the same
numa node. If there are no more CPUs on the numa node, the vector has all
affinity removed and lets the system determine where it's serviced.
Similarly, when the cpu that owned a vector comes online, the vector is
reaffinitized to the cpu.
Link: https://lore.kernel.org/r/20191105005708.7399-10-jsmart2021@gmail.com
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-11-04 16:57:06 -08:00
if ( cpu_online ( cpu_it ) )
break ;
}
return cpu_it ;
}
scsi: lpfc: Rework EQ/CQ processing to address interrupt coalescing
When driving high iop counts, auto_imax coalescing kicks in and drives the
performance to extremely small iops levels.
There are two issues:
1) auto_imax is enabled by default. The auto algorithm, when iops gets
high, divides the iops by the hdwq count and uses that value to
calculate EQ_Delay. The EQ_Delay is set uniformly on all EQs whether
they have load or not. The EQ_delay is only manipulated every 5s (a
long time). Thus there were large 5s swings of no interrupt delay
followed by large/maximum delay, before repeating.
2) When processing a CQ, the driver got mixed up on the rate of when
to ring the doorbell to keep the chip apprised of the eqe or cqe
consumption as well as how long to sit in the thread and
process queue entries. Currently, the driver capped its work at
64 entries (very small) and exited/rearmed the CQ. Thus, on heavy
loads, additional overheads were taken to exit and re-enter the
interrupt handler. Worse, if in the large/maximum coalescing
windows, it could be a while before getting back to servicing.
The issues are corrected by the following:
- A change in defaults. Auto_imax is turned OFF and fcp_imax is set
to 0. Thus all interrupts are immediate.
- Cleanup of field names and their meanings. Existing names were
non-intuitive or used for duplicate things.
- Added max_proc_limit field, to control the length of time the
handlers would service completions.
- Reworked EQ handling:
Added common routine that walks eq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after eqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Moved lpfc_sli4_eq_flush(), which does similar action, to same area.
Replaced the 2 individual loops that walk an eq with a call to the
common routine.
Slightly revised lpfc_sli4_hba_handle_eqe() calling syntax.
Added per-cpu counters to detect interrupt rates and scale
interrupt coalescing values.
- Reworked CQ handling:
Added common routine that walks cq, applying notify interval and max
processing limits. Use queue_claimed to claim ownership of the queue
while processing. Always rearm the queue whenever the common routine
is called.
Rework queue element processing, namely to eliminate hba_index vs
host_index. Only one index is necessary. The queue entry can be
marked invalid and the host_index updated immediately after cqe
processing.
After rework, xx_release routines are now DB write functions. Renamed
the routines as such.
Replaced the 3 individual loops that walk a cq with a call to the
common routine.
Redefined lpfc_sli4_sp_handle_mcqe() to the common handler definition with
a queue reference. Added increment for mbox completion to the handler.
- Added a new module/sysfs attribute: lpfc_cq_max_proc_limit To allow
dynamic changing of the CQ max_proc_limit value being used.
Although this leaves an EQ as an immediate interrupt, that interrupt will
only occur if a CQ bound to it is in an armed state and has cqe's to
process. By staying in the cq processing routine longer, high loads will
avoid generating more interrupts as they will only rearm as the processing
thread exits. The immediate interrupt is also beneficial to idle or
lower-processing CQ's as they get serviced immediately without being
penalized by sharing an EQ with a more loaded CQ.
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-01-28 11:14:33 -08:00
/**
 * lpfc_sli4_mod_hba_eq_delay - update EQ delay
 * @phba: Pointer to HBA context object.
 * @eq: The Event Queue to update.
 * @delay: The delay value (in us) to be written.
 *
 * Builds a register word carrying @eq's queue_id and @delay, writes it to
 * the if_type2 EQDregaddr register, and records @delay in eq->q_mode.
 **/
static inline void
lpfc_sli4_mod_hba_eq_delay(struct lpfc_hba *phba, struct lpfc_queue *eq,
			   u32 delay)
{
	struct lpfc_register eqd_reg = { .word0 = 0 };

	bf_set(lpfc_sliport_eqdelay_id, &eqd_reg, eq->queue_id);
	bf_set(lpfc_sliport_eqdelay_delay, &eqd_reg, delay);
	writel(eqd_reg.word0, phba->sli4_hba.u.if_type2.EQDregaddr);

	/* Remember what was programmed */
	eq->q_mode = delay;
}
2020-02-10 09:31:55 -08:00
/*
 * DECLARE_ENUM2STR_LOOKUP - declare a table and a routine that perform an
 * enum type to ascii string lookup.
 *
 * routine:   name of the static lookup function the macro defines
 * enum_name: tag of the enum being translated (used as "enum enum_name")
 * enum_init: initializer for the table of <value, name> pairs; callers
 *            pass the xxx_INIT define for the enum
 *
 * Expands to a static table named fc_<enum_name>_e2str_names and a static
 * function named by the caller. The function scans the table from its
 * first element and returns the name string of the first entry whose value
 * equals the key, or the "Unrecognized" default string when no entry
 * matches.
 */
# define DECLARE_ENUM2STR_LOOKUP(routine, enum_name, enum_init) \
static struct { \
enum enum_name value ; \
char * name ; \
} fc_ # # enum_name # # _e2str_names [ ] = enum_init ; \
static const char * routine ( enum enum_name table_key ) \
{ \
int i ; \
char * name = " Unrecognized " ; \
\
for ( i = 0 ; i < ARRAY_SIZE ( fc_ # # enum_name # # _e2str_names ) ; i + + ) { \
if ( fc_ # # enum_name # # _e2str_names [ i ] . value = = table_key ) { \
name = fc_ # # enum_name # # _e2str_names [ i ] . name ; \
break ; \
} \
} \
return name ; \
}
2021-06-08 10:05:47 +05:30
/**
* lpfc_is_vmid_enabled - returns if VMID is enabled for either switch types
* @ phba : Pointer to HBA context object .
*
* Relationship between the enable , target support and if vmid tag is required
* for the particular combination
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* Switch Enable Flag Target Support VMID Needed
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* App Id 0 NA N
* App Id 1 0 N
* App Id 1 1 Y
* Pr Tag 0 NA N
* Pr Tag 1 0 N
* Pr Tag 1 1 Y
* Pr Tag 2 * Y
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*
* */
static inline int lpfc_is_vmid_enabled ( struct lpfc_hba * phba )
{
return phba - > cfg_vmid_app_header | | phba - > cfg_vmid_priority_tagging ;
}
2022-02-24 18:22:53 -08:00
static inline
u8 get_job_ulpstatus ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return bf_get ( lpfc_wcqe_c_status , & iocbq - > wcqe_cmpl ) ;
else
return iocbq - > iocb . ulpStatus ;
}
static inline
u32 get_job_word4 ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return iocbq - > wcqe_cmpl . parameter ;
else
return iocbq - > iocb . un . ulpWord [ 4 ] ;
}
static inline
u8 get_job_cmnd ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return bf_get ( wqe_cmnd , & iocbq - > wqe . generic . wqe_com ) ;
else
return iocbq - > iocb . ulpCommand ;
}
static inline
u16 get_job_ulpcontext ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return bf_get ( wqe_ctxt_tag , & iocbq - > wqe . generic . wqe_com ) ;
else
return iocbq - > iocb . ulpContext ;
}
2022-02-24 18:22:55 -08:00
2022-02-24 18:22:56 -08:00
static inline
u16 get_job_rcvoxid ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return bf_get ( wqe_rcvoxid , & iocbq - > wqe . generic . wqe_com ) ;
else
return iocbq - > iocb . unsli3 . rcvsli3 . ox_id ;
}
2022-02-24 18:23:03 -08:00
static inline
u32 get_job_data_placed ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return iocbq - > wcqe_cmpl . total_data_placed ;
else
return iocbq - > iocb . un . genreq64 . bdl . bdeSize ;
}
2022-02-24 18:23:05 -08:00
static inline
u32 get_job_abtsiotag ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return iocbq - > wqe . abort_cmd . wqe_com . abort_tag ;
else
return iocbq - > iocb . un . acxri . abortIoTag ;
}
2022-02-24 18:22:55 -08:00
static inline
u32 get_job_els_rsp64_did ( struct lpfc_hba * phba , struct lpfc_iocbq * iocbq )
{
if ( phba - > sli_rev = = LPFC_SLI_REV4 )
return bf_get ( wqe_els_did , & iocbq - > wqe . els_req . wqe_dest ) ;
else
return iocbq - > iocb . un . elsreq64 . remoteID ;
}