Merge branch 'wip/dl-for-next' into for-next
Due to concurrent work by myself and Jason, a normal fast forward merge was not possible. This brings in a number of hfi1 changes, mainly the hfi1 TID RDMA support (roughly 10,000 LOC change), which was reviewed and integrated over a period of days. Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
commit
82771f2033
@ -24,6 +24,7 @@ hfi1-y := \
|
||||
mad.o \
|
||||
mmu_rb.o \
|
||||
msix.o \
|
||||
opfn.o \
|
||||
pcie.o \
|
||||
pio.o \
|
||||
pio_copy.o \
|
||||
|
@ -4253,6 +4253,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
|
||||
access_sw_pio_drain),
|
||||
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
|
||||
access_sw_kmem_wait),
|
||||
[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
|
||||
hfi1_access_sw_tid_wait),
|
||||
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
|
||||
access_sw_send_schedule),
|
||||
[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
|
||||
@ -5222,6 +5224,17 @@ int is_bx(struct hfi1_devdata *dd)
|
||||
return (chip_rev_minor & 0xF0) == 0x10;
|
||||
}
|
||||
|
||||
/* return true is kernel urg disabled for rcd */
|
||||
bool is_urg_masked(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
u64 mask;
|
||||
u32 is = IS_RCVURGENT_START + rcd->ctxt;
|
||||
u8 bit = is % 64;
|
||||
|
||||
mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
|
||||
return !(mask & BIT_ULL(bit));
|
||||
}
|
||||
|
||||
/*
|
||||
* Append string s to buffer buf. Arguments curp and len are the current
|
||||
* position and remaining length, respectively.
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef _CHIP_H
|
||||
#define _CHIP_H
|
||||
/*
|
||||
* Copyright(c) 2015 - 2017 Intel Corporation.
|
||||
* Copyright(c) 2015 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -804,6 +804,7 @@ void clear_linkup_counters(struct hfi1_devdata *dd);
|
||||
u32 hdrqempty(struct hfi1_ctxtdata *rcd);
|
||||
int is_ax(struct hfi1_devdata *dd);
|
||||
int is_bx(struct hfi1_devdata *dd);
|
||||
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
|
||||
u32 read_physical_state(struct hfi1_devdata *dd);
|
||||
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
|
||||
const char *opa_lstate_name(u32 lstate);
|
||||
@ -926,6 +927,7 @@ enum {
|
||||
C_SW_PIO_WAIT,
|
||||
C_SW_PIO_DRAIN,
|
||||
C_SW_KMEM_WAIT,
|
||||
C_SW_TID_WAIT,
|
||||
C_SW_SEND_SCHED,
|
||||
C_SDMA_DESC_FETCHED_CNT,
|
||||
C_SDMA_INT_CNT,
|
||||
|
@ -340,6 +340,10 @@ struct diag_pkt {
|
||||
|
||||
#define HFI1_PSM_IOC_BASE_SEQ 0x0
|
||||
|
||||
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
|
||||
#define HFI1_KDETH_BTH_SEQ_SHIFT 11
|
||||
#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
|
||||
|
||||
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
|
||||
{
|
||||
return __le64_to_cpu(*((__le64 *)rbuf));
|
||||
|
@ -1575,25 +1575,32 @@ drop:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void handle_eflags(struct hfi1_packet *packet)
|
||||
static void show_eflags_errs(struct hfi1_packet *packet)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd = packet->rcd;
|
||||
u32 rte = rhf_rcv_type_err(packet->rhf);
|
||||
|
||||
dd_dev_err(rcd->dd,
|
||||
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
|
||||
rcd->ctxt, packet->rhf,
|
||||
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
|
||||
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
|
||||
packet->rhf & RHF_DC_ERR ? "dc " : "",
|
||||
packet->rhf & RHF_TID_ERR ? "tid " : "",
|
||||
packet->rhf & RHF_LEN_ERR ? "len " : "",
|
||||
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
|
||||
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
|
||||
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
|
||||
rte);
|
||||
}
|
||||
|
||||
void handle_eflags(struct hfi1_packet *packet)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd = packet->rcd;
|
||||
|
||||
rcv_hdrerr(rcd, rcd->ppd, packet);
|
||||
if (rhf_err_flags(packet->rhf))
|
||||
dd_dev_err(rcd->dd,
|
||||
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
|
||||
rcd->ctxt, packet->rhf,
|
||||
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
|
||||
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
|
||||
packet->rhf & RHF_DC_ERR ? "dc " : "",
|
||||
packet->rhf & RHF_TID_ERR ? "tid " : "",
|
||||
packet->rhf & RHF_LEN_ERR ? "len " : "",
|
||||
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
|
||||
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
|
||||
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
|
||||
rte);
|
||||
show_eflags_errs(packet);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1699,11 +1706,14 @@ static int kdeth_process_expected(struct hfi1_packet *packet)
|
||||
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
|
||||
return RHF_RCV_CONTINUE;
|
||||
|
||||
if (unlikely(rhf_err_flags(packet->rhf)))
|
||||
handle_eflags(packet);
|
||||
if (unlikely(rhf_err_flags(packet->rhf))) {
|
||||
struct hfi1_ctxtdata *rcd = packet->rcd;
|
||||
|
||||
dd_dev_err(packet->rcd->dd,
|
||||
"Unhandled expected packet received. Dropping.\n");
|
||||
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
|
||||
return RHF_RCV_CONTINUE;
|
||||
}
|
||||
|
||||
hfi1_kdeth_expected_rcv(packet);
|
||||
return RHF_RCV_CONTINUE;
|
||||
}
|
||||
|
||||
@ -1712,11 +1722,17 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
|
||||
hfi1_setup_9B_packet(packet);
|
||||
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
|
||||
return RHF_RCV_CONTINUE;
|
||||
if (unlikely(rhf_err_flags(packet->rhf)))
|
||||
handle_eflags(packet);
|
||||
|
||||
dd_dev_err(packet->rcd->dd,
|
||||
"Unhandled eager packet received. Dropping.\n");
|
||||
trace_hfi1_rcvhdr(packet);
|
||||
if (unlikely(rhf_err_flags(packet->rhf))) {
|
||||
struct hfi1_ctxtdata *rcd = packet->rcd;
|
||||
|
||||
show_eflags_errs(packet);
|
||||
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
|
||||
return RHF_RCV_CONTINUE;
|
||||
}
|
||||
|
||||
hfi1_kdeth_eager_rcv(packet);
|
||||
return RHF_RCV_CONTINUE;
|
||||
}
|
||||
|
||||
|
@ -73,6 +73,7 @@
|
||||
|
||||
#include "chip_registers.h"
|
||||
#include "common.h"
|
||||
#include "opfn.h"
|
||||
#include "verbs.h"
|
||||
#include "pio.h"
|
||||
#include "chip.h"
|
||||
@ -98,6 +99,8 @@
|
||||
#define NEIGHBOR_TYPE_HFI 0
|
||||
#define NEIGHBOR_TYPE_SWITCH 1
|
||||
|
||||
#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
|
||||
|
||||
extern unsigned long hfi1_cap_mask;
|
||||
#define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
|
||||
#define HFI1_CAP_UGET_MASK(mask, cap) \
|
||||
@ -195,6 +198,14 @@ struct exp_tid_set {
|
||||
};
|
||||
|
||||
typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
|
||||
|
||||
struct tid_queue {
|
||||
struct list_head queue_head;
|
||||
/* queue head for QP TID resource waiters */
|
||||
u32 enqueue; /* count of tid enqueues */
|
||||
u32 dequeue; /* count of tid dequeues */
|
||||
};
|
||||
|
||||
struct hfi1_ctxtdata {
|
||||
/* rcvhdrq base, needs mmap before useful */
|
||||
void *rcvhdrq;
|
||||
@ -288,6 +299,12 @@ struct hfi1_ctxtdata {
|
||||
/* PSM Specific fields */
|
||||
/* lock protecting all Expected TID data */
|
||||
struct mutex exp_mutex;
|
||||
/* lock protecting all Expected TID data of kernel contexts */
|
||||
spinlock_t exp_lock;
|
||||
/* Queue for QP's waiting for HW TID flows */
|
||||
struct tid_queue flow_queue;
|
||||
/* Queue for QP's waiting for HW receive array entries */
|
||||
struct tid_queue rarr_queue;
|
||||
/* when waiting for rcv or pioavail */
|
||||
wait_queue_head_t wait;
|
||||
/* uuid from PSM */
|
||||
@ -320,6 +337,9 @@ struct hfi1_ctxtdata {
|
||||
*/
|
||||
u8 subctxt_cnt;
|
||||
|
||||
/* Bit mask to track free TID RDMA HW flows */
|
||||
unsigned long flow_mask;
|
||||
struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
|
||||
};
|
||||
|
||||
/**
|
||||
@ -2100,7 +2120,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
|
||||
SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
|
||||
#endif
|
||||
HFI1_PKT_USER_SC_INTEGRITY;
|
||||
else
|
||||
else if (ctxt_type != SC_KERNEL)
|
||||
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
|
||||
|
||||
/* turn on send-side job key checks if !A0 */
|
||||
|
@ -72,7 +72,6 @@
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
|
||||
|
||||
#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
|
||||
/*
|
||||
* min buffers we want to have per context, after driver
|
||||
*/
|
||||
@ -371,6 +370,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
|
||||
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
|
||||
|
||||
mutex_init(&rcd->exp_mutex);
|
||||
spin_lock_init(&rcd->exp_lock);
|
||||
INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
|
||||
INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
|
||||
|
||||
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
|
||||
|
||||
@ -473,6 +475,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
|
||||
GFP_KERNEL, numa);
|
||||
if (!rcd->opstats)
|
||||
goto bail;
|
||||
|
||||
/* Initialize TID flow generations for the context */
|
||||
hfi1_kern_init_ctxt_generations(rcd);
|
||||
}
|
||||
|
||||
*context = rcd;
|
||||
@ -772,6 +777,8 @@ static void enable_chip(struct hfi1_devdata *dd)
|
||||
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
|
||||
if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
|
||||
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
|
||||
if (HFI1_CAP_IS_KSET(TID_RDMA))
|
||||
rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
|
||||
hfi1_rcvctrl(dd, rcvmask, rcd);
|
||||
sc_enable(rcd->sc);
|
||||
hfi1_rcd_put(rcd);
|
||||
@ -927,6 +934,8 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
|
||||
lastfail = hfi1_create_rcvhdrq(dd, rcd);
|
||||
if (!lastfail)
|
||||
lastfail = hfi1_setup_eagerbufs(rcd);
|
||||
if (!lastfail)
|
||||
lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
|
||||
if (lastfail) {
|
||||
dd_dev_err(dd,
|
||||
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
|
||||
@ -1497,6 +1506,13 @@ static int __init hfi1_mod_init(void)
|
||||
/* sanitize link CRC options */
|
||||
link_crc_mask &= SUPPORTED_CRCS;
|
||||
|
||||
ret = opfn_init();
|
||||
if (ret < 0) {
|
||||
pr_err("Failed to allocate opfn_wq");
|
||||
goto bail_dev;
|
||||
}
|
||||
|
||||
hfi1_compute_tid_rdma_flow_wt();
|
||||
/*
|
||||
* These must be called before the driver is registered with
|
||||
* the PCI subsystem.
|
||||
@ -1527,6 +1543,7 @@ module_init(hfi1_mod_init);
|
||||
static void __exit hfi1_mod_cleanup(void)
|
||||
{
|
||||
pci_unregister_driver(&hfi1_pci_driver);
|
||||
opfn_exit();
|
||||
node_affinity_destroy_all();
|
||||
hfi1_dbg_exit();
|
||||
|
||||
@ -1581,7 +1598,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
|
||||
struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
|
||||
|
||||
if (rcd) {
|
||||
hfi1_clear_tids(rcd);
|
||||
hfi1_free_ctxt_rcv_groups(rcd);
|
||||
hfi1_free_ctxt(rcd);
|
||||
}
|
||||
}
|
||||
|
@ -6,6 +6,9 @@
|
||||
#include "iowait.h"
|
||||
#include "trace_iowait.h"
|
||||
|
||||
/* 1 priority == 16 starve_cnt */
|
||||
#define IOWAIT_PRIORITY_STARVE_SHIFT 4
|
||||
|
||||
void iowait_set_flag(struct iowait *wait, u32 flag)
|
||||
{
|
||||
trace_hfi1_iowait_set(wait, flag);
|
||||
@ -44,7 +47,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
|
||||
uint seq,
|
||||
bool pkts_sent),
|
||||
void (*wakeup)(struct iowait *wait, int reason),
|
||||
void (*sdma_drained)(struct iowait *wait))
|
||||
void (*sdma_drained)(struct iowait *wait),
|
||||
void (*init_priority)(struct iowait *wait))
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -58,6 +62,7 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
|
||||
wait->sleep = sleep;
|
||||
wait->wakeup = wakeup;
|
||||
wait->sdma_drained = sdma_drained;
|
||||
wait->init_priority = init_priority;
|
||||
wait->flags = 0;
|
||||
for (i = 0; i < IOWAIT_SES; i++) {
|
||||
wait->wait[i].iow = wait;
|
||||
@ -92,3 +97,30 @@ int iowait_set_work_flag(struct iowait_work *w)
|
||||
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
|
||||
return IOWAIT_TID_SE;
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_priority_update_top - update the top priority entry
|
||||
* @w: the iowait struct
|
||||
* @top: a pointer to the top priority entry
|
||||
* @idx: the index of the current iowait in an array
|
||||
* @top_idx: the array index for the iowait entry that has the top priority
|
||||
*
|
||||
* This function is called to compare the priority of a given
|
||||
* iowait with the given top priority entry. The top index will
|
||||
* be returned.
|
||||
*/
|
||||
uint iowait_priority_update_top(struct iowait *w,
|
||||
struct iowait *top,
|
||||
uint idx, uint top_idx)
|
||||
{
|
||||
u8 cnt, tcnt;
|
||||
|
||||
/* Convert priority into starve_cnt and compare the total.*/
|
||||
cnt = (w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) + w->starved_cnt;
|
||||
tcnt = (top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
|
||||
top->starved_cnt;
|
||||
if (cnt > tcnt)
|
||||
return idx;
|
||||
else
|
||||
return top_idx;
|
||||
}
|
||||
|
@ -100,6 +100,7 @@ struct iowait_work {
|
||||
* @sleep: no space callback
|
||||
* @wakeup: space callback wakeup
|
||||
* @sdma_drained: sdma count drained
|
||||
* @init_priority: callback to manipulate priority
|
||||
* @lock: lock protected head of wait queue
|
||||
* @iowork: workqueue overhead
|
||||
* @wait_dma: wait for sdma_busy == 0
|
||||
@ -109,7 +110,7 @@ struct iowait_work {
|
||||
* @tx_limit: limit for overflow queuing
|
||||
* @tx_count: number of tx entry's in tx_head'ed list
|
||||
* @flags: wait flags (one per QP)
|
||||
* @wait: SE array
|
||||
* @wait: SE array for multiple legs
|
||||
*
|
||||
* This is to be embedded in user's state structure
|
||||
* (QP or PQ).
|
||||
@ -120,10 +121,13 @@ struct iowait_work {
|
||||
* are callbacks for the ULP to implement
|
||||
* what ever queuing/dequeuing of
|
||||
* the embedded iowait and its containing struct
|
||||
* when a resource shortage like SDMA ring space is seen.
|
||||
* when a resource shortage like SDMA ring space
|
||||
* or PIO credit space is seen.
|
||||
*
|
||||
* Both potentially have locks help
|
||||
* so sleeping is not allowed.
|
||||
* so sleeping is not allowed and it is not
|
||||
* supported to submit txreqs from the wakeup
|
||||
* call directly because of lock conflicts.
|
||||
*
|
||||
* The wait_dma member along with the iow
|
||||
*
|
||||
@ -143,6 +147,7 @@ struct iowait {
|
||||
);
|
||||
void (*wakeup)(struct iowait *wait, int reason);
|
||||
void (*sdma_drained)(struct iowait *wait);
|
||||
void (*init_priority)(struct iowait *wait);
|
||||
seqlock_t *lock;
|
||||
wait_queue_head_t wait_dma;
|
||||
wait_queue_head_t wait_pio;
|
||||
@ -152,6 +157,7 @@ struct iowait {
|
||||
u32 tx_limit;
|
||||
u32 tx_count;
|
||||
u8 starved_cnt;
|
||||
u8 priority;
|
||||
unsigned long flags;
|
||||
struct iowait_work wait[IOWAIT_SES];
|
||||
};
|
||||
@ -171,7 +177,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
|
||||
uint seq,
|
||||
bool pkts_sent),
|
||||
void (*wakeup)(struct iowait *wait, int reason),
|
||||
void (*sdma_drained)(struct iowait *wait));
|
||||
void (*sdma_drained)(struct iowait *wait),
|
||||
void (*init_priority)(struct iowait *wait));
|
||||
|
||||
/**
|
||||
* iowait_schedule() - schedule the default send engine work
|
||||
@ -185,6 +192,18 @@ static inline bool iowait_schedule(struct iowait *wait,
|
||||
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_tid_schedule - schedule the tid SE
|
||||
* @wait: the iowait structure
|
||||
* @wq: the work queue
|
||||
* @cpu: the cpu
|
||||
*/
|
||||
static inline bool iowait_tid_schedule(struct iowait *wait,
|
||||
struct workqueue_struct *wq, int cpu)
|
||||
{
|
||||
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_TID_SE].iowork);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_sdma_drain() - wait for DMAs to drain
|
||||
*
|
||||
@ -327,6 +346,8 @@ static inline u16 iowait_get_desc(struct iowait_work *w)
|
||||
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
|
||||
list);
|
||||
num_desc = tx->num_desc;
|
||||
if (tx->flags & SDMA_TXREQ_F_VIP)
|
||||
w->iow->priority++;
|
||||
}
|
||||
return num_desc;
|
||||
}
|
||||
@ -340,6 +361,37 @@ static inline u32 iowait_get_all_desc(struct iowait *w)
|
||||
return num_desc;
|
||||
}
|
||||
|
||||
static inline void iowait_update_priority(struct iowait_work *w)
|
||||
{
|
||||
struct sdma_txreq *tx = NULL;
|
||||
|
||||
if (!list_empty(&w->tx_head)) {
|
||||
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
|
||||
list);
|
||||
if (tx->flags & SDMA_TXREQ_F_VIP)
|
||||
w->iow->priority++;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void iowait_update_all_priority(struct iowait *w)
|
||||
{
|
||||
iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
|
||||
iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
|
||||
}
|
||||
|
||||
static inline void iowait_init_priority(struct iowait *w)
|
||||
{
|
||||
w->priority = 0;
|
||||
if (w->init_priority)
|
||||
w->init_priority(w);
|
||||
}
|
||||
|
||||
static inline void iowait_get_priority(struct iowait *w)
|
||||
{
|
||||
iowait_init_priority(w);
|
||||
iowait_update_all_priority(w);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_queue - Put the iowait on a wait queue
|
||||
* @pkts_sent: have some packets been sent before queuing?
|
||||
@ -356,14 +408,18 @@ static inline void iowait_queue(bool pkts_sent, struct iowait *w,
|
||||
/*
|
||||
* To play fair, insert the iowait at the tail of the wait queue if it
|
||||
* has already sent some packets; Otherwise, put it at the head.
|
||||
* However, if it has priority packets to send, also put it at the
|
||||
* head.
|
||||
*/
|
||||
if (pkts_sent) {
|
||||
list_add_tail(&w->list, wait_head);
|
||||
if (pkts_sent)
|
||||
w->starved_cnt = 0;
|
||||
} else {
|
||||
list_add(&w->list, wait_head);
|
||||
else
|
||||
w->starved_cnt++;
|
||||
}
|
||||
|
||||
if (w->priority > 0 || !pkts_sent)
|
||||
list_add(&w->list, wait_head);
|
||||
else
|
||||
list_add_tail(&w->list, wait_head);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -380,27 +436,10 @@ static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
|
||||
w->starved_cnt = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_starve_find_max - Find the maximum of the starve count
|
||||
* @w: the iowait struct
|
||||
* @max: a variable containing the max starve count
|
||||
* @idx: the index of the current iowait in an array
|
||||
* @max_idx: a variable containing the array index for the
|
||||
* iowait entry that has the max starve count
|
||||
*
|
||||
* This function is called to compare the starve count of a
|
||||
* given iowait with the given max starve count. The max starve
|
||||
* count and the index will be updated if the iowait's start
|
||||
* count is larger.
|
||||
*/
|
||||
static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
|
||||
uint idx, uint *max_idx)
|
||||
{
|
||||
if (w->starved_cnt > *max) {
|
||||
*max = w->starved_cnt;
|
||||
*max_idx = idx;
|
||||
}
|
||||
}
|
||||
/* Update the top priority index */
|
||||
uint iowait_priority_update_top(struct iowait *w,
|
||||
struct iowait *top,
|
||||
uint idx, uint top_idx);
|
||||
|
||||
/**
|
||||
* iowait_packet_queued() - determine if a packet is queued
|
||||
|
323
drivers/infiniband/hw/hfi1/opfn.c
Normal file
323
drivers/infiniband/hw/hfi1/opfn.c
Normal file
@ -0,0 +1,323 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
*/
|
||||
#include "hfi.h"
|
||||
#include "trace.h"
|
||||
#include "qp.h"
|
||||
#include "opfn.h"
|
||||
|
||||
#define IB_BTHE_E BIT(IB_BTHE_E_SHIFT)
|
||||
|
||||
#define OPFN_CODE(code) BIT((code) - 1)
|
||||
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
|
||||
|
||||
struct hfi1_opfn_type {
|
||||
bool (*request)(struct rvt_qp *qp, u64 *data);
|
||||
bool (*response)(struct rvt_qp *qp, u64 *data);
|
||||
bool (*reply)(struct rvt_qp *qp, u64 data);
|
||||
void (*error)(struct rvt_qp *qp);
|
||||
};
|
||||
|
||||
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
|
||||
[STL_VERBS_EXTD_TID_RDMA] = {
|
||||
.request = tid_rdma_conn_req,
|
||||
.response = tid_rdma_conn_resp,
|
||||
.reply = tid_rdma_conn_reply,
|
||||
.error = tid_rdma_conn_error,
|
||||
},
|
||||
};
|
||||
|
||||
static struct workqueue_struct *opfn_wq;
|
||||
|
||||
static void opfn_schedule_conn_request(struct rvt_qp *qp);
|
||||
|
||||
static bool hfi1_opfn_extended(u32 bth1)
|
||||
{
|
||||
return !!(bth1 & IB_BTHE_E);
|
||||
}
|
||||
|
||||
static void opfn_conn_request(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
struct ib_atomic_wr wr;
|
||||
u16 mask, capcode;
|
||||
struct hfi1_opfn_type *extd;
|
||||
u64 data;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
trace_hfi1_opfn_state_conn_request(qp);
|
||||
spin_lock_irqsave(&priv->opfn.lock, flags);
|
||||
/*
|
||||
* Exit if the extended bit is not set, or if nothing is requested, or
|
||||
* if we have completed all requests, or if a previous request is in
|
||||
* progress
|
||||
*/
|
||||
if (!priv->opfn.extended || !priv->opfn.requested ||
|
||||
priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
|
||||
goto done;
|
||||
|
||||
mask = priv->opfn.requested & ~priv->opfn.completed;
|
||||
capcode = ilog2(mask & ~(mask - 1)) + 1;
|
||||
if (capcode >= STL_VERBS_EXTD_MAX) {
|
||||
priv->opfn.completed |= OPFN_CODE(capcode);
|
||||
goto done;
|
||||
}
|
||||
|
||||
extd = &hfi1_opfn_handlers[capcode];
|
||||
if (!extd || !extd->request || !extd->request(qp, &data)) {
|
||||
/*
|
||||
* Either there is no handler for this capability or the request
|
||||
* packet could not be generated. Either way, mark it as done so
|
||||
* we don't keep attempting to complete it.
|
||||
*/
|
||||
priv->opfn.completed |= OPFN_CODE(capcode);
|
||||
goto done;
|
||||
}
|
||||
|
||||
trace_hfi1_opfn_data_conn_request(qp, capcode, data);
|
||||
data = (data & ~0xf) | capcode;
|
||||
|
||||
memset(&wr, 0, sizeof(wr));
|
||||
wr.wr.opcode = IB_WR_OPFN;
|
||||
wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
|
||||
wr.compare_add = data;
|
||||
|
||||
priv->opfn.curr = capcode; /* A new request is now in progress */
|
||||
/* Drop opfn.lock before calling ib_post_send() */
|
||||
spin_unlock_irqrestore(&priv->opfn.lock, flags);
|
||||
|
||||
ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
trace_hfi1_opfn_state_conn_request(qp);
|
||||
return;
|
||||
err:
|
||||
trace_hfi1_msg_opfn_conn_request(qp, "ib_ost_send failed: ret = ",
|
||||
(u64)ret);
|
||||
spin_lock_irqsave(&priv->opfn.lock, flags);
|
||||
/*
|
||||
* In case of an unexpected error return from ib_post_send
|
||||
* clear opfn.curr and reschedule to try again
|
||||
*/
|
||||
priv->opfn.curr = STL_VERBS_EXTD_NONE;
|
||||
opfn_schedule_conn_request(qp);
|
||||
done:
|
||||
spin_unlock_irqrestore(&priv->opfn.lock, flags);
|
||||
}
|
||||
|
||||
void opfn_send_conn_request(struct work_struct *work)
|
||||
{
|
||||
struct hfi1_opfn_data *od;
|
||||
struct hfi1_qp_priv *qpriv;
|
||||
|
||||
od = container_of(work, struct hfi1_opfn_data, opfn_work);
|
||||
qpriv = container_of(od, struct hfi1_qp_priv, opfn);
|
||||
|
||||
opfn_conn_request(qpriv->owner);
|
||||
}
|
||||
|
||||
/*
|
||||
* When QP s_lock is held in the caller, the OPFN request must be scheduled
|
||||
* to a different workqueue to avoid double locking QP s_lock in call to
|
||||
* ib_post_send in opfn_conn_request
|
||||
*/
|
||||
static void opfn_schedule_conn_request(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
trace_hfi1_opfn_state_sched_conn_request(qp);
|
||||
queue_work(opfn_wq, &priv->opfn.opfn_work);
|
||||
}
|
||||
|
||||
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
|
||||
struct ib_atomic_eth *ateth)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
u64 data = be64_to_cpu(ateth->compare_data);
|
||||
struct hfi1_opfn_type *extd;
|
||||
u8 capcode;
|
||||
unsigned long flags;
|
||||
|
||||
trace_hfi1_opfn_state_conn_response(qp);
|
||||
capcode = data & 0xf;
|
||||
trace_hfi1_opfn_data_conn_response(qp, capcode, data);
|
||||
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
|
||||
return;
|
||||
|
||||
extd = &hfi1_opfn_handlers[capcode];
|
||||
|
||||
if (!extd || !extd->response) {
|
||||
e->atomic_data = capcode;
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&priv->opfn.lock, flags);
|
||||
if (priv->opfn.completed & OPFN_CODE(capcode)) {
|
||||
/*
|
||||
* We are receiving a request for a feature that has already
|
||||
* been negotiated. This may mean that the other side has reset
|
||||
*/
|
||||
priv->opfn.completed &= ~OPFN_CODE(capcode);
|
||||
if (extd->error)
|
||||
extd->error(qp);
|
||||
}
|
||||
|
||||
if (extd->response(qp, &data))
|
||||
priv->opfn.completed |= OPFN_CODE(capcode);
|
||||
e->atomic_data = (data & ~0xf) | capcode;
|
||||
trace_hfi1_opfn_state_conn_response(qp);
|
||||
spin_unlock_irqrestore(&priv->opfn.lock, flags);
|
||||
}
|
||||
|
||||
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
struct hfi1_opfn_type *extd;
|
||||
u8 capcode;
|
||||
unsigned long flags;
|
||||
|
||||
trace_hfi1_opfn_state_conn_reply(qp);
|
||||
capcode = data & 0xf;
|
||||
trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
|
||||
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&priv->opfn.lock, flags);
|
||||
/*
|
||||
* Either there is no previous request or the reply is not for the
|
||||
* current request
|
||||
*/
|
||||
if (!priv->opfn.curr || capcode != priv->opfn.curr)
|
||||
goto done;
|
||||
|
||||
extd = &hfi1_opfn_handlers[capcode];
|
||||
|
||||
if (!extd || !extd->reply)
|
||||
goto clear;
|
||||
|
||||
if (extd->reply(qp, data))
|
||||
priv->opfn.completed |= OPFN_CODE(capcode);
|
||||
clear:
|
||||
/*
|
||||
* Clear opfn.curr to indicate that the previous request is no longer in
|
||||
* progress
|
||||
*/
|
||||
priv->opfn.curr = STL_VERBS_EXTD_NONE;
|
||||
trace_hfi1_opfn_state_conn_reply(qp);
|
||||
done:
|
||||
spin_unlock_irqrestore(&priv->opfn.lock, flags);
|
||||
}
|
||||
|
||||
void opfn_conn_error(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
struct hfi1_opfn_type *extd = NULL;
|
||||
unsigned long flags;
|
||||
u16 capcode;
|
||||
|
||||
trace_hfi1_opfn_state_conn_error(qp);
|
||||
trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
|
||||
/*
|
||||
* The QP has gone into the Error state. We have to invalidate all
|
||||
* negotiated feature, including the one in progress (if any). The RC
|
||||
* QP handling will clean the WQE for the connection request.
|
||||
*/
|
||||
spin_lock_irqsave(&priv->opfn.lock, flags);
|
||||
while (priv->opfn.completed) {
|
||||
capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
|
||||
extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
|
||||
if (extd->error)
|
||||
extd->error(qp);
|
||||
priv->opfn.completed &= ~OPFN_CODE(capcode);
|
||||
}
|
||||
priv->opfn.extended = 0;
|
||||
priv->opfn.requested = 0;
|
||||
priv->opfn.curr = STL_VERBS_EXTD_NONE;
|
||||
spin_unlock_irqrestore(&priv->opfn.lock, flags);
|
||||
}
|
||||
|
||||
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
|
||||
{
|
||||
struct ib_qp *ibqp = &qp->ibqp;
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
unsigned long flags;
|
||||
|
||||
if (attr_mask & IB_QP_RETRY_CNT)
|
||||
priv->s_retry = attr->retry_cnt;
|
||||
|
||||
spin_lock_irqsave(&priv->opfn.lock, flags);
|
||||
if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
|
||||
struct tid_rdma_params *local = &priv->tid_rdma.local;
|
||||
|
||||
if (attr_mask & IB_QP_TIMEOUT)
|
||||
priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
|
||||
if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
|
||||
qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
|
||||
tid_rdma_opfn_init(qp, local);
|
||||
/*
|
||||
* We only want to set the OPFN requested bit when the
|
||||
* QP transitions to RTS.
|
||||
*/
|
||||
if (attr_mask & IB_QP_STATE &&
|
||||
attr->qp_state == IB_QPS_RTS) {
|
||||
priv->opfn.requested |= OPFN_MASK(TID_RDMA);
|
||||
/*
|
||||
* If the QP is transitioning to RTS and the
|
||||
* opfn.completed for TID RDMA has already been
|
||||
* set, the QP is being moved *back* into RTS.
|
||||
* We can now renegotiate the TID RDMA
|
||||
* parameters.
|
||||
*/
|
||||
if (priv->opfn.completed &
|
||||
OPFN_MASK(TID_RDMA)) {
|
||||
priv->opfn.completed &=
|
||||
~OPFN_MASK(TID_RDMA);
|
||||
/*
|
||||
* Since the opfn.completed bit was
|
||||
* already set, it is safe to assume
|
||||
* that the opfn.extended is also set.
|
||||
*/
|
||||
opfn_schedule_conn_request(qp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
memset(local, 0, sizeof(*local));
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&priv->opfn.lock, flags);
|
||||
}
|
||||
|
||||
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
|
||||
HFI1_CAP_IS_KSET(OPFN)) {
|
||||
priv->opfn.extended = 1;
|
||||
if (qp->state == IB_QPS_RTS)
|
||||
opfn_conn_request(qp);
|
||||
}
|
||||
}
|
||||
|
||||
int opfn_init(void)
|
||||
{
|
||||
opfn_wq = alloc_workqueue("hfi_opfn",
|
||||
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
|
||||
WQ_MEM_RECLAIM,
|
||||
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
|
||||
if (!opfn_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void opfn_exit(void)
|
||||
{
|
||||
if (opfn_wq) {
|
||||
destroy_workqueue(opfn_wq);
|
||||
opfn_wq = NULL;
|
||||
}
|
||||
}
|
85
drivers/infiniband/hw/hfi1/opfn.h
Normal file
85
drivers/infiniband/hw/hfi1/opfn.h
Normal file
@ -0,0 +1,85 @@
|
||||
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
*/
|
||||
#ifndef _HFI1_OPFN_H
|
||||
#define _HFI1_OPFN_H
|
||||
|
||||
/**
|
||||
* DOC: Omni Path Feature Negotion (OPFN)
|
||||
*
|
||||
* OPFN is a discovery protocol for Intel Omni-Path fabric that
|
||||
* allows two RC QPs to negotiate a common feature that both QPs
|
||||
* can support. Currently, the only OPA feature that OPFN
|
||||
* supports is TID RDMA.
|
||||
*
|
||||
* Architecture
|
||||
*
|
||||
* OPFN involves the communication between two QPs on the HFI
|
||||
* level on an Omni-Path fabric, and ULPs have no knowledge of
|
||||
* OPFN at all.
|
||||
*
|
||||
* Implementation
|
||||
*
|
||||
* OPFN extends the existing IB RC protocol with the following
|
||||
* changes:
|
||||
* -- Uses Bit 24 (reserved) of DWORD 1 of Base Transport
|
||||
* Header (BTH1) to indicate that the RC QP supports OPFN;
|
||||
* -- Uses a combination of RC COMPARE_SWAP opcode (0x13) and
|
||||
* the address U64_MAX (0xFFFFFFFFFFFFFFFF) as an OPFN
|
||||
* request; The 64-bit data carried with the request/response
|
||||
* contains the parameters for negotiation and will be
|
||||
* defined in tid_rdma.c file;
|
||||
* -- Defines IB_WR_RESERVED3 as IB_WR_OPFN.
|
||||
*
|
||||
* The OPFN communication will be triggered when an RC QP
|
||||
* receives a request with Bit 24 of BTH1 set. The responder QP
|
||||
* will then post send an OPFN request with its local
|
||||
* parameters, which will be sent to the requester QP once all
|
||||
* existing requests on the responder QP side have been sent.
|
||||
* Once the requester QP receives the OPFN request, it will
|
||||
* keep a copy of the responder QP's parameters, and return a
|
||||
* response packet with its own local parameters. The responder
|
||||
* QP receives the response packet and keeps a copy of the requester
|
||||
* QP's parameters. After this exchange, each side has the parameters
|
||||
* for both sides and therefore can select the right parameters
|
||||
* for future transactions
|
||||
*/
|
||||
|
||||
/* STL Verbs Extended */
|
||||
#define IB_BTHE_E_SHIFT 24
|
||||
#define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX
|
||||
|
||||
struct ib_atomic_eth;
|
||||
|
||||
enum hfi1_opfn_codes {
|
||||
STL_VERBS_EXTD_NONE = 0,
|
||||
STL_VERBS_EXTD_TID_RDMA,
|
||||
STL_VERBS_EXTD_MAX
|
||||
};
|
||||
|
||||
struct hfi1_opfn_data {
|
||||
u8 extended;
|
||||
u16 requested;
|
||||
u16 completed;
|
||||
enum hfi1_opfn_codes curr;
|
||||
/* serialize opfn function calls */
|
||||
spinlock_t lock;
|
||||
struct work_struct opfn_work;
|
||||
};
|
||||
|
||||
/* WR opcode for OPFN */
|
||||
#define IB_WR_OPFN IB_WR_RESERVED3
|
||||
|
||||
void opfn_send_conn_request(struct work_struct *work);
|
||||
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
|
||||
struct ib_atomic_eth *ateth);
|
||||
void opfn_conn_reply(struct rvt_qp *qp, u64 data);
|
||||
void opfn_conn_error(struct rvt_qp *qp);
|
||||
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask);
|
||||
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1);
|
||||
int opfn_init(void);
|
||||
void opfn_exit(void);
|
||||
|
||||
#endif /* _HFI1_OPFN_H */
|
@ -1599,8 +1599,7 @@ static void sc_piobufavail(struct send_context *sc)
|
||||
struct rvt_qp *qp;
|
||||
struct hfi1_qp_priv *priv;
|
||||
unsigned long flags;
|
||||
uint i, n = 0, max_idx = 0;
|
||||
u8 max_starved_cnt = 0;
|
||||
uint i, n = 0, top_idx = 0;
|
||||
|
||||
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
|
||||
dd->send_contexts[sc->sw_index].type != SC_VL15)
|
||||
@ -1619,11 +1618,18 @@ static void sc_piobufavail(struct send_context *sc)
|
||||
if (n == ARRAY_SIZE(qps))
|
||||
break;
|
||||
wait = list_first_entry(list, struct iowait, list);
|
||||
iowait_get_priority(wait);
|
||||
qp = iowait_to_qp(wait);
|
||||
priv = qp->priv;
|
||||
list_del_init(&priv->s_iowait.list);
|
||||
priv->s_iowait.lock = NULL;
|
||||
iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
|
||||
if (n) {
|
||||
priv = qps[top_idx]->priv;
|
||||
top_idx = iowait_priority_update_top(wait,
|
||||
&priv->s_iowait,
|
||||
n, top_idx);
|
||||
}
|
||||
|
||||
/* refcount held until actual wake up */
|
||||
qps[n++] = qp;
|
||||
}
|
||||
@ -1638,12 +1644,12 @@ static void sc_piobufavail(struct send_context *sc)
|
||||
}
|
||||
write_sequnlock_irqrestore(&sc->waitlock, flags);
|
||||
|
||||
/* Wake up the most starved one first */
|
||||
/* Wake up the top-priority one first */
|
||||
if (n)
|
||||
hfi1_qp_wakeup(qps[max_idx],
|
||||
hfi1_qp_wakeup(qps[top_idx],
|
||||
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
|
||||
for (i = 0; i < n; i++)
|
||||
if (i != max_idx)
|
||||
if (i != top_idx)
|
||||
hfi1_qp_wakeup(qps[i],
|
||||
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
|
||||
}
|
||||
|
@ -132,6 +132,18 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
|
||||
.qpt_support = BIT(IB_QPT_RC),
|
||||
},
|
||||
|
||||
[IB_WR_OPFN] = {
|
||||
.length = sizeof(struct ib_atomic_wr),
|
||||
.qpt_support = BIT(IB_QPT_RC),
|
||||
.flags = RVT_OPERATION_USE_RESERVE,
|
||||
},
|
||||
|
||||
[IB_WR_TID_RDMA_WRITE] = {
|
||||
.length = sizeof(struct ib_rdma_wr),
|
||||
.qpt_support = BIT(IB_QPT_RC),
|
||||
.flags = RVT_OPERATION_IGN_RNR_CNT,
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
static void flush_list_head(struct list_head *l)
|
||||
@ -285,6 +297,8 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
|
||||
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
|
||||
qp_set_16b(qp);
|
||||
}
|
||||
|
||||
opfn_qp_init(qp, attr, attr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -311,6 +325,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
|
||||
|
||||
switch (qp->ibqp.qp_type) {
|
||||
case IB_QPT_RC:
|
||||
hfi1_setup_tid_rdma_wqe(qp, wqe);
|
||||
case IB_QPT_UC:
|
||||
if (wqe->length > 0x80000000U)
|
||||
return -EINVAL;
|
||||
@ -422,6 +437,11 @@ static void hfi1_qp_schedule(struct rvt_qp *qp)
|
||||
if (ret)
|
||||
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
|
||||
}
|
||||
if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
|
||||
ret = hfi1_schedule_tid_send(qp);
|
||||
if (ret)
|
||||
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
|
||||
}
|
||||
}
|
||||
|
||||
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
|
||||
@ -441,8 +461,27 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
|
||||
|
||||
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
|
||||
{
|
||||
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
/*
|
||||
* If we are sending a first-leg packet from the second leg,
|
||||
* we need to clear the busy flag from priv->s_flags to
|
||||
* avoid a race condition when the qp wakes up before
|
||||
* the call to hfi1_verbs_send() returns to the second
|
||||
* leg. In that case, the second leg will terminate without
|
||||
* being re-scheduled, resulting in failure to send TID RDMA
|
||||
* WRITE DATA and TID RDMA ACK packets.
|
||||
*/
|
||||
if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
|
||||
priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
|
||||
RVT_S_BUSY);
|
||||
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
|
||||
}
|
||||
} else {
|
||||
priv->s_flags &= ~RVT_S_BUSY;
|
||||
}
|
||||
}
|
||||
|
||||
static int iowait_sleep(
|
||||
@ -479,6 +518,7 @@ static int iowait_sleep(
|
||||
|
||||
ibp->rvp.n_dmawait++;
|
||||
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
|
||||
iowait_get_priority(&priv->s_iowait);
|
||||
iowait_queue(pkts_sent, &priv->s_iowait,
|
||||
&sde->dmawait);
|
||||
priv->s_iowait.lock = &sde->waitlock;
|
||||
@ -528,6 +568,17 @@ static void iowait_sdma_drained(struct iowait *wait)
|
||||
spin_unlock_irqrestore(&qp->s_lock, flags);
|
||||
}
|
||||
|
||||
static void hfi1_init_priority(struct iowait *w)
|
||||
{
|
||||
struct rvt_qp *qp = iowait_to_qp(w);
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
if (qp->s_flags & RVT_S_ACK_PENDING)
|
||||
w->priority++;
|
||||
if (priv->s_flags & RVT_S_ACK_PENDING)
|
||||
w->priority++;
|
||||
}
|
||||
|
||||
/**
|
||||
* qp_to_sdma_engine - map a qp to a send engine
|
||||
* @qp: the QP
|
||||
@ -685,10 +736,11 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
|
||||
&priv->s_iowait,
|
||||
1,
|
||||
_hfi1_do_send,
|
||||
NULL,
|
||||
_hfi1_do_tid_send,
|
||||
iowait_sleep,
|
||||
iowait_wakeup,
|
||||
iowait_sdma_drained);
|
||||
iowait_sdma_drained,
|
||||
hfi1_init_priority);
|
||||
return priv;
|
||||
}
|
||||
|
||||
@ -696,6 +748,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
hfi1_qp_priv_tid_free(rdi, qp);
|
||||
kfree(priv->s_ahg);
|
||||
kfree(priv);
|
||||
}
|
||||
@ -729,6 +782,7 @@ void flush_qp_waiters(struct rvt_qp *qp)
|
||||
{
|
||||
lockdep_assert_held(&qp->s_lock);
|
||||
flush_iowait(qp);
|
||||
hfi1_tid_rdma_flush_wait(qp);
|
||||
}
|
||||
|
||||
void stop_send_queue(struct rvt_qp *qp)
|
||||
@ -736,12 +790,16 @@ void stop_send_queue(struct rvt_qp *qp)
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
iowait_cancel_work(&priv->s_iowait);
|
||||
if (cancel_work_sync(&priv->tid_rdma.trigger_work))
|
||||
rvt_put_qp(qp);
|
||||
}
|
||||
|
||||
void quiesce_qp(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
hfi1_del_tid_reap_timer(qp);
|
||||
hfi1_del_tid_retry_timer(qp);
|
||||
iowait_sdma_drain(&priv->s_iowait);
|
||||
qp_pio_drain(qp);
|
||||
flush_tx_list(qp);
|
||||
@ -749,8 +807,13 @@ void quiesce_qp(struct rvt_qp *qp)
|
||||
|
||||
void notify_qp_reset(struct rvt_qp *qp)
|
||||
{
|
||||
hfi1_qp_kern_exp_rcv_clear_all(qp);
|
||||
qp->r_adefered = 0;
|
||||
clear_ahg(qp);
|
||||
|
||||
/* Clear any OPFN state */
|
||||
if (qp->ibqp.qp_type == IB_QPT_RC)
|
||||
opfn_conn_error(qp);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -832,7 +895,8 @@ void notify_error_qp(struct rvt_qp *qp)
|
||||
if (lock) {
|
||||
write_seqlock(lock);
|
||||
if (!list_empty(&priv->s_iowait.list) &&
|
||||
!(qp->s_flags & RVT_S_BUSY)) {
|
||||
!(qp->s_flags & RVT_S_BUSY) &&
|
||||
!(priv->s_flags & RVT_S_BUSY)) {
|
||||
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
|
||||
list_del_init(&priv->s_iowait.list);
|
||||
priv->s_iowait.lock = NULL;
|
||||
@ -841,7 +905,8 @@ void notify_error_qp(struct rvt_qp *qp)
|
||||
write_sequnlock(lock);
|
||||
}
|
||||
|
||||
if (!(qp->s_flags & RVT_S_BUSY)) {
|
||||
if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
|
||||
qp->s_hdrwords = 0;
|
||||
if (qp->s_rdma_mr) {
|
||||
rvt_put_mr(qp->s_rdma_mr);
|
||||
qp->s_rdma_mr = NULL;
|
||||
|
@ -63,11 +63,17 @@ extern const struct rvt_operation_params hfi1_post_parms[];
|
||||
* HFI1_S_AHG_VALID - ahg header valid on chip
|
||||
* HFI1_S_AHG_CLEAR - have send engine clear ahg state
|
||||
* HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
|
||||
* HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
|
||||
* HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
|
||||
* HFI1_S_WAIT_HALT - halt the first leg send engine
|
||||
* HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
|
||||
*/
|
||||
#define HFI1_S_AHG_VALID 0x80000000
|
||||
#define HFI1_S_AHG_CLEAR 0x40000000
|
||||
#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
|
||||
#define HFI1_S_WAIT_TID_SPACE 0x10000000
|
||||
#define HFI1_S_WAIT_TID_RESP 0x08000000
|
||||
#define HFI1_S_WAIT_HALT 0x04000000
|
||||
#define HFI1_S_MIN_BIT_MASK 0x01000000
|
||||
|
||||
/*
|
||||
@ -76,6 +82,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
|
||||
|
||||
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
|
||||
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
|
||||
#define HFI1_S_ANY_TID_WAIT_SEND (RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA)
|
||||
|
||||
/*
|
||||
* Send if not busy or waiting for I/O and either
|
||||
|
File diff suppressed because it is too large
Load Diff
51
drivers/infiniband/hw/hfi1/rc.h
Normal file
51
drivers/infiniband/hw/hfi1/rc.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef HFI1_RC_H
|
||||
#define HFI1_RC_H
|
||||
|
||||
/* cut down ridiculously long IB macro names */
|
||||
#define OP(x) IB_OPCODE_RC_##x
|
||||
|
||||
static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
|
||||
{
|
||||
unsigned int next;
|
||||
|
||||
next = n + 1;
|
||||
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
|
||||
next = 0;
|
||||
qp->s_tail_ack_queue = next;
|
||||
qp->s_acked_ack_queue = next;
|
||||
qp->s_ack_state = OP(ACKNOWLEDGE);
|
||||
}
|
||||
|
||||
static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
|
||||
struct rvt_qp *qp)
|
||||
{
|
||||
if (list_empty(&qp->rspwait)) {
|
||||
qp->r_flags |= RVT_R_RSP_NAK;
|
||||
rvt_get_qp(qp);
|
||||
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
|
||||
}
|
||||
}
|
||||
|
||||
static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
|
||||
u32 psn, u32 pmtu)
|
||||
{
|
||||
u32 len;
|
||||
|
||||
len = delta_psn(psn, wqe->psn) * pmtu;
|
||||
return rvt_restart_sge(ss, wqe, len);
|
||||
}
|
||||
|
||||
struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
|
||||
u8 *prev_ack, bool *scheduled);
|
||||
int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
|
||||
struct hfi1_ctxtdata *rcd);
|
||||
struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
struct hfi1_ibport *ibp);
|
||||
|
||||
#endif /* HFI1_RC_H */
|
@ -250,7 +250,6 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
|
||||
struct ib_other_headers *ohdr,
|
||||
u32 bth0, u32 bth1, u32 bth2)
|
||||
{
|
||||
bth1 |= qp->remote_qpn;
|
||||
ohdr->bth[0] = cpu_to_be32(bth0);
|
||||
ohdr->bth[1] = cpu_to_be32(bth1);
|
||||
ohdr->bth[2] = cpu_to_be32(bth2);
|
||||
@ -272,13 +271,13 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
|
||||
*/
|
||||
static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
|
||||
struct ib_other_headers *ohdr,
|
||||
u32 bth0, u32 bth2, int middle,
|
||||
u32 bth0, u32 bth1, u32 bth2,
|
||||
int middle,
|
||||
struct hfi1_pkt_state *ps)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
struct hfi1_ibport *ibp = ps->ibp;
|
||||
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
|
||||
u32 bth1 = 0;
|
||||
u32 slid;
|
||||
u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
|
||||
u8 l4 = OPA_16B_L4_IB_LOCAL;
|
||||
@ -360,12 +359,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
|
||||
*/
|
||||
static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
|
||||
struct ib_other_headers *ohdr,
|
||||
u32 bth0, u32 bth2, int middle,
|
||||
u32 bth0, u32 bth1, u32 bth2,
|
||||
int middle,
|
||||
struct hfi1_pkt_state *ps)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
struct hfi1_ibport *ibp = ps->ibp;
|
||||
u32 bth1 = 0;
|
||||
u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
|
||||
u16 lrh0 = HFI1_LRH_BTH;
|
||||
u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
|
||||
@ -415,7 +414,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
|
||||
|
||||
typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp,
|
||||
struct ib_other_headers *ohdr,
|
||||
u32 bth0, u32 bth2, int middle,
|
||||
u32 bth0, u32 bth1, u32 bth2, int middle,
|
||||
struct hfi1_pkt_state *ps);
|
||||
|
||||
/* We support only two types - 9B and 16B for now */
|
||||
@ -425,7 +424,7 @@ static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = {
|
||||
};
|
||||
|
||||
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
|
||||
u32 bth0, u32 bth2, int middle,
|
||||
u32 bth0, u32 bth1, u32 bth2, int middle,
|
||||
struct hfi1_pkt_state *ps)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
@ -446,18 +445,21 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
|
||||
priv->s_ahg->ahgidx = 0;
|
||||
|
||||
/* Make the appropriate header */
|
||||
hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps);
|
||||
hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth1, bth2, middle,
|
||||
ps);
|
||||
}
|
||||
|
||||
/* when sending, force a reschedule every one of these periods */
|
||||
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
|
||||
|
||||
/**
|
||||
* schedule_send_yield - test for a yield required for QP send engine
|
||||
* hfi1_schedule_send_yield - test for a yield required for QP
|
||||
* send engine
|
||||
* @timeout: Final time for timeout slice for jiffies
|
||||
* @qp: a pointer to QP
|
||||
* @ps: a pointer to a structure with commonly lookup values for
|
||||
* the the send engine progress
|
||||
* @tid - true if it is the tid leg
|
||||
*
|
||||
* This routine checks if the time slice for the QP has expired
|
||||
* for RC QPs, if so an additional work entry is queued. At this
|
||||
@ -465,8 +467,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
|
||||
* returns true if a yield is required, otherwise, false
|
||||
* is returned.
|
||||
*/
|
||||
static bool schedule_send_yield(struct rvt_qp *qp,
|
||||
struct hfi1_pkt_state *ps)
|
||||
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
bool tid)
|
||||
{
|
||||
ps->pkts_sent = true;
|
||||
|
||||
@ -474,8 +476,24 @@ static bool schedule_send_yield(struct rvt_qp *qp,
|
||||
if (!ps->in_thread ||
|
||||
workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
|
||||
spin_lock_irqsave(&qp->s_lock, ps->flags);
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
hfi1_schedule_send(qp);
|
||||
if (!tid) {
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
hfi1_schedule_send(qp);
|
||||
} else {
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
|
||||
if (priv->s_flags &
|
||||
HFI1_S_TID_BUSY_SET) {
|
||||
qp->s_flags &= ~RVT_S_BUSY;
|
||||
priv->s_flags &=
|
||||
~(HFI1_S_TID_BUSY_SET |
|
||||
RVT_S_BUSY);
|
||||
} else {
|
||||
priv->s_flags &= ~RVT_S_BUSY;
|
||||
}
|
||||
hfi1_schedule_tid_send(qp);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&qp->s_lock, ps->flags);
|
||||
this_cpu_inc(*ps->ppd->dd->send_schedule);
|
||||
trace_hfi1_rc_expired_time_slice(qp, true);
|
||||
@ -576,6 +594,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
do {
|
||||
/* Check for a constructed packet to be sent. */
|
||||
if (ps.s_txreq) {
|
||||
if (priv->s_flags & HFI1_S_TID_BUSY_SET)
|
||||
qp->s_flags |= RVT_S_BUSY;
|
||||
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
|
||||
/*
|
||||
* If the packet cannot be sent now, return and
|
||||
@ -585,7 +605,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
|
||||
return;
|
||||
|
||||
/* allow other tasks to run */
|
||||
if (schedule_send_yield(qp, &ps))
|
||||
if (hfi1_schedule_send_yield(qp, &ps, false))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&qp->s_lock, ps.flags);
|
||||
|
@ -1747,10 +1747,9 @@ retry:
|
||||
*/
|
||||
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
|
||||
{
|
||||
struct iowait *wait, *nw;
|
||||
struct iowait *wait, *nw, *twait;
|
||||
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
|
||||
uint i, n = 0, seq, max_idx = 0;
|
||||
u8 max_starved_cnt = 0;
|
||||
uint i, n = 0, seq, tidx = 0;
|
||||
|
||||
#ifdef CONFIG_SDMA_VERBOSITY
|
||||
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
|
||||
@ -1775,13 +1774,20 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
|
||||
continue;
|
||||
if (n == ARRAY_SIZE(waits))
|
||||
break;
|
||||
iowait_init_priority(wait);
|
||||
num_desc = iowait_get_all_desc(wait);
|
||||
if (num_desc > avail)
|
||||
break;
|
||||
avail -= num_desc;
|
||||
/* Find the most starved wait memeber */
|
||||
iowait_starve_find_max(wait, &max_starved_cnt,
|
||||
n, &max_idx);
|
||||
/* Find the top-priority wait memeber */
|
||||
if (n) {
|
||||
twait = waits[tidx];
|
||||
tidx =
|
||||
iowait_priority_update_top(wait,
|
||||
twait,
|
||||
n,
|
||||
tidx);
|
||||
}
|
||||
list_del_init(&wait->list);
|
||||
waits[n++] = wait;
|
||||
}
|
||||
@ -1790,12 +1796,12 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
|
||||
}
|
||||
} while (read_seqretry(&sde->waitlock, seq));
|
||||
|
||||
/* Schedule the most starved one first */
|
||||
/* Schedule the top-priority entry first */
|
||||
if (n)
|
||||
waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
|
||||
waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (i != max_idx)
|
||||
if (i != tidx)
|
||||
waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
|
||||
}
|
||||
|
||||
|
@ -91,6 +91,7 @@ struct sdma_desc {
|
||||
#define SDMA_TXREQ_F_URGENT 0x0001
|
||||
#define SDMA_TXREQ_F_AHG_COPY 0x0002
|
||||
#define SDMA_TXREQ_F_USE_AHG 0x0004
|
||||
#define SDMA_TXREQ_F_VIP 0x0010
|
||||
|
||||
struct sdma_txreq;
|
||||
typedef void (*callback_t)(struct sdma_txreq *, int);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,8 +6,317 @@
|
||||
#ifndef HFI1_TID_RDMA_H
|
||||
#define HFI1_TID_RDMA_H
|
||||
|
||||
#include <linux/circ_buf.h>
|
||||
#include "common.h"
|
||||
|
||||
/* Add a convenience helper */
|
||||
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
|
||||
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
|
||||
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
|
||||
|
||||
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
|
||||
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
|
||||
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
|
||||
|
||||
/*
|
||||
* Bit definitions for priv->s_flags.
|
||||
* These bit flags overload the bit flags defined for the QP's s_flags.
|
||||
* Due to the fact that these bit fields are used only for the QP priv
|
||||
* s_flags, there are no collisions.
|
||||
*
|
||||
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
|
||||
* HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
|
||||
*/
|
||||
#define HFI1_S_TID_BUSY_SET BIT(0)
|
||||
/* BIT(1) reserved for RVT_S_BUSY. */
|
||||
#define HFI1_R_TID_RSC_TIMER BIT(2)
|
||||
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
|
||||
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
|
||||
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
|
||||
#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
|
||||
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
|
||||
/* BIT(16) reserved for RVT_S_SEND_ONE */
|
||||
#define HFI1_S_TID_RETRY_TIMER BIT(17)
|
||||
/* BIT(18) reserved for RVT_S_ECN. */
|
||||
#define HFI1_R_TID_SW_PSN BIT(19)
|
||||
/* BIT(26) reserved for HFI1_S_WAIT_HALT */
|
||||
/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
|
||||
/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
|
||||
|
||||
/*
|
||||
* Unlike regular IB RDMA VERBS, which do not require an entry
|
||||
* in the s_ack_queue, TID RDMA WRITE requests do because they
|
||||
* generate responses.
|
||||
* Therefore, the s_ack_queue needs to be extended by a certain
|
||||
* amount. The key point is that the queue needs to be extended
|
||||
* without letting the "user" know so they user doesn't end up
|
||||
* using these extra entries.
|
||||
*/
|
||||
#define HFI1_TID_RDMA_WRITE_CNT 8
|
||||
|
||||
struct tid_rdma_params {
|
||||
struct rcu_head rcu_head;
|
||||
u32 qp;
|
||||
u32 max_len;
|
||||
u16 jkey;
|
||||
u8 max_read;
|
||||
u8 max_write;
|
||||
u8 timeout;
|
||||
u8 urg;
|
||||
u8 version;
|
||||
};
|
||||
|
||||
struct tid_rdma_qp_params {
|
||||
struct work_struct trigger_work;
|
||||
struct tid_rdma_params local;
|
||||
struct tid_rdma_params __rcu *remote;
|
||||
};
|
||||
|
||||
/* Track state for each hardware flow */
|
||||
struct tid_flow_state {
|
||||
u32 generation;
|
||||
u32 psn;
|
||||
u32 r_next_psn; /* next PSN to be received (in TID space) */
|
||||
u8 index;
|
||||
u8 last_index;
|
||||
u8 flags;
|
||||
};
|
||||
|
||||
enum tid_rdma_req_state {
|
||||
TID_REQUEST_INACTIVE = 0,
|
||||
TID_REQUEST_INIT,
|
||||
TID_REQUEST_INIT_RESEND,
|
||||
TID_REQUEST_ACTIVE,
|
||||
TID_REQUEST_RESEND,
|
||||
TID_REQUEST_RESEND_ACTIVE,
|
||||
TID_REQUEST_QUEUED,
|
||||
TID_REQUEST_SYNC,
|
||||
TID_REQUEST_RNR_NAK,
|
||||
TID_REQUEST_COMPLETE,
|
||||
};
|
||||
|
||||
struct tid_rdma_request {
|
||||
struct rvt_qp *qp;
|
||||
struct hfi1_ctxtdata *rcd;
|
||||
union {
|
||||
struct rvt_swqe *swqe;
|
||||
struct rvt_ack_entry *ack;
|
||||
} e;
|
||||
|
||||
struct tid_rdma_flow *flows; /* array of tid flows */
|
||||
struct rvt_sge_state ss; /* SGE state for TID RDMA requests */
|
||||
u16 n_flows; /* size of the flow buffer window */
|
||||
u16 setup_head; /* flow index we are setting up */
|
||||
u16 clear_tail; /* flow index we are clearing */
|
||||
u16 flow_idx; /* flow index most recently set up */
|
||||
u16 acked_tail;
|
||||
|
||||
u32 seg_len;
|
||||
u32 total_len;
|
||||
u32 r_ack_psn; /* next expected ack PSN */
|
||||
u32 r_flow_psn; /* IB PSN of next segment start */
|
||||
u32 r_last_acked; /* IB PSN of last ACK'ed packet */
|
||||
u32 s_next_psn; /* IB PSN of next segment start for read */
|
||||
|
||||
u32 total_segs; /* segments required to complete a request */
|
||||
u32 cur_seg; /* index of current segment */
|
||||
u32 comp_seg; /* index of last completed segment */
|
||||
u32 ack_seg; /* index of last ack'ed segment */
|
||||
u32 alloc_seg; /* index of next segment to be allocated */
|
||||
u32 isge; /* index of "current" sge */
|
||||
u32 ack_pending; /* num acks pending for this request */
|
||||
|
||||
enum tid_rdma_req_state state;
|
||||
};
|
||||
|
||||
/*
|
||||
* When header suppression is used, PSNs associated with a "flow" are
|
||||
* relevant (and not the PSNs maintained by verbs). Track per-flow
|
||||
* PSNs here for a TID RDMA segment.
|
||||
*
|
||||
*/
|
||||
struct flow_state {
|
||||
u32 flags;
|
||||
u32 resp_ib_psn; /* The IB PSN of the response for this flow */
|
||||
u32 generation; /* generation of flow */
|
||||
u32 spsn; /* starting PSN in TID space */
|
||||
u32 lpsn; /* last PSN in TID space */
|
||||
u32 r_next_psn; /* next PSN to be received (in TID space) */
|
||||
|
||||
/* For tid rdma read */
|
||||
u32 ib_spsn; /* starting PSN in Verbs space */
|
||||
u32 ib_lpsn; /* last PSn in Verbs space */
|
||||
};
|
||||
|
||||
struct tid_rdma_pageset {
|
||||
dma_addr_t addr : 48; /* Only needed for the first page */
|
||||
u8 idx: 8;
|
||||
u8 count : 7;
|
||||
u8 mapped: 1;
|
||||
};
|
||||
|
||||
/**
|
||||
* kern_tid_node - used for managing TID's in TID groups
|
||||
*
|
||||
* @grp_idx: rcd relative index to tid_group
|
||||
* @map: grp->map captured prior to programming this TID group in HW
|
||||
* @cnt: Only @cnt of available group entries are actually programmed
|
||||
*/
|
||||
struct kern_tid_node {
|
||||
struct tid_group *grp;
|
||||
u8 map;
|
||||
u8 cnt;
|
||||
};
|
||||
|
||||
/* Overall info for a TID RDMA segment */
|
||||
struct tid_rdma_flow {
|
||||
/*
|
||||
* While a TID RDMA segment is being transferred, it uses a QP number
|
||||
* from the "KDETH section of QP numbers" (which is different from the
|
||||
* QP number that originated the request). Bits 11-15 of these QP
|
||||
* numbers identify the "TID flow" for the segment.
|
||||
*/
|
||||
struct flow_state flow_state;
|
||||
struct tid_rdma_request *req;
|
||||
u32 tid_qpn;
|
||||
u32 tid_offset;
|
||||
u32 length;
|
||||
u32 sent;
|
||||
u8 tnode_cnt;
|
||||
u8 tidcnt;
|
||||
u8 tid_idx;
|
||||
u8 idx;
|
||||
u8 npagesets;
|
||||
u8 npkts;
|
||||
u8 pkt;
|
||||
u8 resync_npkts;
|
||||
struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
|
||||
struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
|
||||
u32 tid_entry[TID_RDMA_MAX_PAGES];
|
||||
};
|
||||
|
||||
enum tid_rnr_nak_state {
|
||||
TID_RNR_NAK_INIT = 0,
|
||||
TID_RNR_NAK_SEND,
|
||||
TID_RNR_NAK_SENT,
|
||||
};
|
||||
|
||||
bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
|
||||
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
|
||||
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
|
||||
void tid_rdma_conn_error(struct rvt_qp *qp);
|
||||
void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
|
||||
|
||||
int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
|
||||
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
|
||||
struct rvt_sge_state *ss, bool *last);
|
||||
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
|
||||
void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
|
||||
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
|
||||
|
||||
/**
|
||||
* trdma_clean_swqe - clean flows for swqe if large send queue
|
||||
* @qp: the qp
|
||||
* @wqe: the send wqe
|
||||
*/
|
||||
static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
|
||||
{
|
||||
if (!wqe->priv)
|
||||
return;
|
||||
__trdma_clean_swqe(qp, wqe);
|
||||
}
|
||||
|
||||
void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp);
|
||||
|
||||
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
|
||||
struct ib_qp_init_attr *init_attr);
|
||||
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
|
||||
|
||||
void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);
|
||||
|
||||
int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
|
||||
void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
|
||||
void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);
|
||||
|
||||
struct cntr_entry;
|
||||
u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
|
||||
void *context, int vl, int mode, u64 data);
|
||||
|
||||
u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
|
||||
struct ib_other_headers *ohdr,
|
||||
u32 *bth1, u32 *bth2, u32 *len);
|
||||
u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
struct ib_other_headers *ohdr, u32 *bth1,
|
||||
u32 *bth2, u32 *len);
|
||||
void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet);
|
||||
u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
|
||||
struct ib_other_headers *ohdr, u32 *bth0,
|
||||
u32 *bth1, u32 *bth2, u32 *len, bool *last);
|
||||
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
|
||||
bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
|
||||
struct hfi1_pportdata *ppd,
|
||||
struct hfi1_packet *packet);
|
||||
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
u32 *bth2);
|
||||
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
|
||||
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
|
||||
|
||||
void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
|
||||
static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
|
||||
struct rvt_swqe *wqe)
|
||||
{
|
||||
if (wqe->priv &&
|
||||
(wqe->wr.opcode == IB_WR_RDMA_READ ||
|
||||
wqe->wr.opcode == IB_WR_RDMA_WRITE) &&
|
||||
wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
|
||||
setup_tid_rdma_wqe(qp, wqe);
|
||||
}
|
||||
|
||||
u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
struct ib_other_headers *ohdr,
|
||||
u32 *bth1, u32 *bth2, u32 *len);
|
||||
|
||||
void hfi1_compute_tid_rdma_flow_wt(void);
|
||||
|
||||
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
|
||||
|
||||
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
|
||||
struct ib_other_headers *ohdr, u32 *bth1,
|
||||
u32 bth2, u32 *len,
|
||||
struct rvt_sge_state **ss);
|
||||
|
||||
void hfi1_del_tid_reap_timer(struct rvt_qp *qp);
|
||||
|
||||
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet);
|
||||
|
||||
bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
|
||||
struct ib_other_headers *ohdr,
|
||||
u32 *bth1, u32 *bth2, u32 *len);
|
||||
|
||||
void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);
|
||||
|
||||
u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
|
||||
struct ib_other_headers *ohdr, u16 iflow,
|
||||
u32 *bth1, u32 *bth2);
|
||||
|
||||
void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);
|
||||
|
||||
void hfi1_add_tid_retry_timer(struct rvt_qp *qp);
|
||||
void hfi1_del_tid_retry_timer(struct rvt_qp *qp);
|
||||
|
||||
u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
|
||||
struct ib_other_headers *ohdr, u32 *bth1,
|
||||
u32 *bth2, u16 fidx);
|
||||
|
||||
void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet);
|
||||
|
||||
struct hfi1_pkt_state;
|
||||
int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
|
||||
|
||||
void _hfi1_do_tid_send(struct work_struct *work);
|
||||
|
||||
bool hfi1_schedule_tid_send(struct rvt_qp *qp);
|
||||
|
||||
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
|
||||
|
||||
#endif /* HFI1_TID_RDMA_H */
|
||||
|
||||
|
@ -46,6 +46,7 @@
|
||||
*/
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
#include "exp_rcv.h"
|
||||
|
||||
static u8 __get_ib_hdr_len(struct ib_header *hdr)
|
||||
{
|
||||
@ -128,6 +129,15 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
|
||||
#define IETH_PRN "ieth rkey:0x%.8x"
|
||||
#define ATOMICACKETH_PRN "origdata:%llx"
|
||||
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
|
||||
#define TID_RDMA_KDETH "kdeth0 0x%x kdeth1 0x%x"
|
||||
#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
|
||||
#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
|
||||
#define TID_READ_RSP_PRN "verbs_qp 0x%x"
|
||||
#define TID_WRITE_REQ_PRN "original_qp 0x%x"
|
||||
#define TID_WRITE_RSP_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
|
||||
#define TID_WRITE_DATA_PRN "verbs_qp 0x%x"
|
||||
#define TID_ACK_PRN "tid_flow_psn 0x%x verbs_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
|
||||
#define TID_RESYNC_PRN "verbs_qp 0x%x"
|
||||
|
||||
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
|
||||
|
||||
@ -322,6 +332,99 @@ const char *parse_everbs_hdrs(
|
||||
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
|
||||
be32_to_cpu(eh->aeth) & IB_MSN_MASK);
|
||||
break;
|
||||
case OP(TID_RDMA, WRITE_REQ):
|
||||
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
|
||||
TID_WRITE_REQ_PRN,
|
||||
le32_to_cpu(eh->tid_rdma.w_req.kdeth0),
|
||||
le32_to_cpu(eh->tid_rdma.w_req.kdeth1),
|
||||
ib_u64_get(&eh->tid_rdma.w_req.reth.vaddr),
|
||||
be32_to_cpu(eh->tid_rdma.w_req.reth.rkey),
|
||||
be32_to_cpu(eh->tid_rdma.w_req.reth.length),
|
||||
be32_to_cpu(eh->tid_rdma.w_req.verbs_qp));
|
||||
break;
|
||||
case OP(TID_RDMA, WRITE_RESP):
|
||||
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
|
||||
TID_WRITE_RSP_PRN,
|
||||
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth0),
|
||||
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth1),
|
||||
be32_to_cpu(eh->tid_rdma.w_rsp.aeth) >> 24,
|
||||
parse_syndrome(/* aeth */
|
||||
be32_to_cpu(eh->tid_rdma.w_rsp.aeth)
|
||||
>> 24),
|
||||
(be32_to_cpu(eh->tid_rdma.w_rsp.aeth) &
|
||||
IB_MSN_MASK),
|
||||
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_psn),
|
||||
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_qp),
|
||||
be32_to_cpu(eh->tid_rdma.w_rsp.verbs_qp));
|
||||
break;
|
||||
case OP(TID_RDMA, WRITE_DATA_LAST):
|
||||
case OP(TID_RDMA, WRITE_DATA):
|
||||
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " TID_WRITE_DATA_PRN,
|
||||
le32_to_cpu(eh->tid_rdma.w_data.kdeth0),
|
||||
KDETH_GET(eh->tid_rdma.w_data.kdeth0, KVER),
|
||||
KDETH_GET(eh->tid_rdma.w_data.kdeth0, SH),
|
||||
KDETH_GET(eh->tid_rdma.w_data.kdeth0, INTR),
|
||||
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TIDCTRL),
|
||||
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TID),
|
||||
KDETH_GET(eh->tid_rdma.w_data.kdeth0, OFFSET),
|
||||
le32_to_cpu(eh->tid_rdma.w_data.kdeth1),
|
||||
KDETH_GET(eh->tid_rdma.w_data.kdeth1, JKEY),
|
||||
be32_to_cpu(eh->tid_rdma.w_data.verbs_qp));
|
||||
break;
|
||||
case OP(TID_RDMA, READ_REQ):
|
||||
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
|
||||
TID_READ_REQ_PRN,
|
||||
le32_to_cpu(eh->tid_rdma.r_req.kdeth0),
|
||||
le32_to_cpu(eh->tid_rdma.r_req.kdeth1),
|
||||
ib_u64_get(&eh->tid_rdma.r_req.reth.vaddr),
|
||||
be32_to_cpu(eh->tid_rdma.r_req.reth.rkey),
|
||||
be32_to_cpu(eh->tid_rdma.r_req.reth.length),
|
||||
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_psn),
|
||||
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_qp),
|
||||
be32_to_cpu(eh->tid_rdma.r_req.verbs_qp));
|
||||
break;
|
||||
case OP(TID_RDMA, READ_RESP):
|
||||
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " AETH_PRN " "
|
||||
TID_READ_RSP_PRN,
|
||||
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth0),
|
||||
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, KVER),
|
||||
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, SH),
|
||||
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, INTR),
|
||||
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TIDCTRL),
|
||||
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TID),
|
||||
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, OFFSET),
|
||||
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth1),
|
||||
KDETH_GET(eh->tid_rdma.r_rsp.kdeth1, JKEY),
|
||||
be32_to_cpu(eh->tid_rdma.r_rsp.aeth) >> 24,
|
||||
parse_syndrome(/* aeth */
|
||||
be32_to_cpu(eh->tid_rdma.r_rsp.aeth)
|
||||
>> 24),
|
||||
(be32_to_cpu(eh->tid_rdma.r_rsp.aeth) &
|
||||
IB_MSN_MASK),
|
||||
be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
|
||||
break;
|
||||
case OP(TID_RDMA, ACK):
|
||||
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
|
||||
TID_ACK_PRN,
|
||||
le32_to_cpu(eh->tid_rdma.ack.kdeth0),
|
||||
le32_to_cpu(eh->tid_rdma.ack.kdeth1),
|
||||
be32_to_cpu(eh->tid_rdma.ack.aeth) >> 24,
|
||||
parse_syndrome(/* aeth */
|
||||
be32_to_cpu(eh->tid_rdma.ack.aeth)
|
||||
>> 24),
|
||||
(be32_to_cpu(eh->tid_rdma.ack.aeth) &
|
||||
IB_MSN_MASK),
|
||||
be32_to_cpu(eh->tid_rdma.ack.tid_flow_psn),
|
||||
be32_to_cpu(eh->tid_rdma.ack.verbs_psn),
|
||||
be32_to_cpu(eh->tid_rdma.ack.tid_flow_qp),
|
||||
be32_to_cpu(eh->tid_rdma.ack.verbs_qp));
|
||||
break;
|
||||
case OP(TID_RDMA, RESYNC):
|
||||
trace_seq_printf(p, TID_RDMA_KDETH " " TID_RESYNC_PRN,
|
||||
le32_to_cpu(eh->tid_rdma.resync.kdeth0),
|
||||
le32_to_cpu(eh->tid_rdma.resync.kdeth1),
|
||||
be32_to_cpu(eh->tid_rdma.resync.verbs_qp));
|
||||
break;
|
||||
/* aeth + atomicacketh */
|
||||
case OP(RC, ATOMIC_ACKNOWLEDGE):
|
||||
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
|
||||
@ -394,6 +497,21 @@ const char *print_u32_array(
|
||||
return ret;
|
||||
}
|
||||
|
||||
u8 hfi1_trace_get_tid_ctrl(u32 ent)
|
||||
{
|
||||
return EXP_TID_GET(ent, CTRL);
|
||||
}
|
||||
|
||||
u16 hfi1_trace_get_tid_len(u32 ent)
|
||||
{
|
||||
return EXP_TID_GET(ent, LEN);
|
||||
}
|
||||
|
||||
u16 hfi1_trace_get_tid_idx(u32 ent)
|
||||
{
|
||||
return EXP_TID_GET(ent, IDX);
|
||||
}
|
||||
|
||||
__hfi1_trace_fn(AFFINITY);
|
||||
__hfi1_trace_fn(PKT);
|
||||
__hfi1_trace_fn(PROC);
|
||||
|
@ -63,3 +63,4 @@ __print_symbolic(etype, \
|
||||
#include "trace_tx.h"
|
||||
#include "trace_mmu.h"
|
||||
#include "trace_iowait.h"
|
||||
#include "trace_tid.h"
|
||||
|
@ -79,6 +79,14 @@ __print_symbolic(opcode, \
|
||||
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
|
||||
ib_opcode_name(RC_COMPARE_SWAP), \
|
||||
ib_opcode_name(RC_FETCH_ADD), \
|
||||
ib_opcode_name(TID_RDMA_WRITE_REQ), \
|
||||
ib_opcode_name(TID_RDMA_WRITE_RESP), \
|
||||
ib_opcode_name(TID_RDMA_WRITE_DATA), \
|
||||
ib_opcode_name(TID_RDMA_WRITE_DATA_LAST), \
|
||||
ib_opcode_name(TID_RDMA_READ_REQ), \
|
||||
ib_opcode_name(TID_RDMA_READ_RESP), \
|
||||
ib_opcode_name(TID_RDMA_RESYNC), \
|
||||
ib_opcode_name(TID_RDMA_ACK), \
|
||||
ib_opcode_name(UC_SEND_FIRST), \
|
||||
ib_opcode_name(UC_SEND_MIDDLE), \
|
||||
ib_opcode_name(UC_SEND_LAST), \
|
||||
|
@ -109,6 +109,54 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
|
||||
TP_ARGS(qp, psn)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(/* event */
|
||||
hfi1_rc_template, hfi1_rc_completion,
|
||||
TP_PROTO(struct rvt_qp *qp, u32 psn),
|
||||
TP_ARGS(qp, psn)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(/* rc_ack */
|
||||
hfi1_rc_ack_template,
|
||||
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
|
||||
struct rvt_swqe *wqe),
|
||||
TP_ARGS(qp, aeth, psn, wqe),
|
||||
TP_STRUCT__entry(/* entry */
|
||||
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
|
||||
__field(u32, qpn)
|
||||
__field(u32, aeth)
|
||||
__field(u32, psn)
|
||||
__field(u8, opcode)
|
||||
__field(u32, spsn)
|
||||
__field(u32, lpsn)
|
||||
),
|
||||
TP_fast_assign(/* assign */
|
||||
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
|
||||
__entry->qpn = qp->ibqp.qp_num;
|
||||
__entry->aeth = aeth;
|
||||
__entry->psn = psn;
|
||||
__entry->opcode = wqe->wr.opcode;
|
||||
__entry->spsn = wqe->psn;
|
||||
__entry->lpsn = wqe->lpsn;
|
||||
),
|
||||
TP_printk(/* print */
|
||||
"[%s] qpn 0x%x aeth 0x%x psn 0x%x opcode 0x%x spsn 0x%x lpsn 0x%x",
|
||||
__get_str(dev),
|
||||
__entry->qpn,
|
||||
__entry->aeth,
|
||||
__entry->psn,
|
||||
__entry->opcode,
|
||||
__entry->spsn,
|
||||
__entry->lpsn
|
||||
)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(/* do_rc_ack */
|
||||
hfi1_rc_ack_template, hfi1_rc_ack_do,
|
||||
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
|
||||
struct rvt_swqe *wqe),
|
||||
TP_ARGS(qp, aeth, psn, wqe)
|
||||
);
|
||||
|
||||
#endif /* __HFI1_TRACE_RC_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2015 - 2017 Intel Corporation.
|
||||
* Copyright(c) 2015 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -128,111 +128,6 @@ TRACE_EVENT(hfi1_receive_interrupt,
|
||||
)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(
|
||||
hfi1_exp_tid_reg_unreg,
|
||||
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
|
||||
u32 npages, unsigned long va, unsigned long pa,
|
||||
dma_addr_t dma),
|
||||
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned int, ctxt)
|
||||
__field(u16, subctxt)
|
||||
__field(u32, rarr)
|
||||
__field(u32, npages)
|
||||
__field(unsigned long, va)
|
||||
__field(unsigned long, pa)
|
||||
__field(dma_addr_t, dma)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ctxt = ctxt;
|
||||
__entry->subctxt = subctxt;
|
||||
__entry->rarr = rarr;
|
||||
__entry->npages = npages;
|
||||
__entry->va = va;
|
||||
__entry->pa = pa;
|
||||
__entry->dma = dma;
|
||||
),
|
||||
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
|
||||
__entry->ctxt,
|
||||
__entry->subctxt,
|
||||
__entry->rarr,
|
||||
__entry->npages,
|
||||
__entry->pa,
|
||||
__entry->va,
|
||||
__entry->dma
|
||||
)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(
|
||||
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
|
||||
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
|
||||
unsigned long va, unsigned long pa, dma_addr_t dma),
|
||||
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
|
||||
|
||||
DEFINE_EVENT(
|
||||
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
|
||||
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
|
||||
unsigned long va, unsigned long pa, dma_addr_t dma),
|
||||
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
|
||||
|
||||
TRACE_EVENT(
|
||||
hfi1_put_tid,
|
||||
TP_PROTO(struct hfi1_devdata *dd,
|
||||
u32 index, u32 type, unsigned long pa, u16 order),
|
||||
TP_ARGS(dd, index, type, pa, order),
|
||||
TP_STRUCT__entry(
|
||||
DD_DEV_ENTRY(dd)
|
||||
__field(unsigned long, pa);
|
||||
__field(u32, index);
|
||||
__field(u32, type);
|
||||
__field(u16, order);
|
||||
),
|
||||
TP_fast_assign(
|
||||
DD_DEV_ASSIGN(dd);
|
||||
__entry->pa = pa;
|
||||
__entry->index = index;
|
||||
__entry->type = type;
|
||||
__entry->order = order;
|
||||
),
|
||||
TP_printk("[%s] type %s pa %lx index %u order %u",
|
||||
__get_str(dev),
|
||||
show_tidtype(__entry->type),
|
||||
__entry->pa,
|
||||
__entry->index,
|
||||
__entry->order
|
||||
)
|
||||
);
|
||||
|
||||
TRACE_EVENT(hfi1_exp_tid_inval,
|
||||
TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
|
||||
u32 npages, dma_addr_t dma),
|
||||
TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned int, ctxt)
|
||||
__field(u16, subctxt)
|
||||
__field(unsigned long, va)
|
||||
__field(u32, rarr)
|
||||
__field(u32, npages)
|
||||
__field(dma_addr_t, dma)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ctxt = ctxt;
|
||||
__entry->subctxt = subctxt;
|
||||
__entry->va = va;
|
||||
__entry->rarr = rarr;
|
||||
__entry->npages = npages;
|
||||
__entry->dma = dma;
|
||||
),
|
||||
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
|
||||
__entry->ctxt,
|
||||
__entry->subctxt,
|
||||
__entry->rarr,
|
||||
__entry->npages,
|
||||
__entry->va,
|
||||
__entry->dma
|
||||
)
|
||||
);
|
||||
|
||||
TRACE_EVENT(hfi1_mmu_invalidate,
|
||||
TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
|
||||
unsigned long start, unsigned long end),
|
||||
|
1610
drivers/infiniband/hw/hfi1/trace_tid.h
Normal file
1610
drivers/infiniband/hw/hfi1/trace_tid.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -114,19 +114,27 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
|
||||
__field(u32, qpn)
|
||||
__field(u32, flags)
|
||||
__field(u32, s_flags)
|
||||
__field(u32, ps_flags)
|
||||
__field(unsigned long, iow_flags)
|
||||
),
|
||||
TP_fast_assign(
|
||||
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
|
||||
__entry->flags = flags;
|
||||
__entry->qpn = qp->ibqp.qp_num;
|
||||
__entry->s_flags = qp->s_flags;
|
||||
__entry->ps_flags =
|
||||
((struct hfi1_qp_priv *)qp->priv)->s_flags;
|
||||
__entry->iow_flags =
|
||||
((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
|
||||
),
|
||||
TP_printk(
|
||||
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
|
||||
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx",
|
||||
__get_str(dev),
|
||||
__entry->qpn,
|
||||
__entry->flags,
|
||||
__entry->s_flags
|
||||
__entry->s_flags,
|
||||
__entry->ps_flags,
|
||||
__entry->iow_flags
|
||||
)
|
||||
);
|
||||
|
||||
@ -838,6 +846,12 @@ DEFINE_EVENT(
|
||||
TP_ARGS(qp, flag)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(/* event */
|
||||
hfi1_do_send_template, hfi1_rc_do_tid_send,
|
||||
TP_PROTO(struct rvt_qp *qp, bool flag),
|
||||
TP_ARGS(qp, flag)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(
|
||||
hfi1_do_send_template, hfi1_rc_expired_time_slice,
|
||||
TP_PROTO(struct rvt_qp *qp, bool flag),
|
||||
|
@ -271,7 +271,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
|
||||
ps->s_txreq->ss = &qp->s_sge;
|
||||
ps->s_txreq->s_cur_size = len;
|
||||
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
|
||||
mask_psn(qp->s_psn++), middle, ps);
|
||||
qp->remote_qpn, mask_psn(qp->s_psn++),
|
||||
middle, ps);
|
||||
return 1;
|
||||
|
||||
done_free_tx:
|
||||
|
@ -48,7 +48,6 @@
|
||||
*/
|
||||
|
||||
#include "hfi.h"
|
||||
|
||||
#include "exp_rcv.h"
|
||||
|
||||
struct tid_pageset {
|
||||
|
@ -144,8 +144,10 @@ static int defer_packet_queue(
|
||||
*/
|
||||
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
|
||||
write_seqlock(&sde->waitlock);
|
||||
if (list_empty(&pq->busy.list))
|
||||
if (list_empty(&pq->busy.list)) {
|
||||
iowait_get_priority(&pq->busy);
|
||||
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
|
||||
}
|
||||
write_sequnlock(&sde->waitlock);
|
||||
return -EBUSY;
|
||||
eagain:
|
||||
@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
|
||||
pq->mm = fd->mm;
|
||||
|
||||
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
|
||||
activate_packet_queue, NULL);
|
||||
activate_packet_queue, NULL, NULL);
|
||||
pq->reqidx = 0;
|
||||
|
||||
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
|
||||
@ -1126,7 +1128,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
|
||||
0xffffffull),
|
||||
psn = val & mask;
|
||||
if (expct)
|
||||
psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK);
|
||||
psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
|
||||
((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
|
||||
else
|
||||
psn = psn + frags;
|
||||
return psn & mask;
|
||||
|
@ -161,10 +161,12 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the
|
||||
*/
|
||||
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
|
||||
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
|
||||
[IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
|
||||
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
|
||||
[IB_WR_SEND] = IB_WC_SEND,
|
||||
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
|
||||
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
|
||||
[IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
|
||||
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
|
||||
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
|
||||
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
|
||||
@ -200,6 +202,14 @@ const u8 hdr_len_by_opcode[256] = {
|
||||
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
|
||||
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
|
||||
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
|
||||
[IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
|
||||
[IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
|
||||
[IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
|
||||
[IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
|
||||
[IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
|
||||
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
|
||||
[IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
|
||||
[IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
|
||||
/* UC */
|
||||
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
|
||||
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
|
||||
@ -243,6 +253,17 @@ static const opcode_handler opcode_handler_tbl[256] = {
|
||||
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
|
||||
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
|
||||
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
|
||||
|
||||
/* TID RDMA has separate handlers for different opcodes.*/
|
||||
[IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
|
||||
[IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
|
||||
[IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
|
||||
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
|
||||
[IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
|
||||
[IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
|
||||
[IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
|
||||
[IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,
|
||||
|
||||
/* UC */
|
||||
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
|
||||
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
|
||||
@ -308,7 +329,7 @@ static inline opcode_handler qp_ok(struct hfi1_packet *packet)
|
||||
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
|
||||
{
|
||||
#ifdef CONFIG_FAULT_INJECTION
|
||||
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
|
||||
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) {
|
||||
/*
|
||||
* In order to drop non-IB traffic we
|
||||
* set PbcInsertHrc to NONE (0x2).
|
||||
@ -319,8 +340,9 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
|
||||
* packet will not be delivered to the
|
||||
* correct context.
|
||||
*/
|
||||
pbc &= ~PBC_INSERT_HCRC_SMASK;
|
||||
pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
|
||||
else
|
||||
} else {
|
||||
/*
|
||||
* In order to drop regular verbs
|
||||
* traffic we set the PbcTestEbp
|
||||
@ -330,10 +352,129 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
|
||||
* triggered and will be dropped.
|
||||
*/
|
||||
pbc |= PBC_TEST_EBP;
|
||||
}
|
||||
#endif
|
||||
return pbc;
|
||||
}
|
||||
|
||||
static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet)
|
||||
{
|
||||
if (packet->qp->ibqp.qp_type != IB_QPT_RC ||
|
||||
!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
|
||||
return NULL;
|
||||
if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA)
|
||||
return opcode_handler_tbl[opcode];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd = packet->rcd;
|
||||
struct ib_header *hdr = packet->hdr;
|
||||
u32 tlen = packet->tlen;
|
||||
struct hfi1_pportdata *ppd = rcd->ppd;
|
||||
struct hfi1_ibport *ibp = &ppd->ibport_data;
|
||||
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
|
||||
opcode_handler opcode_handler;
|
||||
unsigned long flags;
|
||||
u32 qp_num;
|
||||
int lnh;
|
||||
u8 opcode;
|
||||
|
||||
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
|
||||
if (unlikely(tlen < 15 * sizeof(u32)))
|
||||
goto drop;
|
||||
|
||||
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
|
||||
if (lnh != HFI1_LRH_BTH)
|
||||
goto drop;
|
||||
|
||||
packet->ohdr = &hdr->u.oth;
|
||||
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
|
||||
|
||||
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
|
||||
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
|
||||
|
||||
/* verbs_qp can be picked up from any tid_rdma header struct */
|
||||
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) &
|
||||
RVT_QPN_MASK;
|
||||
|
||||
rcu_read_lock();
|
||||
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
|
||||
if (!packet->qp)
|
||||
goto drop_rcu;
|
||||
spin_lock_irqsave(&packet->qp->r_lock, flags);
|
||||
opcode_handler = tid_qp_ok(opcode, packet);
|
||||
if (likely(opcode_handler))
|
||||
opcode_handler(packet);
|
||||
else
|
||||
goto drop_unlock;
|
||||
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
|
||||
rcu_read_unlock();
|
||||
|
||||
return;
|
||||
drop_unlock:
|
||||
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
|
||||
drop_rcu:
|
||||
rcu_read_unlock();
|
||||
drop:
|
||||
ibp->rvp.n_pkt_drops++;
|
||||
}
|
||||
|
||||
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd = packet->rcd;
|
||||
struct ib_header *hdr = packet->hdr;
|
||||
u32 tlen = packet->tlen;
|
||||
struct hfi1_pportdata *ppd = rcd->ppd;
|
||||
struct hfi1_ibport *ibp = &ppd->ibport_data;
|
||||
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
|
||||
opcode_handler opcode_handler;
|
||||
unsigned long flags;
|
||||
u32 qp_num;
|
||||
int lnh;
|
||||
u8 opcode;
|
||||
|
||||
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
|
||||
if (unlikely(tlen < 15 * sizeof(u32)))
|
||||
goto drop;
|
||||
|
||||
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
|
||||
if (lnh != HFI1_LRH_BTH)
|
||||
goto drop;
|
||||
|
||||
packet->ohdr = &hdr->u.oth;
|
||||
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
|
||||
|
||||
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
|
||||
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
|
||||
|
||||
/* verbs_qp can be picked up from any tid_rdma header struct */
|
||||
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) &
|
||||
RVT_QPN_MASK;
|
||||
|
||||
rcu_read_lock();
|
||||
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
|
||||
if (!packet->qp)
|
||||
goto drop_rcu;
|
||||
spin_lock_irqsave(&packet->qp->r_lock, flags);
|
||||
opcode_handler = tid_qp_ok(opcode, packet);
|
||||
if (likely(opcode_handler))
|
||||
opcode_handler(packet);
|
||||
else
|
||||
goto drop_unlock;
|
||||
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
|
||||
rcu_read_unlock();
|
||||
|
||||
return;
|
||||
drop_unlock:
|
||||
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
|
||||
drop_rcu:
|
||||
rcu_read_unlock();
|
||||
drop:
|
||||
ibp->rvp.n_pkt_drops++;
|
||||
}
|
||||
|
||||
static int hfi1_do_pkey_check(struct hfi1_packet *packet)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd = packet->rcd;
|
||||
@ -504,11 +645,28 @@ static void verbs_sdma_complete(
|
||||
hfi1_put_txreq(tx);
|
||||
}
|
||||
|
||||
void hfi1_wait_kmem(struct rvt_qp *qp)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
struct ib_qp *ibqp = &qp->ibqp;
|
||||
struct ib_device *ibdev = ibqp->device;
|
||||
struct hfi1_ibdev *dev = to_idev(ibdev);
|
||||
|
||||
if (list_empty(&priv->s_iowait.list)) {
|
||||
if (list_empty(&dev->memwait))
|
||||
mod_timer(&dev->mem_timer, jiffies + 1);
|
||||
qp->s_flags |= RVT_S_WAIT_KMEM;
|
||||
list_add_tail(&priv->s_iowait.list, &dev->memwait);
|
||||
priv->s_iowait.lock = &dev->iowait_lock;
|
||||
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
|
||||
rvt_get_qp(qp);
|
||||
}
|
||||
}
|
||||
|
||||
static int wait_kmem(struct hfi1_ibdev *dev,
|
||||
struct rvt_qp *qp,
|
||||
struct hfi1_pkt_state *ps)
|
||||
{
|
||||
struct hfi1_qp_priv *priv = qp->priv;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
@ -517,15 +675,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
|
||||
write_seqlock(&dev->iowait_lock);
|
||||
list_add_tail(&ps->s_txreq->txreq.list,
|
||||
&ps->wait->tx_head);
|
||||
if (list_empty(&priv->s_iowait.list)) {
|
||||
if (list_empty(&dev->memwait))
|
||||
mod_timer(&dev->mem_timer, jiffies + 1);
|
||||
qp->s_flags |= RVT_S_WAIT_KMEM;
|
||||
list_add_tail(&priv->s_iowait.list, &dev->memwait);
|
||||
priv->s_iowait.lock = &dev->iowait_lock;
|
||||
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
|
||||
rvt_get_qp(qp);
|
||||
}
|
||||
hfi1_wait_kmem(qp);
|
||||
write_sequnlock(&dev->iowait_lock);
|
||||
hfi1_qp_unbusy(qp, ps->wait);
|
||||
ret = -EBUSY;
|
||||
@ -674,6 +824,15 @@ bail_txadd:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u64 update_hcrc(u8 opcode, u64 pbc)
|
||||
{
|
||||
if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) {
|
||||
pbc &= ~PBC_INSERT_HCRC_SMASK;
|
||||
pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT;
|
||||
}
|
||||
return pbc;
|
||||
}
|
||||
|
||||
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
u64 pbc)
|
||||
{
|
||||
@ -719,6 +878,9 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
qp->srate_mbps,
|
||||
vl,
|
||||
plen);
|
||||
|
||||
/* Update HCRC based on packet opcode */
|
||||
pbc = update_hcrc(ps->opcode, pbc);
|
||||
}
|
||||
tx->wqe = qp->s_wqe;
|
||||
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
|
||||
@ -783,6 +945,7 @@ static int pio_wait(struct rvt_qp *qp,
|
||||
dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
|
||||
qp->s_flags |= flag;
|
||||
was_empty = list_empty(&sc->piowait);
|
||||
iowait_get_priority(&priv->s_iowait);
|
||||
iowait_queue(ps->pkts_sent, &priv->s_iowait,
|
||||
&sc->piowait);
|
||||
priv->s_iowait.lock = &sc->waitlock;
|
||||
@ -867,6 +1030,9 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
|
||||
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
|
||||
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
|
||||
|
||||
/* Update HCRC based on packet opcode */
|
||||
pbc = update_hcrc(ps->opcode, pbc);
|
||||
}
|
||||
if (cb)
|
||||
iowait_pio_inc(&priv->s_iowait);
|
||||
@ -1180,7 +1346,9 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
|
||||
rdi->dparms.props.max_mr_size = U64_MAX;
|
||||
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
|
||||
rdi->dparms.props.max_qp = hfi1_max_qps;
|
||||
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
|
||||
rdi->dparms.props.max_qp_wr =
|
||||
(hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
|
||||
HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
|
||||
rdi->dparms.props.max_send_sge = hfi1_max_sges;
|
||||
rdi->dparms.props.max_recv_sge = hfi1_max_sges;
|
||||
rdi->dparms.props.max_sge_rd = hfi1_max_sges;
|
||||
@ -1735,6 +1903,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
|
||||
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
|
||||
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
|
||||
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
|
||||
dd->verbs_dev.rdi.dparms.reserved_operations = 1;
|
||||
dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT;
|
||||
|
||||
/* post send table */
|
||||
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
|
||||
|
@ -72,6 +72,7 @@ struct hfi1_packet;
|
||||
|
||||
#include "iowait.h"
|
||||
#include "tid_rdma.h"
|
||||
#include "opfn.h"
|
||||
|
||||
#define HFI1_MAX_RDMA_ATOMIC 16
|
||||
|
||||
@ -158,10 +159,68 @@ struct hfi1_qp_priv {
|
||||
struct sdma_engine *s_sde; /* current sde */
|
||||
struct send_context *s_sendcontext; /* current sendcontext */
|
||||
struct hfi1_ctxtdata *rcd; /* QP's receive context */
|
||||
struct page **pages; /* for TID page scan */
|
||||
u32 tid_enqueue; /* saved when tid waited */
|
||||
u8 s_sc; /* SC[0..4] for next packet */
|
||||
struct iowait s_iowait;
|
||||
struct timer_list s_tid_timer; /* for timing tid wait */
|
||||
struct timer_list s_tid_retry_timer; /* for timing tid ack */
|
||||
struct list_head tid_wait; /* for queueing tid space */
|
||||
struct hfi1_opfn_data opfn;
|
||||
struct tid_flow_state flow_state;
|
||||
struct tid_rdma_qp_params tid_rdma;
|
||||
struct rvt_qp *owner;
|
||||
u8 hdr_type; /* 9B or 16B */
|
||||
struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */
|
||||
atomic_t n_requests; /* # of TID RDMA requests in the */
|
||||
/* queue */
|
||||
atomic_t n_tid_requests; /* # of sent TID RDMA requests */
|
||||
unsigned long tid_timer_timeout_jiffies;
|
||||
unsigned long tid_retry_timeout_jiffies;
|
||||
|
||||
/* variables for the TID RDMA SE state machine */
|
||||
u8 s_state;
|
||||
u8 s_retry;
|
||||
u8 rnr_nak_state; /* RNR NAK state */
|
||||
u8 s_nak_state;
|
||||
u32 s_nak_psn;
|
||||
u32 s_flags;
|
||||
u32 s_tid_cur;
|
||||
u32 s_tid_head;
|
||||
u32 s_tid_tail;
|
||||
u32 r_tid_head; /* Most recently added TID RDMA request */
|
||||
u32 r_tid_tail; /* the last completed TID RDMA request */
|
||||
u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */
|
||||
u32 r_tid_alloc; /* Request for which we are allocating resources */
|
||||
u32 pending_tid_w_segs; /* Num of pending tid write segments */
|
||||
u32 pending_tid_w_resp; /* Num of pending tid write responses */
|
||||
u32 alloc_w_segs; /* Number of segments for which write */
|
||||
/* resources have been allocated for this QP */
|
||||
|
||||
/* For TID RDMA READ */
|
||||
u32 tid_r_reqs; /* Num of tid reads requested */
|
||||
u32 tid_r_comp; /* Num of tid reads completed */
|
||||
u32 pending_tid_r_segs; /* Num of pending tid read segments */
|
||||
u16 pkts_ps; /* packets per segment */
|
||||
u8 timeout_shift; /* account for number of packets per segment */
|
||||
|
||||
u32 r_next_psn_kdeth;
|
||||
u32 r_next_psn_kdeth_save;
|
||||
u32 s_resync_psn;
|
||||
u8 sync_pt; /* Set when QP reaches sync point */
|
||||
u8 resync;
|
||||
};
|
||||
|
||||
#define HFI1_QP_WQE_INVALID ((u32)-1)
|
||||
|
||||
struct hfi1_swqe_priv {
|
||||
struct tid_rdma_request tid_req;
|
||||
struct rvt_sge_state ss; /* Used for TID RDMA READ Request */
|
||||
};
|
||||
|
||||
struct hfi1_ack_priv {
|
||||
struct rvt_sge_state ss; /* used for TID WRITE RESP */
|
||||
struct tid_rdma_request tid_req;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -225,6 +284,7 @@ struct hfi1_ibdev {
|
||||
struct kmem_cache *verbs_txreq_cache;
|
||||
u64 n_txwait;
|
||||
u64 n_kmem_wait;
|
||||
u64 n_tidwait;
|
||||
|
||||
/* protect iowait lists */
|
||||
seqlock_t iowait_lock ____cacheline_aligned_in_smp;
|
||||
@ -312,6 +372,31 @@ static inline u32 delta_psn(u32 a, u32 b)
|
||||
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
|
||||
}
|
||||
|
||||
static inline struct tid_rdma_request *wqe_to_tid_req(struct rvt_swqe *wqe)
|
||||
{
|
||||
return &((struct hfi1_swqe_priv *)wqe->priv)->tid_req;
|
||||
}
|
||||
|
||||
static inline struct tid_rdma_request *ack_to_tid_req(struct rvt_ack_entry *e)
|
||||
{
|
||||
return &((struct hfi1_ack_priv *)e->priv)->tid_req;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look through all the active flows for a TID RDMA request and find
|
||||
* the one (if it exists) that contains the specified PSN.
|
||||
*/
|
||||
static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
|
||||
{
|
||||
return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
|
||||
(psn & HFI1_KDETH_BTH_SEQ_MASK));
|
||||
}
|
||||
|
||||
static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
|
||||
{
|
||||
return __full_flow_psn(&flow->flow_state, psn);
|
||||
}
|
||||
|
||||
struct verbs_txreq;
|
||||
void hfi1_put_txreq(struct verbs_txreq *tx);
|
||||
|
||||
@ -356,9 +441,12 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
|
||||
const struct ib_global_route *grh, u32 hwords, u32 nwords);
|
||||
|
||||
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
|
||||
u32 bth0, u32 bth2, int middle,
|
||||
u32 bth0, u32 bth1, u32 bth2, int middle,
|
||||
struct hfi1_pkt_state *ps);
|
||||
|
||||
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
|
||||
bool tid);
|
||||
|
||||
void _hfi1_do_send(struct work_struct *work);
|
||||
|
||||
void hfi1_do_send_from_rvt(struct rvt_qp *qp);
|
||||
@ -377,6 +465,10 @@ int hfi1_register_ib_device(struct hfi1_devdata *);
|
||||
|
||||
void hfi1_unregister_ib_device(struct hfi1_devdata *);
|
||||
|
||||
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet);
|
||||
|
||||
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet);
|
||||
|
||||
void hfi1_ib_rcv(struct hfi1_packet *packet);
|
||||
|
||||
void hfi1_16B_rcv(struct hfi1_packet *packet);
|
||||
@ -394,6 +486,16 @@ static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
|
||||
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
|
||||
}
|
||||
|
||||
void hfi1_wait_kmem(struct rvt_qp *qp);
|
||||
|
||||
static inline void hfi1_trdma_send_complete(struct rvt_qp *qp,
|
||||
struct rvt_swqe *wqe,
|
||||
enum ib_wc_status status)
|
||||
{
|
||||
trdma_clean_swqe(qp, wqe);
|
||||
rvt_send_complete(qp, wqe, status);
|
||||
}
|
||||
|
||||
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
|
||||
|
||||
extern const u8 hdr_len_by_opcode[];
|
||||
|
@ -94,6 +94,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
|
||||
tx->txreq.num_desc = 0;
|
||||
/* Set the header type */
|
||||
tx->phdr.hdr.hdr_type = priv->hdr_type;
|
||||
tx->txreq.flags = 0;
|
||||
return tx;
|
||||
}
|
||||
|
||||
|
@ -240,8 +240,10 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
|
||||
}
|
||||
|
||||
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
|
||||
if (list_empty(&vnic_sdma->wait.list))
|
||||
if (list_empty(&vnic_sdma->wait.list)) {
|
||||
iowait_get_priority(wait->iow);
|
||||
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
|
||||
}
|
||||
write_sequnlock(&sde->waitlock);
|
||||
return -EBUSY;
|
||||
}
|
||||
@ -281,7 +283,7 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
|
||||
|
||||
iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
|
||||
hfi1_vnic_sdma_sleep,
|
||||
hfi1_vnic_sdma_wakeup, NULL);
|
||||
hfi1_vnic_sdma_wakeup, NULL, NULL);
|
||||
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
|
||||
vnic_sdma->dd = vinfo->dd;
|
||||
vnic_sdma->vinfo = vinfo;
|
||||
|
@ -45,12 +45,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
|
||||
u32 len;
|
||||
|
||||
len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
|
||||
ss->sge = wqe->sg_list[0];
|
||||
ss->sg_list = wqe->sg_list + 1;
|
||||
ss->num_sge = wqe->wr.num_sge;
|
||||
ss->total_len = wqe->length;
|
||||
rvt_skip_sge(ss, len, false);
|
||||
return wqe->length - len;
|
||||
return rvt_restart_sge(ss, wqe, len);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -854,6 +854,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
|
||||
qp->s_mig_state = IB_MIG_MIGRATED;
|
||||
qp->r_head_ack_queue = 0;
|
||||
qp->s_tail_ack_queue = 0;
|
||||
qp->s_acked_ack_queue = 0;
|
||||
qp->s_num_rd_atomic = 0;
|
||||
if (qp->r_rq.wq) {
|
||||
qp->r_rq.wq->head = 0;
|
||||
@ -1642,11 +1643,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
|
||||
kref_put(&qp->ip->ref, rvt_release_mmap_info);
|
||||
else
|
||||
vfree(qp->r_rq.wq);
|
||||
vfree(qp->s_wq);
|
||||
rdi->driver_f.qp_priv_free(rdi, qp);
|
||||
kfree(qp->s_ack_queue);
|
||||
rdma_destroy_ah_attr(&qp->remote_ah_attr);
|
||||
rdma_destroy_ah_attr(&qp->alt_ah_attr);
|
||||
vfree(qp->s_wq);
|
||||
kfree(qp);
|
||||
return 0;
|
||||
}
|
||||
@ -2393,11 +2394,12 @@ static inline unsigned long rvt_aeth_to_usec(u32 aeth)
|
||||
}
|
||||
|
||||
/*
|
||||
* rvt_add_retry_timer - add/start a retry timer
|
||||
* rvt_add_retry_timer_ext - add/start a retry timer
|
||||
* @qp - the QP
|
||||
* @shift - timeout shift to wait for multiple packets
|
||||
* add a retry timer on the QP
|
||||
*/
|
||||
void rvt_add_retry_timer(struct rvt_qp *qp)
|
||||
void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
|
||||
{
|
||||
struct ib_qp *ibqp = &qp->ibqp;
|
||||
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
|
||||
@ -2405,11 +2407,11 @@ void rvt_add_retry_timer(struct rvt_qp *qp)
|
||||
lockdep_assert_held(&qp->s_lock);
|
||||
qp->s_flags |= RVT_S_TIMER;
|
||||
/* 4.096 usec. * (1 << qp->timeout) */
|
||||
qp->s_timer.expires = jiffies + qp->timeout_jiffies +
|
||||
rdi->busy_jiffies;
|
||||
qp->s_timer.expires = jiffies + rdi->busy_jiffies +
|
||||
(qp->timeout_jiffies << shift);
|
||||
add_timer(&qp->s_timer);
|
||||
}
|
||||
EXPORT_SYMBOL(rvt_add_retry_timer);
|
||||
EXPORT_SYMBOL(rvt_add_retry_timer_ext);
|
||||
|
||||
/**
|
||||
* rvt_add_rnr_timer - add/start an rnr timer
|
||||
|
@ -187,3 +187,16 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(rvt_get_credit);
|
||||
|
||||
/* rvt_restart_sge - rewind the sge state for a wqe */
|
||||
u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len)
|
||||
{
|
||||
ss->sge = wqe->sg_list[0];
|
||||
ss->sg_list = wqe->sg_list + 1;
|
||||
ss->num_sge = wqe->wr.num_sge;
|
||||
ss->total_len = wqe->length;
|
||||
rvt_skip_sge(ss, len, false);
|
||||
return wqe->length - len;
|
||||
}
|
||||
EXPORT_SYMBOL(rvt_restart_sge);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
* Copyright(c) 2016 - 2018 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
@ -100,6 +100,8 @@ struct ib_atomic_eth {
|
||||
__be64 compare_data; /* potentially unaligned */
|
||||
} __packed;
|
||||
|
||||
#include <rdma/tid_rdma_defs.h>
|
||||
|
||||
union ib_ehdrs {
|
||||
struct {
|
||||
__be32 deth[2];
|
||||
@ -117,6 +119,16 @@ union ib_ehdrs {
|
||||
__be32 aeth;
|
||||
__be32 ieth;
|
||||
struct ib_atomic_eth atomic_eth;
|
||||
/* TID RDMA headers */
|
||||
union {
|
||||
struct tid_rdma_read_req r_req;
|
||||
struct tid_rdma_read_resp r_rsp;
|
||||
struct tid_rdma_write_req w_req;
|
||||
struct tid_rdma_write_resp w_rsp;
|
||||
struct tid_rdma_write_data w_data;
|
||||
struct tid_rdma_resync resync;
|
||||
struct tid_rdma_ack ack;
|
||||
} tid_rdma;
|
||||
} __packed;
|
||||
|
||||
struct ib_other_headers {
|
||||
|
@ -182,6 +182,7 @@ struct rvt_driver_params {
|
||||
u32 max_mad_size;
|
||||
u8 qos_shift;
|
||||
u8 max_rdma_atomic;
|
||||
u8 extra_rdma_atomic;
|
||||
u8 reserved_operations;
|
||||
};
|
||||
|
||||
@ -519,7 +520,14 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
|
||||
*/
|
||||
static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
|
||||
{
|
||||
return rdi->dparms.max_rdma_atomic + 1;
|
||||
return rdi->dparms.max_rdma_atomic +
|
||||
rdi->dparms.extra_rdma_atomic + 1;
|
||||
}
|
||||
|
||||
static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
|
||||
{
|
||||
return rdi->dparms.max_rdma_atomic +
|
||||
rdi->dparms.extra_rdma_atomic;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -566,9 +574,10 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
|
||||
/**
|
||||
* rvt_mod_retry_timer - mod a retry timer
|
||||
* @qp - the QP
|
||||
* @shift - timeout shift to wait for multiple packets
|
||||
* Modify a potentially already running retry timer
|
||||
*/
|
||||
static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
|
||||
static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
|
||||
{
|
||||
struct ib_qp *ibqp = &qp->ibqp;
|
||||
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
|
||||
@ -576,8 +585,13 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
|
||||
lockdep_assert_held(&qp->s_lock);
|
||||
qp->s_flags |= RVT_S_TIMER;
|
||||
/* 4.096 usec. * (1 << qp->timeout) */
|
||||
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
|
||||
rdi->busy_jiffies);
|
||||
mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
|
||||
(qp->timeout_jiffies << shift));
|
||||
}
|
||||
|
||||
static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
|
||||
{
|
||||
return rvt_mod_retry_timer_ext(qp, 0);
|
||||
}
|
||||
|
||||
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
|
||||
|
@ -174,6 +174,7 @@ struct rvt_swqe {
|
||||
u32 lpsn; /* last packet sequence number */
|
||||
u32 ssn; /* send sequence number */
|
||||
u32 length; /* total length of data in sg_list */
|
||||
void *priv; /* driver dependent field */
|
||||
struct rvt_sge sg_list[0];
|
||||
};
|
||||
|
||||
@ -235,6 +236,7 @@ struct rvt_ack_entry {
|
||||
u32 lpsn;
|
||||
u8 opcode;
|
||||
u8 sent;
|
||||
void *priv;
|
||||
};
|
||||
|
||||
#define RC_QP_SCALING_INTERVAL 5
|
||||
@ -244,6 +246,7 @@ struct rvt_ack_entry {
|
||||
#define RVT_OPERATION_ATOMIC_SGE 0x00000004
|
||||
#define RVT_OPERATION_LOCAL 0x00000008
|
||||
#define RVT_OPERATION_USE_RESERVE 0x00000010
|
||||
#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
|
||||
|
||||
#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
|
||||
|
||||
@ -373,6 +376,7 @@ struct rvt_qp {
|
||||
u8 s_rnr_retry; /* requester RNR retry counter */
|
||||
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
|
||||
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
|
||||
u8 s_acked_ack_queue; /* index into s_ack_queue[] */
|
||||
|
||||
struct rvt_sge_state s_ack_rdma_sge;
|
||||
struct timer_list s_timer;
|
||||
@ -628,6 +632,16 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp);
|
||||
*/
|
||||
void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
|
||||
|
||||
/**
|
||||
* rvt_restart_sge - rewind the sge state for a wqe
|
||||
* @ss: the sge state pointer
|
||||
* @wqe: the wqe to rewind
|
||||
* @len: the data length from the start of the wqe in bytes
|
||||
*
|
||||
* Returns the remaining data length.
|
||||
*/
|
||||
u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
|
||||
|
||||
/**
|
||||
* @qp - the qp pair
|
||||
* @len - the length
|
||||
@ -676,7 +690,11 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
|
||||
void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth);
|
||||
void rvt_del_timers_sync(struct rvt_qp *qp);
|
||||
void rvt_stop_rc_timers(struct rvt_qp *qp);
|
||||
void rvt_add_retry_timer(struct rvt_qp *qp);
|
||||
void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift);
|
||||
static inline void rvt_add_retry_timer(struct rvt_qp *qp)
|
||||
{
|
||||
rvt_add_retry_timer_ext(qp, 0);
|
||||
}
|
||||
|
||||
void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
|
||||
void *data, u32 length,
|
||||
|
108
include/rdma/tid_rdma_defs.h
Normal file
108
include/rdma/tid_rdma_defs.h
Normal file
@ -0,0 +1,108 @@
|
||||
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
|
||||
/*
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef TID_RDMA_DEFS_H
|
||||
#define TID_RDMA_DEFS_H
|
||||
|
||||
#include <rdma/ib_pack.h>
|
||||
|
||||
struct tid_rdma_read_req {
|
||||
__le32 kdeth0;
|
||||
__le32 kdeth1;
|
||||
struct ib_reth reth;
|
||||
__be32 tid_flow_psn;
|
||||
__be32 tid_flow_qp;
|
||||
__be32 verbs_qp;
|
||||
};
|
||||
|
||||
struct tid_rdma_read_resp {
|
||||
__le32 kdeth0;
|
||||
__le32 kdeth1;
|
||||
__be32 aeth;
|
||||
__be32 reserved[4];
|
||||
__be32 verbs_psn;
|
||||
__be32 verbs_qp;
|
||||
};
|
||||
|
||||
struct tid_rdma_write_req {
|
||||
__le32 kdeth0;
|
||||
__le32 kdeth1;
|
||||
struct ib_reth reth;
|
||||
__be32 reserved[2];
|
||||
__be32 verbs_qp;
|
||||
};
|
||||
|
||||
struct tid_rdma_write_resp {
|
||||
__le32 kdeth0;
|
||||
__le32 kdeth1;
|
||||
__be32 aeth;
|
||||
__be32 reserved[3];
|
||||
__be32 tid_flow_psn;
|
||||
__be32 tid_flow_qp;
|
||||
__be32 verbs_qp;
|
||||
};
|
||||
|
||||
struct tid_rdma_write_data {
|
||||
__le32 kdeth0;
|
||||
__le32 kdeth1;
|
||||
__be32 reserved[6];
|
||||
__be32 verbs_qp;
|
||||
};
|
||||
|
||||
struct tid_rdma_resync {
|
||||
__le32 kdeth0;
|
||||
__le32 kdeth1;
|
||||
__be32 reserved[6];
|
||||
__be32 verbs_qp;
|
||||
};
|
||||
|
||||
struct tid_rdma_ack {
|
||||
__le32 kdeth0;
|
||||
__le32 kdeth1;
|
||||
__be32 aeth;
|
||||
__be32 reserved[2];
|
||||
__be32 tid_flow_psn;
|
||||
__be32 verbs_psn;
|
||||
__be32 tid_flow_qp;
|
||||
__be32 verbs_qp;
|
||||
};
|
||||
|
||||
/*
|
||||
* TID RDMA Opcodes
|
||||
*/
|
||||
#define IB_OPCODE_TID_RDMA 0xe0
|
||||
enum {
|
||||
IB_OPCODE_WRITE_REQ = 0x0,
|
||||
IB_OPCODE_WRITE_RESP = 0x1,
|
||||
IB_OPCODE_WRITE_DATA = 0x2,
|
||||
IB_OPCODE_WRITE_DATA_LAST = 0x3,
|
||||
IB_OPCODE_READ_REQ = 0x4,
|
||||
IB_OPCODE_READ_RESP = 0x5,
|
||||
IB_OPCODE_RESYNC = 0x6,
|
||||
IB_OPCODE_ACK = 0x7,
|
||||
|
||||
IB_OPCODE(TID_RDMA, WRITE_REQ),
|
||||
IB_OPCODE(TID_RDMA, WRITE_RESP),
|
||||
IB_OPCODE(TID_RDMA, WRITE_DATA),
|
||||
IB_OPCODE(TID_RDMA, WRITE_DATA_LAST),
|
||||
IB_OPCODE(TID_RDMA, READ_REQ),
|
||||
IB_OPCODE(TID_RDMA, READ_RESP),
|
||||
IB_OPCODE(TID_RDMA, RESYNC),
|
||||
IB_OPCODE(TID_RDMA, ACK),
|
||||
};
|
||||
|
||||
#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
|
||||
|
||||
/*
|
||||
* Define TID RDMA specific WR opcodes. The ib_wr_opcode
|
||||
* enum already provides some reserved values for use by
|
||||
* low level drivers. Two of those are used but renamed
|
||||
* to be more descriptive.
|
||||
*/
|
||||
#define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1
|
||||
#define IB_WR_TID_RDMA_READ IB_WR_RESERVED2
|
||||
|
||||
#endif /* TID_RDMA_DEFS_H */
|
Loading…
x
Reference in New Issue
Block a user