From 1bae4dbf9576e563da23927e4078fffbbce67a75 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Mon, 14 May 2007 17:21:52 -0400 Subject: [PATCH 01/76] IB/mad: Enhance SMI for switch support Extend the SMI with switch (intermediate hop) support. Care has been taken to ensure that the CA (and router) code paths are changed as little as possible. Signed-off-by: Suresh Shelvapille Signed-off-by: Hal Rosenstock Signed-off-by: Roland Dreier --- drivers/infiniband/core/agent.c | 19 +++++++++++-- drivers/infiniband/core/mad.c | 50 +++++++++++++++++++++++++++------ drivers/infiniband/core/smi.c | 16 +++++++++-- drivers/infiniband/core/smi.h | 2 ++ 4 files changed, 72 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index ecd1a3057c61..db2633e4aae6 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -3,7 +3,7 @@ * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -34,7 +34,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ */ #include @@ -42,6 +41,7 @@ #include "agent.h" #include "smi.h" +#include "mad_priv.h" #define SPFX "ib_agent: " @@ -87,8 +87,13 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, struct ib_mad_send_buf *send_buf; struct ib_ah *ah; int ret; + struct ib_mad_send_wr_private *mad_send_wr; + + if (device->node_type == RDMA_NODE_IB_SWITCH) + port_priv = ib_get_agent_port(device, 0); + else + port_priv = ib_get_agent_port(device, port_num); - port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { printk(KERN_ERR SPFX "Unable to find port agent\n"); return -ENODEV; @@ -113,6 +118,14 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, memcpy(send_buf->mad, mad, sizeof *mad); send_buf->ah = ah; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_send_wr->send_wr.wr.ud.port_num = port_num; + } + if ((ret = ib_post_send_mad(send_buf, NULL))) { printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret); goto err2; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 85ccf13b8041..6b8faca02f8a 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -675,10 +675,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_port_private *port_priv; struct ib_mad_agent_private *recv_mad_agent = NULL; struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num = mad_agent_priv->agent.port_num; + u8 port_num; struct ib_wc mad_wc; struct ib_send_wr *send_wr = &mad_send_wr->send_wr; + if (device->node_type == RDMA_NODE_IB_SWITCH && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + port_num = send_wr->wr.ud.port_num; + else + port_num = mad_agent_priv->agent.port_num; + /* * Directed route handling starts if the initial LID routed part of * a request or the ending LID routed part of a response is empty. @@ -1839,6 +1845,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_private *recv, *response; struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; + int port_num; response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); if (!response) @@ -1872,25 +1879,50 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) goto out; + if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + port_num = wc->port_num; + else + port_num = port_priv->port_num; + if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + enum smi_forward_action retsmi; + if (smi_handle_dr_smp_recv(&recv->mad.smp, port_priv->device->node_type, - port_priv->port_num, + port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) goto out; - if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL) + retsmi = smi_check_forward_dr_smp(&recv->mad.smp); + if (retsmi == IB_SMI_LOCAL) goto local; - if (smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_priv->port_num) == IB_SMI_DISCARD) - goto out; + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (smi_handle_dr_smp_send(&recv->mad.smp, + port_priv->device->node_type, + port_num) == IB_SMI_DISCARD) + goto out; + + if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) + goto out; + } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + /* forward case for switches */ + memcpy(response, recv, sizeof(*response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + if (!agent_send_response(&response->mad.mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(&recv->mad.smp), + qp_info->qp->qp_num)) + response = NULL; - if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) goto out; + } } local: @@ -1919,7 +1951,7 @@ local: agent_send_response(&response->mad.mad, &recv->grh, wc, port_priv->device, - port_priv->port_num, + port_num, qp_info->qp->qp_num); goto out; } diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 2bca753eb622..87236753bce9 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -192,7 +192,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, } /* smp->hop_ptr updated when sending */ return (node_type == RDMA_NODE_IB_SWITCH ? - IB_SMI_HANDLE: IB_SMI_DISCARD); + IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ @@ -211,7 +211,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) if (!ib_get_smp_direction(smp)) { /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) - return IB_SMI_SEND; + return IB_SMI_FORWARD; /* C14-9:3 -- at the end of the DR segment of path */ if (hop_ptr == hop_cnt) @@ -224,7 +224,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) } else { /* C14-13:2 -- intermediate hop */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) - return IB_SMI_SEND; + return IB_SMI_FORWARD; /* C14-13:3 -- at the end of the DR segment of path */ if (hop_ptr == 1) @@ -233,3 +233,13 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) } return IB_SMI_LOCAL; } + +/* + * Return the forwarding port number from initial_path for outgoing SMP and + * from return_path for returning SMP + */ +int smi_get_fwd_port(struct ib_smp *smp) +{ + return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] : + smp->return_path[smp->hop_ptr-1]); +} diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 9a4b349efc30..1cfc2984434f 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -48,10 +48,12 @@ enum smi_action { enum smi_forward_action { IB_SMI_LOCAL, /* SMP should be completed up the stack */ IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */ + IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */ }; enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, int port_num, int phys_port_cnt); +int smi_get_fwd_port(struct ib_smp *smp); extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, u8 node_type, int port_num); From fffba373ef8cc9a5881353395c4edd1ab27cf154 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Thu, 24 May 2007 16:51:05 +0200 Subject: [PATCH 02/76] IB/ehca: Refactor "maybe missed event" code Refactor the ehca changes from commit ed23a727 ("IB: Return "maybe missed event" hint from ib_req_notify_cq()") so the queue arithmetic is done in slightly fewer lines. Also, move the spinlock flags into the block they're used in. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_reqs.c | 2 +- drivers/infiniband/hw/ehca/ipz_pt_fn.h | 28 +++++++++----------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index caec9dee09e1..56c4527c884f 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -637,7 +637,6 @@ poll_cq_exit0: int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - unsigned long spl_flags; int ret = 0; switch (notify_flags & IB_CQ_SOLICITED_MASK) { @@ -652,6 +651,7 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) } if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; spin_lock_irqsave(&my_cq->spinlock, spl_flags); ret = ipz_qeit_is_valid(&my_cq->ipz_queue); spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 57f141a36bce..007f0882fd40 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -105,7 +105,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue); * step in struct ipz_queue, will wrap in ringbuffer * returns address (kv) of Queue Entry BEFORE increment * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries */ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) { @@ -120,32 +119,25 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) return ret; } +/* + * return a bool indicating whether current Queue Entry is valid + */ +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); +} + /* * return current Queue Entry, increment Queue Entry iterator by one * step in struct ipz_queue, will wrap in ringbuffer * returns address (kv) of Queue Entry BEFORE increment * returns 0 and does not increment, if wrong valid state * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries */ static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) { - struct ehca_cqe *cqe = ipz_qeit_get(queue); - u32 cqe_flags = cqe->cqe_flags; - - if ((cqe_flags >> 7) != (queue->toggle_state & 1)) - return NULL; - - ipz_qeit_get_inc(queue); - return cqe; -} - -static inline int ipz_qeit_is_valid(struct ipz_queue *queue) -{ - struct ehca_cqe *cqe = ipz_qeit_get(queue); - u32 cqe_flags = cqe->cqe_flags; - - return cqe_flags >> 7 == (queue->toggle_state & 1); + return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL; } /* From 43506d954e43933cd6fdcab679f6ab057e7607c6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 9 Jul 2007 16:17:32 -0700 Subject: [PATCH 03/76] IB: Remove garbage non-ASCII characters from comments A few files had 0xa0 characters in comments. Remove them so that the files are clean ASCII text. Signed-off-by: Roland Dreier --- drivers/infiniband/core/multicast.c | 2 +- drivers/infiniband/core/sa.h | 2 +- drivers/infiniband/core/sa_query.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1e13ab42b70b..15b4c4d3606d 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Intel Corporation.  All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h index 24c93fd320fb..b1d4bbf4ce5c 100644 --- a/drivers/infiniband/core/sa.h +++ b/drivers/infiniband/core/sa.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc.  All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 6469406ea9d8..9d3797fcc37e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc.  All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two From 1d3f4b905a786d69103d9e6d8e92683fb2c7a027 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 8 Jun 2007 16:29:43 -0700 Subject: [PATCH 04/76] IB: Fix ib_umem_get() when npages == 0 gcc correctly warned: drivers/infiniband/core/umem.c: In function 'ib_umem_get': drivers/infiniband/core/umem.c:78: warning: 'ret' may be used uninitialized in this function Set ret to 0 in case npages == 0 and the loop isn't entered at all. Signed-off-by: Andrew Morton Signed-off-by: Roland Dreier --- drivers/infiniband/core/umem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index d40652a80151..26d0470eef6e 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -121,6 +121,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, cur_base = addr & PAGE_MASK; + ret = 0; while (npages) { ret = get_user_pages(current, current->mm, cur_base, min_t(int, npages, From 63019d9329628a5056b3a0c76aee955873dc25c3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 11 Jun 2007 08:56:10 +0300 Subject: [PATCH 05/76] IB/mlx4: Include linux/mutex.h from mlx4_ib.h mlx4_ib.h uses struct mutex, so although seems to be pulled in indirectly by one of the headers it includes, the right thing is to include directly. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 24ccadd6e4f8..40b83914b7b2 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -35,6 +35,7 @@ #include #include +#include #include #include From 525f5f44c487201c91a11904516ee56dea93644e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 9 Jul 2007 20:12:20 -0700 Subject: [PATCH 06/76] mlx4_core: Include linux/mutex.h from mlx4.h mlx4.h uses struct mutex, so although seems to be pulled in indirectly by one of the headers it includes, the right thing to do is to include directly. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/net/mlx4/mlx4.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 3d3b6d24d8d3..d9c91a71fc87 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -37,6 +37,7 @@ #ifndef MLX4_H #define MLX4_H +#include #include #include From a024291b367f98188f4da4a66a9f2f40a2163efb Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Wed, 16 May 2007 15:31:19 -0700 Subject: [PATCH 07/76] IB/ipath: Include to fix ppc64 build Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_iba6110.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 4171198fc202..ba73dd0a06cd 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -36,6 +36,7 @@ * HT chip. */ +#include #include #include #include From 82466f00ec6ef0a5ca7ea8991c731af2ec561c7d Mon Sep 17 00:00:00 2001 From: Michael Albaugh Date: Wed, 16 May 2007 15:45:09 -0700 Subject: [PATCH 08/76] IB/ipath: Support blinking LEDs with an led_override file When we want to find an InfiniPath HCA in a rack of nodes, it is often expeditious to blink the status LEDs via a userspace /sys file. A write-only led_override "file" is published per device. Writes to this file are interpreted as (string form) numbers, and the resulting value sent to ipath_set_led_override(). The upper eight bits are interpretted as a 4.4 fixed-point "frequency in Hertz", and the bottom two 4-bit values are alternately (D0..3, then D4..7) used by the board-specific LED-setting function to override the normal state. Signed-off-by: Michael Albaugh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 92 ++++++++++++++++++++- drivers/infiniband/hw/ipath/ipath_iba6110.c | 10 +++ drivers/infiniband/hw/ipath/ipath_iba6120.c | 10 +++ drivers/infiniband/hw/ipath/ipath_kernel.h | 19 +++++ drivers/infiniband/hw/ipath/ipath_sysfs.c | 19 +++++ 5 files changed, 149 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index e3a223209710..097593286582 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1846,6 +1846,87 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, ipath_write_kreg(dd, where, value); } +/* + * Following deal with the "obviously simple" task of overriding the state + * of the LEDS, which normally indicate link physical and logical status. + * The complications arise in dealing with different hardware mappings + * and the board-dependent routine being called from interrupts. + * and then there's the requirement to _flash_ them. + */ +#define LED_OVER_FREQ_SHIFT 8 +#define LED_OVER_FREQ_MASK (0xFF<ipath_flags & IPATH_INITTED)) + return; + + pidx = dd->ipath_led_override_phase++ & 1; + dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; + timeoff = dd->ipath_led_override_timeoff; + + /* + * below potentially restores the LED values per current status, + * should also possibly setup the traffic-blink register, + * but leave that to per-chip functions. + */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); + ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & + INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; + lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & + INFINIPATH_IBCS_LINKSTATE_MASK; + + dd->ipath_f_setextled(dd, lstate, ltstate); + mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); +} + +void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) +{ + int timeoff, freq; + + if (!(dd->ipath_flags & IPATH_INITTED)) + return; + + /* First check if we are blinking. If not, use 1HZ polling */ + timeoff = HZ; + freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; + + if (freq) { + /* For blink, set each phase from one nybble of val */ + dd->ipath_led_override_vals[0] = val & 0xF; + dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; + timeoff = (HZ << 4)/freq; + } else { + /* Non-blink set both phases the same. */ + dd->ipath_led_override_vals[0] = val & 0xF; + dd->ipath_led_override_vals[1] = val & 0xF; + } + dd->ipath_led_override_timeoff = timeoff; + + /* + * If the timer has not already been started, do so. Use a "quick" + * timeout so the function will be called soon, to look at our request. + */ + if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { + /* Need to start timer */ + init_timer(&dd->ipath_led_override_timer); + dd->ipath_led_override_timer.function = + ipath_run_led_override; + dd->ipath_led_override_timer.data = (unsigned long) dd; + dd->ipath_led_override_timer.expires = jiffies + 1; + add_timer(&dd->ipath_led_override_timer); + } else { + atomic_dec(&dd->ipath_led_override_timer_active); + } +} + /** * ipath_shutdown_device - shut down a device * @dd: the infinipath device @@ -1909,7 +1990,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd) * Turn the LEDs off explictly for the same reason. */ dd->ipath_f_quiet_serdes(dd); - dd->ipath_f_setextled(dd, 0, 0); if (dd->ipath_stats_timer_active) { del_timer_sync(&dd->ipath_stats_timer); @@ -2085,6 +2165,16 @@ int ipath_reset_device(int unit) goto bail; } + if (atomic_read(&dd->ipath_led_override_timer_active)) { + /* Need to stop LED timer, _then_ shut off LEDs */ + del_timer_sync(&dd->ipath_led_override_timer); + atomic_set(&dd->ipath_led_override_timer_active, 0); + } + + /* Shut off LEDs after we are sure timer is not running */ + dd->ipath_led_override = LED_OVER_BOTH_OFF; + dd->ipath_f_setextled(dd, 0, 0); + dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index ba73dd0a06cd..4372c6c50ff6 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -1065,6 +1065,16 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, if (ipath_diag_inuse) return; + /* Allow override of LED display for, e.g. Locating system in rack */ + if (dd->ipath_led_override) { + ltst = (dd->ipath_led_override & IPATH_LED_PHYS) + ? INFINIPATH_IBCS_LT_STATE_LINKUP + : INFINIPATH_IBCS_LT_STATE_DISABLED; + lst = (dd->ipath_led_override & IPATH_LED_LOG) + ? INFINIPATH_IBCS_L_STATE_ACTIVE + : INFINIPATH_IBCS_L_STATE_DOWN; + } + /* * start by setting both LED control bits to off, then turn * on the appropriate bit(s). diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 4e2e3dfeb2c8..bcb70d67dbb9 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -797,6 +797,16 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, if (ipath_diag_inuse) return; + /* Allow override of LED display for, e.g. Locating system in rack */ + if (dd->ipath_led_override) { + ltst = (dd->ipath_led_override & IPATH_LED_PHYS) + ? INFINIPATH_IBCS_LT_STATE_LINKUP + : INFINIPATH_IBCS_LT_STATE_DISABLED; + lst = (dd->ipath_led_override & IPATH_LED_LOG) + ? INFINIPATH_IBCS_L_STATE_ACTIVE + : INFINIPATH_IBCS_L_STATE_DOWN; + } + extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON | INFINIPATH_EXTC_LED2PRIPORT_ON); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 12194f3dd8cc..2f39db7df31c 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -575,6 +575,16 @@ struct ipath_devdata { u16 ipath_gpio_scl_num; u64 ipath_gpio_sda; u64 ipath_gpio_scl; + + /* used to override LED behavior */ + u8 ipath_led_override; /* Substituted for normal value, if non-zero */ + u16 ipath_led_override_timeoff; /* delta to next timer event */ + u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */ + u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */ + atomic_t ipath_led_override_timer_active; + /* Used to flash LEDs in override mode */ + struct timer_list ipath_led_override_timer; + }; /* Private data for file operations */ @@ -716,6 +726,15 @@ void ipath_get_eeprom_info(struct ipath_devdata *); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); void ipath_disarm_senderrbufs(struct ipath_devdata *, int); +/* + * Set LED override, only the two LSBs have "public" meaning, but + * any non-zero value substitutes them for the Link and LinkTrain + * LED states. + */ +#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ +#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */ +void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); + /* * number of words used for protocol header if not set by ipath_userinit(); */ diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 4dc398d5e011..17ec14571722 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -596,6 +596,23 @@ bail: return ret; } +static ssize_t store_led_override(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret > 0) + ipath_set_led_override(dd, val); + else + ipath_dev_err(dd, "attempt to set invalid LED override\n"); + return ret; +} + static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -625,6 +642,7 @@ static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); +static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -641,6 +659,7 @@ static struct attribute *dev_attributes[] = { &dev_attr_unit.attr, &dev_attr_enabled.attr, &dev_attr_rx_pol_inv.attr, + &dev_attr_led_override.attr, NULL }; From 17b2eb9fe6bfadcb3ece308ed50193d10b71ba6e Mon Sep 17 00:00:00 2001 From: Michael Albaugh Date: Thu, 17 May 2007 07:05:04 -0700 Subject: [PATCH 09/76] IB/ipath: Lock and always use shadow copies of GPIO register The new LED blinking interface adds more contention for the unprotected GPIO pins that were already shared, though not commonly at the same time. We add locks to the accesses to these pins so that Read-Modify-Write is now safe. Some of these locks are added at interrupt context, so we shadow the registers which drive and inspect these pins to avoid the mmio read/writes. This mitigates the effects of the locks and hastens us through the interrupt. Add locking and always use shadows for registers controlling GPIO pins (ExtCtrl and GPIOout). The use of shadows implies doing less I/O, which can make I2C operation too fast on some platforms. An explicit udelay(1) in SCL manipulation fixes that. Signed-off-by: Michael Albaugh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_eeprom.c | 74 +++++++++++-------- drivers/infiniband/hw/ipath/ipath_iba6110.c | 3 + drivers/infiniband/hw/ipath/ipath_iba6120.c | 3 + drivers/infiniband/hw/ipath/ipath_init_chip.c | 2 + drivers/infiniband/hw/ipath/ipath_kernel.h | 7 +- 5 files changed, 56 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 030185f90ee2..26daac9d8b63 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -95,39 +95,37 @@ static int i2c_gpio_set(struct ipath_devdata *dd, enum i2c_type line, enum i2c_state new_line_state) { - u64 read_val, write_val, mask, *gpioval; + u64 out_mask, dir_mask, *gpioval; + unsigned long flags = 0; gpioval = &dd->ipath_gpio_out; - read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); - if (line == i2c_line_scl) - mask = dd->ipath_gpio_scl; - else - mask = dd->ipath_gpio_sda; - - if (new_line_state == i2c_line_high) - /* tri-state the output rather than force high */ - write_val = read_val & ~mask; - else - /* config line to be an output */ - write_val = read_val | mask; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); - - /* set high and verify */ - if (new_line_state == i2c_line_high) - write_val = 0x1UL; - else - write_val = 0x0UL; if (line == i2c_line_scl) { - write_val <<= dd->ipath_gpio_scl_num; - *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num); - *gpioval |= write_val; + dir_mask = dd->ipath_gpio_scl; + out_mask = (1UL << dd->ipath_gpio_scl_num); } else { - write_val <<= dd->ipath_gpio_sda_num; - *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num); - *gpioval |= write_val; + dir_mask = dd->ipath_gpio_sda; + out_mask = (1UL << dd->ipath_gpio_sda_num); } + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + if (new_line_state == i2c_line_high) { + /* tri-state the output rather than force high */ + dd->ipath_extctrl &= ~dir_mask; + } else { + /* config line to be an output */ + dd->ipath_extctrl |= dir_mask; + } + ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); + + /* set output as well (no real verify) */ + if (new_line_state == i2c_line_high) + *gpioval |= out_mask; + else + *gpioval &= ~out_mask; + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); return 0; } @@ -145,8 +143,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd, enum i2c_type line, enum i2c_state *curr_statep) { - u64 read_val, write_val, mask; + u64 read_val, mask; int ret; + unsigned long flags = 0; /* check args */ if (curr_statep == NULL) { @@ -154,15 +153,21 @@ static int i2c_gpio_get(struct ipath_devdata *dd, goto bail; } - read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); /* config line to be an input */ if (line == i2c_line_scl) mask = dd->ipath_gpio_scl; else mask = dd->ipath_gpio_sda; - write_val = read_val & ~mask; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + dd->ipath_extctrl &= ~mask; + ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); + /* + * Below is very unlikely to reflect true input state if Output + * Enable actually changed. + */ read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); if (read_val & mask) *curr_statep = i2c_line_high; @@ -192,6 +197,7 @@ static void i2c_wait_for_writes(struct ipath_devdata *dd) static void scl_out(struct ipath_devdata *dd, u8 bit) { + udelay(1); i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low); i2c_wait_for_writes(dd); @@ -314,12 +320,18 @@ static int eeprom_reset(struct ipath_devdata *dd) int clock_cycles_left = 9; u64 *gpioval = &dd->ipath_gpio_out; int ret; + unsigned long flags; - eeprom_init = 1; + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + /* Make sure shadows are consistent */ + dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); + ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg " "is %llx\n", (unsigned long long) *gpioval); + eeprom_init = 1; /* * This is to get the i2c into a known state, by first going low, * then tristate sda (and then tristate scl as first thing diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 4372c6c50ff6..8482ea366fb1 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -1059,6 +1059,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, u64 lst, u64 ltst) { u64 extctl; + unsigned long flags = 0; /* the diags use the LED to indicate diag info, so we leave * the external LED alone when the diags are running */ @@ -1075,6 +1076,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, : INFINIPATH_IBCS_L_STATE_DOWN; } + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); /* * start by setting both LED control bits to off, then turn * on the appropriate bit(s). @@ -1103,6 +1105,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, } dd->ipath_extctrl = extctl; ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); } static void ipath_init_ht_variables(struct ipath_devdata *dd) diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index bcb70d67dbb9..2345bb011acd 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -791,6 +791,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, u64 ltst) { u64 extctl; + unsigned long flags = 0; /* the diags use the LED to indicate diag info, so we leave * the external LED alone when the diags are running */ @@ -807,6 +808,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, : INFINIPATH_IBCS_L_STATE_DOWN; } + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON | INFINIPATH_EXTC_LED2PRIPORT_ON); @@ -816,6 +818,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; dd->ipath_extctrl = extctl; ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); } /** diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 7045ba689494..f6ee7a83595a 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -340,6 +340,8 @@ static int init_chip_first(struct ipath_devdata *dd, spin_lock_init(&dd->ipath_tid_lock); + spin_lock_init(&dd->ipath_gpio_lock); + done: *pdp = pd; return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 2f39db7df31c..bd1088a99891 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -399,6 +399,8 @@ struct ipath_devdata { u64 ipath_gpio_out; /* shadow the gpio mask register */ u64 ipath_gpio_mask; + /* shadow the gpio output enable, etc... */ + u64 ipath_extctrl; /* kr_revision shadow */ u64 ipath_revision; /* @@ -473,8 +475,6 @@ struct ipath_devdata { u32 ipath_cregbase; /* shadow the control register contents */ u32 ipath_control; - /* shadow the gpio output contents */ - u32 ipath_extctrl; /* PCI revision register (HTC rev on FPGA) */ u32 ipath_pcirev; @@ -576,6 +576,9 @@ struct ipath_devdata { u64 ipath_gpio_sda; u64 ipath_gpio_scl; + /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ + spinlock_t ipath_gpio_lock; + /* used to override LED behavior */ u8 ipath_led_override; /* Substituted for normal value, if non-zero */ u16 ipath_led_override_timeoff; /* delta to next timer event */ From 8e9ab3f1c9e34d5c28446c3738983d33a3937fe0 Mon Sep 17 00:00:00 2001 From: John Gregor Date: Thu, 17 May 2007 08:15:50 -0700 Subject: [PATCH 10/76] IB/ipath: Remove incompletely implemented ipath_runtime flags and code The IPATH_RUNTIME_PBC_REWRITE and the IPATH_RUNTIME_LOOSE_DMA_ALIGN flags were not ever implemented correctly and did not turn out to be necessary. Remove the last vestiges of these flags but mark the spot with a comment to remind us to not reuse these flags in the interest of binary compatibility. The INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR bit was also not found to be useful, so it was dropped in the cleanup as well. Signed-off-by: John Gregor Signed-off-by: Arthur Jones Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 3 +-- drivers/infiniband/hw/ipath/ipath_iba6120.c | 25 --------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 10c008f22ba6..12e1349cd03e 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -189,8 +189,7 @@ typedef enum _ipath_ureg { #define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 #define IPATH_RUNTIME_RCVHDR_COPY 0x8 #define IPATH_RUNTIME_MASTER 0x10 -#define IPATH_RUNTIME_PBC_REWRITE 0x20 -#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40 +/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */ /* * This structure is returned by ipath_userinit() immediately after diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 2345bb011acd..711590740118 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -296,13 +296,6 @@ static const struct ipath_cregs ipath_pe_cregs = { #define IPATH_GPIO_SCL (1ULL << \ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -/* - * Rev2 silicon allows suppressing check for ArmLaunch errors. - * this can speed up short packet sends on systems that do - * not guaranteee write-order. - */ -#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63) - /* 6120 specific hardware errors... */ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), @@ -680,17 +673,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val |= dd->ipath_rx_pol_inv << INFINIPATH_XGXS_RX_POL_SHIFT; } - if (dd->ipath_minrev >= 2) { - /* Rev 2. can tolerate multiple writes to PBC, and - * allowing them can provide lower latency on some - * CPUs, but this feature is off by default, only - * turned on by setting D63 of XGXSconfig reg. - * May want to make this conditional more - * fine-grained in future. This is not exactly - * related to XGXS, but where the bit ended up. - */ - val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR; - } if (val != prev_val) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); @@ -1324,13 +1306,6 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) dd = pd->port_dd; - if (dd != NULL && dd->ipath_minrev >= 2) { - ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n"); - kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE; - ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n"); - kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN; - } - done: kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; return 0; From aecd3b5ab19624ca9644b9df9c61615282d8923f Mon Sep 17 00:00:00 2001 From: Michael Albaugh Date: Thu, 17 May 2007 07:26:28 -0700 Subject: [PATCH 11/76] IB/ipath: Log "active" time and some errors to EEPROM We currently track various errors, now we enhance that capability by logging some of them to EEPROM. We also now log a cumulative "active" time defined by traffic though the InfiniPath HCA beyond the normal SM traffic. Signed-off-by: Michael Albaugh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 3 + drivers/infiniband/hw/ipath/ipath_eeprom.c | 233 +++++++++++++++++- drivers/infiniband/hw/ipath/ipath_iba6110.c | 22 ++ drivers/infiniband/hw/ipath/ipath_iba6120.c | 27 ++ drivers/infiniband/hw/ipath/ipath_init_chip.c | 2 + drivers/infiniband/hw/ipath/ipath_intr.c | 8 + drivers/infiniband/hw/ipath/ipath_kernel.h | 38 +++ drivers/infiniband/hw/ipath/ipath_stats.c | 23 +- drivers/infiniband/hw/ipath/ipath_sysfs.c | 22 ++ 9 files changed, 370 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 097593286582..e9639860b48d 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -2005,6 +2005,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); + + ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); + ipath_update_eeprom_log(dd); } /** diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 26daac9d8b63..9be1b9ac55f0 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -367,8 +367,8 @@ bail: * @len: number of bytes to receive */ -int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, - void *buffer, int len) +static int ipath_eeprom_internal_read(struct ipath_devdata *dd, + u8 eeprom_offset, void *buffer, int len) { /* compiler complains unless initialized */ u8 single_byte = 0; @@ -418,6 +418,7 @@ bail: return ret; } + /** * ipath_eeprom_write - writes data to the eeprom via I2C * @dd: the infinipath device @@ -425,8 +426,8 @@ bail: * @buffer: data to write * @len: number of bytes to write */ -int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buffer, int len) +int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buffer, int len) { u8 single_byte; int sub_len; @@ -500,6 +501,38 @@ bail: return ret; } +/* + * The public entry-points ipath_eeprom_read() and ipath_eeprom_write() + * are now just wrappers around the internal functions. + */ +int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, + void *buff, int len) +{ + int ret; + + ret = down_interruptible(&dd->ipath_eep_sem); + if (!ret) { + ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); + up(&dd->ipath_eep_sem); + } + + return ret; +} + +int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buff, int len) +{ + int ret; + + ret = down_interruptible(&dd->ipath_eep_sem); + if (!ret) { + ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); + up(&dd->ipath_eep_sem); + } + + return ret; +} + static u8 flash_csum(struct ipath_flash *ifp, int adjust) { u8 *ip = (u8 *) ifp; @@ -527,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) void *buf; struct ipath_flash *ifp; __be64 guid; - int len; + int len, eep_stat; u8 csum, *bguid; int t = dd->ipath_unit; struct ipath_devdata *dd0 = ipath_lookup(0); @@ -571,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) goto bail; } - if (ipath_eeprom_read(dd, 0, buf, len)) { + down(&dd->ipath_eep_sem); + eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); + up(&dd->ipath_eep_sem); + + if (eep_stat) { ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); goto done; } @@ -646,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", (unsigned long long) be64_to_cpu(dd->ipath_guid)); + memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT); + /* + * Power-on (actually "active") hours are kept as little-endian value + * in EEPROM, but as seconds in a (possibly as small as 24-bit) + * atomic_t while running. + */ + atomic_set(&dd->ipath_active_time, 0); + dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); + done: vfree(buf); bail:; } + +/** + * ipath_update_eeprom_log - copy active-time and error counters to eeprom + * @dd: the infinipath device + * + * Although the time is kept as seconds in the ipath_devdata struct, it is + * rounded to hours for re-write, as we have only 16 bits in EEPROM. + * First-cut code reads whole (expected) struct ipath_flash, modifies, + * re-writes. Future direction: read/write only what we need, assuming + * that the EEPROM had to have been "good enough" for driver init, and + * if not, we aren't making it worse. + * + */ + +int ipath_update_eeprom_log(struct ipath_devdata *dd) +{ + void *buf; + struct ipath_flash *ifp; + int len, hi_water; + uint32_t new_time, new_hrs; + u8 csum; + int ret, idx; + unsigned long flags; + + /* first, check if we actually need to do anything. */ + ret = 0; + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + if (dd->ipath_eep_st_new_errs[idx]) { + ret = 1; + break; + } + } + new_time = atomic_read(&dd->ipath_active_time); + + if (ret == 0 && new_time < 3600) + return 0; + + /* + * The quick-check above determined that there is something worthy + * of logging, so get current contents and do a more detailed idea. + */ + len = offsetof(struct ipath_flash, if_future); + buf = vmalloc(len); + ret = 1; + if (!buf) { + ipath_dev_err(dd, "Couldn't allocate memory to read %u " + "bytes from eeprom for logging\n", len); + goto bail; + } + + /* Grab semaphore and read current EEPROM. If we get an + * error, let go, but if not, keep it until we finish write. + */ + ret = down_interruptible(&dd->ipath_eep_sem); + if (ret) { + ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); + goto free_bail; + } + ret = ipath_eeprom_internal_read(dd, 0, buf, len); + if (ret) { + up(&dd->ipath_eep_sem); + ipath_dev_err(dd, "Unable read EEPROM for logging\n"); + goto free_bail; + } + ifp = (struct ipath_flash *)buf; + + csum = flash_csum(ifp, 0); + if (csum != ifp->if_csum) { + up(&dd->ipath_eep_sem); + ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", + csum, ifp->if_csum); + ret = 1; + goto free_bail; + } + hi_water = 0; + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + int new_val = dd->ipath_eep_st_new_errs[idx]; + if (new_val) { + /* + * If we have seen any errors, add to EEPROM values + * We need to saturate at 0xFF (255) and we also + * would need to adjust the checksum if we were + * trying to minimize EEPROM traffic + * Note that we add to actual current count in EEPROM, + * in case it was altered while we were running. + */ + new_val += ifp->if_errcntp[idx]; + if (new_val > 0xFF) + new_val = 0xFF; + if (ifp->if_errcntp[idx] != new_val) { + ifp->if_errcntp[idx] = new_val; + hi_water = offsetof(struct ipath_flash, + if_errcntp) + idx; + } + /* + * update our shadow (used to minimize EEPROM + * traffic), to match what we are about to write. + */ + dd->ipath_eep_st_errs[idx] = new_val; + dd->ipath_eep_st_new_errs[idx] = 0; + } + } + /* + * now update active-time. We would like to round to the nearest hour + * but unless atomic_t are sure to be proper signed ints we cannot, + * because we need to account for what we "transfer" to EEPROM and + * if we log an hour at 31 minutes, then we would need to set + * active_time to -29 to accurately count the _next_ hour. + */ + if (new_time > 3600) { + new_hrs = new_time / 3600; + atomic_sub((new_hrs * 3600), &dd->ipath_active_time); + new_hrs += dd->ipath_eep_hrs; + if (new_hrs > 0xFFFF) + new_hrs = 0xFFFF; + dd->ipath_eep_hrs = new_hrs; + if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { + ifp->if_powerhour[0] = new_hrs & 0xFF; + hi_water = offsetof(struct ipath_flash, if_powerhour); + } + if ((new_hrs >> 8) != ifp->if_powerhour[1]) { + ifp->if_powerhour[1] = new_hrs >> 8; + hi_water = offsetof(struct ipath_flash, if_powerhour) + + 1; + } + } + /* + * There is a tiny possibility that we could somehow fail to write + * the EEPROM after updating our shadows, but problems from holding + * the spinlock too long are a much bigger issue. + */ + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + if (hi_water) { + /* we made some change to the data, uopdate cksum and write */ + csum = flash_csum(ifp, 1); + ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); + } + up(&dd->ipath_eep_sem); + if (ret) + ipath_dev_err(dd, "Failed updating EEPROM\n"); + +free_bail: + vfree(buf); +bail: + return ret; + +} + +/** + * ipath_inc_eeprom_err - increment one of the four error counters + * that are logged to EEPROM. + * @dd: the infinipath device + * @eidx: 0..3, the counter to increment + * @incr: how much to add + * + * Each counter is 8-bits, and saturates at 255 (0xFF). They + * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log() + * is called, but it can only be called in a context that allows sleep. + * This function can be called even at interrupt level. + */ + +void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr) +{ + uint new_val; + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + new_val = dd->ipath_eep_st_new_errs[eidx] + incr; + if (new_val > 255) + new_val = 255; + dd->ipath_eep_st_new_errs[eidx] = new_val; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + return; +} diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 8482ea366fb1..85f408de7bf7 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -440,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, u32 bits, ctrl; int isfatal = 0; char bitsmsg[64]; + int log_idx; hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); @@ -468,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, hwerrs &= dd->ipath_hwerrmask; + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) + ipath_inc_eeprom_err(dd, log_idx, 1); + /* * make sure we get this much out, unless told to be quiet, * it's a parity error we may recover from, @@ -1171,6 +1177,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->ipath_eep_st_masks[0].hwerrs_to_log = + INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[1].hwerrs_to_log = + INFINIPATH_HWE_RXEMEMPARITYERR_MASK << + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[2].errs_to_log = + INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + } /** diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 711590740118..207323a5b52b 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -340,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, u32 bits, ctrl; int isfatal = 0; char bitsmsg[64]; + int log_idx; hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); if (!hwerrs) { @@ -367,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, hwerrs &= dd->ipath_hwerrmask; + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) + ipath_inc_eeprom_err(dd, log_idx, 1); + /* * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds @@ -950,6 +956,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->ipath_eep_st_masks[0].hwerrs_to_log = + INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; + + /* Ignore errors in PIO/PBC on systems with unordered write-combining */ + if (ipath_unordered_wc()) + dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY; + + dd->ipath_eep_st_masks[1].hwerrs_to_log = + INFINIPATH_HWE_RXEMEMPARITYERR_MASK << + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[2].errs_to_log = + INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + + } /* setup the MSI stuff again after a reset. I'd like to just call diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index f6ee7a83595a..ee839346a3a4 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -341,6 +341,8 @@ static int init_chip_first(struct ipath_devdata *dd, spin_lock_init(&dd->ipath_tid_lock); spin_lock_init(&dd->ipath_gpio_lock); + spin_lock_init(&dd->ipath_eep_st_lock); + sema_init(&dd->ipath_eep_sem, 1); done: *pdp = pd; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index a90d3b5699c4..d9cdd00c8233 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -505,6 +505,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) int i, iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; + int log_idx; supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); @@ -518,6 +519,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) if (errs & INFINIPATH_E_HARDWARE) { /* reuse same msg buf */ dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); + } else { + u64 mask; + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) { + mask = dd->ipath_eep_st_masks[log_idx].errs_to_log; + if (errs & mask) + ipath_inc_eeprom_err(dd, log_idx, 1); + } } if (!noprint && (errs & ~dd->ipath_e_bitsextant)) diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index bd1088a99891..2a4414b948ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -57,6 +57,24 @@ extern struct infinipath_stats ipath_stats; #define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ +/* + * First-cut critierion for "device is active" is + * two thousand dwords combined Tx, Rx traffic per + * 5-second interval. SMA packets are 64 dwords, + * and occur "a few per second", presumably each way. + */ +#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000) +/* + * Struct used to indicate which errors are logged in each of the + * error-counters that are logged to EEPROM. A counter is incremented + * _once_ (saturating at 255) for each event with any bits set in + * the error or hwerror register masks below. + */ +#define IPATH_EEP_LOG_CNT (4) +struct ipath_eep_log_mask { + u64 errs_to_log; + u64 hwerrs_to_log; +}; struct ipath_portdata { void **port_rcvegrbuf; @@ -588,6 +606,24 @@ struct ipath_devdata { /* Used to flash LEDs in override mode */ struct timer_list ipath_led_override_timer; + /* Support (including locks) for EEPROM logging of errors and time */ + /* control access to actual counters, timer */ + spinlock_t ipath_eep_st_lock; + /* control high-level access to EEPROM */ + struct semaphore ipath_eep_sem; + /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ + uint64_t ipath_traffic_wds; + /* active time is kept in seconds, but logged in hours */ + atomic_t ipath_active_time; + /* Below are nominal shadow of EEPROM, new since last EEPROM update */ + uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT]; + uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT]; + uint16_t ipath_eep_hrs; + /* + * masks for which bits of errs, hwerrs that cause + * each of the counters to increment. + */ + struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; }; /* Private data for file operations */ @@ -726,6 +762,8 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); +int ipath_update_eeprom_log(struct ipath_devdata *dd); +void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); void ipath_disarm_senderrbufs(struct ipath_devdata *, int); diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index d8b5e4cefe25..2955f368de0c 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) u64 val64; unsigned long t0, t1; u64 ret; + unsigned long flags; t0 = jiffies; /* If fast increment counters are only 32 bits, snapshot them, @@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) if (creg == dd->ipath_cregs->cr_wordsendcnt) { if (val != dd->ipath_lastsword) { dd->ipath_sword += val - dd->ipath_lastsword; + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + dd->ipath_traffic_wds += val - dd->ipath_lastsword; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); dd->ipath_lastsword = val; } val64 = dd->ipath_sword; } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { if (val != dd->ipath_lastrword) { dd->ipath_rword += val - dd->ipath_lastrword; + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + dd->ipath_traffic_wds += val - dd->ipath_lastrword; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); dd->ipath_lastrword = val; } val64 = dd->ipath_rword; @@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque) struct ipath_devdata *dd = (struct ipath_devdata *) opaque; u32 val; static unsigned cnt; + unsigned long flags; /* * don't access the chip while running diags, or memory diags can @@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque) /* but re-arm the timer, for diags case; won't hurt other */ goto done; + /* + * We now try to maintain a "active timer", based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(5, &dd->ipath_active_time); /* S/B #define */ + dd->ipath_traffic_wds = 0; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + if (dd->ipath_flags & IPATH_32BITCOUNTERS) { - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); } diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 17ec14571722..ab34d3e8b955 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -613,6 +613,26 @@ static ssize_t store_led_override(struct device *dev, return ret; } +static ssize_t show_logged_errs(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int idx, count; + + /* force consistency with actual EEPROM */ + if (ipath_update_eeprom_log(dd) != 0) + return -ENXIO; + + count = 0; + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", + dd->ipath_eep_st_errs[idx], + idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' '); + } + + return count; +} static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -643,6 +663,7 @@ static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); +static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -660,6 +681,7 @@ static struct attribute *dev_attributes[] = { &dev_attr_enabled.attr, &dev_attr_rx_pol_inv.attr, &dev_attr_led_override.attr, + &dev_attr_logged_errors.attr, NULL }; From 380bf5d38f3cc2799ed2fae554f7af1c4b0ed35b Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Mon, 18 Jun 2007 14:24:35 -0700 Subject: [PATCH 12/76] IB/ipath: Support the IBA6110 revision 4 Recognize IBA 6110 Revision 4: same feature set, etc. as earlier revisions. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_iba6110.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 85f408de7bf7..04799852ec7e 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -680,9 +680,9 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, snprintf(name, namelen, "%s", n); if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || - dd->ipath_minrev > 3)) { + dd->ipath_minrev > 4)) { /* - * This version of the driver only supports Rev 3.2 and 3.3 + * This version of the driver only supports Rev 3.2 - 3.4 */ ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n", From e7340f04426416a6655ffaead4651bfb9e1b0848 Mon Sep 17 00:00:00 2001 From: Robert Walsh Date: Mon, 18 Jun 2007 14:24:35 -0700 Subject: [PATCH 13/76] IB/ipath: Fix maximum MTU reporting Although our chip supports 4K MTUs, our driver doesn't yet support this feature, so limit the maximum MTU to 2K until we get support for 4K MTUs implemented. Signed-off-by: Robert Walsh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_fs.c | 7 ++++++- drivers/infiniband/hw/ipath/ipath_init_chip.c | 7 ++++++- drivers/infiniband/hw/ipath/ipath_mad.c | 7 ++++++- drivers/infiniband/hw/ipath/ipath_qp.c | 7 ++++++- drivers/infiniband/hw/ipath/ipath_verbs.c | 7 ++++++- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index ebd5c7bd2cdb..40cf1bc90d74 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -257,9 +257,14 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, /* Notimpl InitType (actually, an SMA decision) */ /* VLHighLimit is 0 (only one VL) */ ; /* VLArbitrationHighCap is 0 (only one VL) */ + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the maximum + * to 2048. + */ portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */ /* InitTypeReply is SMA decision */ - (5 << 16) /* MTUCap 4096 */ + (4 << 16) /* MTUCap 2048 */ | (7 << 13) /* VLStallCount */ | (0x1f << 8) /* HOQLife */ | (1 << 4) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index ee839346a3a4..bdfda6221744 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -310,7 +310,12 @@ static int init_chip_first(struct ipath_devdata *dd, val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize); dd->ipath_piosize2k = val & ~0U; dd->ipath_piosize4k = val >> 32; - dd->ipath_ibmtu = 4096; /* default to largest legal MTU */ + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the initial value + * to 2048. + */ + dd->ipath_ibmtu = 2048; val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt); dd->ipath_piobcnt2k = val & ~0U; dd->ipath_piobcnt4k = val >> 32; diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 25908b02fbe5..2e9e161bfd0a 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -292,7 +292,12 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, /* pip->vl_arb_high_cap; // only one VL */ /* pip->vl_arb_low_cap; // only one VL */ /* InitTypeReply = 0 */ - pip->inittypereply_mtucap = IB_MTU_4096; + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the maximum value + * to 2048. + */ + pip->inittypereply_mtucap = IB_MTU_2048; // HCAs ignore VLStallCount and HOQLife /* pip->vlstallcnt_hoqlife; */ pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */ diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index bfef08ecd342..9e07abba8aa7 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -507,8 +507,13 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->port_num > ibqp->device->phys_port_cnt) goto inval; + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so don't allow Path MTU + * values greater than 2048. + */ if (attr_mask & IB_QP_PATH_MTU) - if (attr->path_mtu > IB_MTU_4096) + if (attr->path_mtu > IB_MTU_2048) goto inval; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index bb70845279b8..980b64add321 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1051,7 +1051,12 @@ static int ipath_query_port(struct ib_device *ibdev, props->max_vl_num = 1; /* VLCap = VL0 */ props->init_type_reply = 0; - props->max_mtu = IB_MTU_4096; + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the maximum value + * to 2048. + */ + props->max_mtu = IB_MTU_2048; switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; From fdc7215fbd7b7652b052d1fb7893afe324dba7aa Mon Sep 17 00:00:00 2001 From: Robert Walsh Date: Mon, 18 Jun 2007 14:24:36 -0700 Subject: [PATCH 14/76] IB/ipath: Fill in some missing FMR-related fields in query_device In ipath_query_device(), some of the struct ib_device_attr fields were not being initialized. Signed-off-by: Robert Walsh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 980b64add321..04294ca0a81f 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -981,6 +981,8 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_ah = ib_ipath_max_ahs; props->max_cqe = ib_ipath_max_cqes; props->max_mr = dev->lk_table.max; + props->max_fmr = dev->lk_table.max; + props->max_map_per_fmr = 32767; props->max_pd = ib_ipath_max_pds; props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; props->max_qp_init_rd_atom = 255; From 2c9749c3b534ea0e606b7ee2c29849bbb8d5b0a9 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 18 Jun 2007 14:24:37 -0700 Subject: [PATCH 15/76] IB/ipath: Fix problem with next WQE after a UC completion This patch fixes a bug introduced when moving some code around for readability. Setting the wqe pointer at the end of the function is a NOP since it isn't used. Move it back to where it is used. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_uc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 1c2b03c2ef5e..49d650cabccf 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -58,7 +58,6 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, wc->port_num = 0; ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0); } - wqe = get_swqe_ptr(qp, qp->s_last); } /** @@ -97,8 +96,10 @@ int ipath_make_uc_req(struct ipath_qp *qp, * Signal the completion of the last send * (if there is one). */ - if (qp->s_last != qp->s_tail) + if (qp->s_last != qp->s_tail) { complete_last_send(qp, wqe, &wc); + wqe = get_swqe_ptr(qp, qp->s_last); + } /* Check if send work queue is empty. */ if (qp->s_tail == qp->s_head) From 6d2fad0472ca0d6caba7c36d2823a527e2a0e4f5 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 18 Jun 2007 14:24:38 -0700 Subject: [PATCH 16/76] IB/ipath: Fix local loopback bug when waiting for resources This patch fixes a minor bug where the wrong QP was checked for a send work request that should wait for an RNR timeout. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_ruc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index d9c2a9b15d86..8c5d20a4b5f3 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -267,7 +267,7 @@ again: spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout) { + sqp->s_rnr_timeout) { spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } From 1dd6a1be1416be48cafda9e63a614f26f0428d10 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 18 Jun 2007 14:24:38 -0700 Subject: [PATCH 17/76] IB/ipath: Set M bit in BTH according to IB spec According to chapter 17.2.8.1.1, QPs start in the migrated state and should send packets with the M bit set in the BTH. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 6 +++--- drivers/infiniband/hw/ipath/ipath_uc.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 1915771fd038..9ba80d107dcd 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -188,7 +188,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, } qp->s_hdrwords = hwords; qp->s_cur_size = len; - *bth0p = bth0; + *bth0p = bth0 | (1 << 22); /* Set M bit */ *bth2p = bth2; return 1; @@ -240,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; - bth0 = 0; + bth0 = 1 << 22; /* Set M bit */ /* Send a request. */ wqe = get_swqe_ptr(qp, qp->s_cur); @@ -604,7 +604,7 @@ static void send_rc_ack(struct ipath_qp *qp) } /* read pkey_index w/o lock (its atomic) */ bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | - OP(ACKNOWLEDGE) << 24; + (OP(ACKNOWLEDGE) << 24) | (1 << 22); if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 49d650cabccf..243d7c61e18d 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -86,7 +86,7 @@ int ipath_make_uc_req(struct ipath_qp *qp, /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; - bth0 = 0; + bth0 = 1 << 22; /* Set M bit */ /* Get the next send request. */ wqe = get_swqe_ptr(qp, qp->s_last); From f716cdfe57f217966f41a7add190d6f5b9fd0769 Mon Sep 17 00:00:00 2001 From: Joan Eslinger Date: Mon, 18 Jun 2007 14:24:39 -0700 Subject: [PATCH 18/76] IB/ipath: Change use of constants for TID type to defined values Define pkt rcvd 'type' in a way consistent with HW spec and chips. The hardware considers received packets of type 0 to be expected, and type 1 to be eager. The driver was calling the ipath_f_put_tid functions using a variable called 'type' set to 0 for eager and to 1 for expected packets. Worse, the iba6110 and iba6120 drivers used those values inconsistently. This was quite confusing. Now everything is consistent with the hardware. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_file_ops.c | 12 ++++++++---- drivers/infiniband/hw/ipath/ipath_iba6110.c | 10 ++++++---- drivers/infiniband/hw/ipath/ipath_iba6120.c | 14 ++++++++------ drivers/infiniband/hw/ipath/ipath_init_chip.c | 3 ++- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 1272aaf2a785..931802b55cc5 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -396,7 +396,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, "TID %u, vaddr %lx, physaddr %llx pgp %p\n", tid, vaddr, (unsigned long long) physaddr, pagep[i]); - dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr); + dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, + physaddr); /* * don't check this tid in ipath_portshadow, since we * just filled it in; start with the next one. @@ -422,7 +423,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, if (dd->ipath_pageshadow[porttid + tid]) { ipath_cdbg(VERBOSE, "Freeing TID %u\n", tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], 1, + dd->ipath_f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, dd->ipath_tidinvalid); pci_unmap_page(dd->pcidev, dd->ipath_physshadow[porttid + tid], @@ -538,7 +540,8 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, if (dd->ipath_pageshadow[porttid + tid]) { ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", pd->port_pid, tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], 1, + dd->ipath_f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, dd->ipath_tidinvalid); pci_unmap_page(dd->pcidev, dd->ipath_physshadow[porttid + tid], @@ -921,7 +924,8 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), 0, pa); + dd->ipath_rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, pa); pa += egrsize; } cond_resched(); /* don't hog the cpu */ diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 04799852ec7e..d8ac9f18bf49 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -1408,7 +1408,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) * ipath_pe_put_tid - write a TID in chip * @dd: the infinipath device * @tidptr: pointer to the expected TID (in chip) to udpate - * @tidtype: 0 for eager, 1 for expected + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing * * This exists as a separate routine to allow for special locking etc. @@ -1429,7 +1429,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, "40 bits, using only 40!!!\n", pa); pa &= INFINIPATH_RT_ADDR_MASK; } - if (type == 0) + if (type == RCVHQ_RCV_TYPE_EAGER) pa |= dd->ipath_tidtemplate; else { /* in words (fixed, full page). */ @@ -1469,7 +1469,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) port * dd->ipath_rcvtidcnt * sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvtidcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid); + ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + dd->ipath_tidinvalid); tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + dd->ipath_rcvegrbase + @@ -1477,7 +1478,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvegrcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid); + ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + dd->ipath_tidinvalid); } /** diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 207323a5b52b..b931057bb3e8 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -1104,7 +1104,7 @@ bail: * ipath_pe_put_tid - write a TID in chip * @dd: the infinipath device * @tidptr: pointer to the expected TID (in chip) to udpate - * @tidtype: 0 for eager, 1 for expected + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing * * This exists as a separate routine to allow for special locking etc. @@ -1130,7 +1130,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, "BUG: Physical page address 0x%lx " "has bits set in 31-29\n", pa); - if (type == 0) + if (type == RCVHQ_RCV_TYPE_EAGER) pa |= dd->ipath_tidtemplate; else /* for now, always full 4KB page */ pa |= 2 << 29; @@ -1154,7 +1154,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher * @dd: the infinipath device * @tidptr: pointer to the expected TID (in chip) to udpate - * @tidtype: 0 for eager, 1 for expected + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing * * This exists as a separate routine to allow for selection of the @@ -1179,7 +1179,7 @@ static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr, "BUG: Physical page address 0x%lx " "has bits set in 31-29\n", pa); - if (type == 0) + if (type == RCVHQ_RCV_TYPE_EAGER) pa |= dd->ipath_tidtemplate; else /* for now, always full 4KB page */ pa |= 2 << 29; @@ -1218,7 +1218,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port) port * dd->ipath_rcvtidcnt * sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvtidcnt; i++) - ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv); + ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + @@ -1226,7 +1227,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port) port * dd->ipath_rcvegrcnt * sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvegrcnt; i++) - ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv); + ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); } /** diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index bdfda6221744..9f611553ffb3 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -133,7 +133,8 @@ static int create_port0_egr(struct ipath_devdata *dd) dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); dd->ipath_f_put_tid(dd, e + (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), 0, + dd->ipath_rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, dd->ipath_port0_skbinfo[e].phys); } From 561095f20eeb5c6c05c303bad8cdb3f8a80821bc Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Mon, 18 Jun 2007 14:24:40 -0700 Subject: [PATCH 19/76] IB/ipath: Fix the mtrr_add args for chips with 2 buffer sizes The values passed have never been right for iba 6120 chips, but just happened to work. We needed to select the right buffer offset in the chip (both are in same register), and the total length was wrong also, but was covered by the rounding up. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c index 04696e62da87..9f409fd928fb 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c @@ -63,12 +63,29 @@ int ipath_enable_wc(struct ipath_devdata *dd) * of 2 address matching the length (which has to be a power of 2). * For rev1, that means the base address, for rev2, it will be just * the PIO buffers themselves. + * For chips with two sets of buffers, the calculations are + * somewhat more complicated; we need to sum, and the piobufbase + * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. + * The buffers are still packed, so a single range covers both. */ - pioaddr = addr + dd->ipath_piobufbase; - piolen = (dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k) * - ALIGN(dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k, dd->ipath_palign); + if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */ + unsigned long pio2kbase, pio4kbase; + pio2kbase = dd->ipath_piobufbase & 0xffffffffUL; + pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL; + if (pio2kbase < pio4kbase) { /* all, for now */ + pioaddr = addr + pio2kbase; + piolen = pio4kbase - pio2kbase + + dd->ipath_piobcnt4k * dd->ipath_4kalign; + } else { + pioaddr = addr + pio4kbase; + piolen = pio2kbase - pio4kbase + + dd->ipath_piobcnt2k * dd->ipath_palign; + } + } else { /* single buffer size (2K, currently) */ + pioaddr = addr + dd->ipath_piobufbase; + piolen = dd->ipath_piobcnt2k * dd->ipath_palign + + dd->ipath_piobcnt4k * dd->ipath_4kalign; + } for (bits = 0; !(piolen & (1ULL << bits)); bits++) /* do nothing */ ; From 9380068fc2f230e7840ff87d3f1e6030ae2ee5e8 Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Mon, 18 Jun 2007 14:24:41 -0700 Subject: [PATCH 20/76] IB/ipath: Use S_ABORT not cancel and abort on exit freeze mode after recovery This centralizes the use of the abort functionality, removes the unneeded buffer cancel (abort does the same thing), sets up to ignore launch errors after abort, same as cancel. We need abort on exit from freeze mode to avoid having buffers stuck in the busy state, if a user process happened to complete the send while we were in freeze mode doing the recovery. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 57 ++++++++++++------- drivers/infiniband/hw/ipath/ipath_iba6110.c | 13 +++-- drivers/infiniband/hw/ipath/ipath_iba6120.c | 16 +++++- drivers/infiniband/hw/ipath/ipath_init_chip.c | 6 ++ drivers/infiniband/hw/ipath/ipath_intr.c | 13 ++--- drivers/infiniband/hw/ipath/ipath_kernel.h | 1 + 6 files changed, 68 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index e9639860b48d..8b611796f33e 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -706,9 +706,9 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, u64 sendctrl, sendorig; ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); - sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM; + sendorig = dd->ipath_sendctrl; for (i = first; i < last; i++) { - sendctrl = sendorig | + sendctrl = sendorig | INFINIPATH_S_DISARM | (i << INFINIPATH_S_DISARMPIOBUF_SHIFT); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, sendctrl); @@ -719,12 +719,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, * while we were looping; no critical bits that would require * locking. * - * Write a 0, and then the original value, reading scratch in + * disable PIOAVAILUPD, then re-enable, reading scratch in * between. This seems to avoid a chip timing race that causes * pioavail updates to memory to stop. */ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - 0); + sendorig & ~IPATH_S_PIOBUFAVAILUPD); sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); @@ -1596,6 +1596,35 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) return ret; } + +/* + * Flush all sends that might be in the ready to send state, as well as any + * that are in the process of being sent. Used whenever we need to be + * sure the send side is idle. Cleans up all buffer state by canceling + * all pio buffers, and issuing an abort, which cleans up anything in the + * launch fifo. The cancel is superfluous on some chip versions, but + * it's safer to always do it. + * PIOAvail bits are updated by the chip as if normal send had happened. + */ +void ipath_cancel_sends(struct ipath_devdata *dd) +{ + ipath_dbg("Cancelling all in-progress send buffers\n"); + dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */ + /* + * the abort bit is auto-clearing. We read scratch to be sure + * that cancels and the abort have taken effect in the chip. + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + INFINIPATH_S_ABORT); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_disarm_piobufs(dd, 0, + (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k)); + + /* and again, be sure all have hit the chip */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); +} + + static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) { static const char *what[4] = { @@ -1617,14 +1646,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); /* flush all queued sends when going to DOWN or INIT, to be sure that * they don't block MAD packets */ - if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, - (unsigned)(dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k) - - dd->ipath_lastport_piobuf); - } + if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) + ipath_cancel_sends(dd); ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl | which); @@ -1967,17 +1990,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ udelay(5); - /* - * abort any armed or launched PIO buffers that didn't go. (self - * clearing). Will cause any packet currently being transmitted to - * go out with an EBP, and may also cause a short packet error on - * the receiver. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << INFINIPATH_IBCC_LINKINITCMD_SHIFT); + ipath_cancel_sends(dd); /* disable IBC */ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index d8ac9f18bf49..34d159ad97b4 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -509,6 +509,13 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (!hwerrs) { ipath_dbg("Clearing freezemode on ignored or " "recovered hardware error\n"); + /* + * clear all sends, becauase they have may been + * completed by usercode while in freeze mode, and + * therefore would not be sent, and eventually + * might cause the process to run out of bufs + */ + ipath_cancel_sends(dd); ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, ctrl); @@ -1566,11 +1573,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) writel(16, piobuf); piobuf += pioincr; } - /* - * self-clearing - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); ipath_get_eeprom_info(dd); if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && @@ -1599,7 +1601,6 @@ static int ipath_ht_txe_recover(struct ipath_devdata *dd) } dev_info(&dd->pcidev->dev, "Recovering from TXE PIO parity error\n"); - ipath_disarm_senderrbufs(dd, 1); return 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index b931057bb3e8..0c34555d4979 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -430,8 +430,19 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, *dd->ipath_statusp |= IPATH_STATUS_HWERROR; dd->ipath_flags &= ~IPATH_INITTED; } else { - ipath_dbg("Clearing freezemode on ignored hardware " - "error\n"); + static u32 freeze_cnt; + + freeze_cnt++; + ipath_dbg("Clearing freezemode on ignored or recovered " + "hardware error (%u)\n", freeze_cnt); + /* + * clear all sends, becauase they have may been + * completed by usercode while in freeze mode, and + * therefore would not be sent, and eventually + * might cause the process to run out of bufs + */ + ipath_cancel_sends(dd); + ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control); } @@ -1371,7 +1382,6 @@ static int ipath_pe_txe_recover(struct ipath_devdata *dd) dev_info(&dd->pcidev->dev, "Recovering from TXE PIO parity error\n"); } - ipath_disarm_senderrbufs(dd, 1); return 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 9f611553ffb3..5193d6945caa 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -777,6 +777,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) piobufs, dd->ipath_pbufsport, uports); dd->ipath_f_early_init(dd); + /* + * cancel any possible active sends from early driver load. + * Follows early_init because some chips have to initialize + * PIO buffers in early_init to avoid false parity errors. + */ + ipath_cancel_sends(dd); /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be * done after early_init */ diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index d9cdd00c8233..948091f7d5ac 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -93,7 +93,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { int i; - if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) { + if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && + dd->ipath_lastcancel > jiffies) { __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, "SendbufErrs %lx %lx", sbuf[0], sbuf[1]); @@ -108,7 +109,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) ipath_clrpiobuf(dd, i); ipath_disarm_piobufs(dd, i, 1); } - dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ + /* ignore armlaunch errs for a bit */ + dd->ipath_lastcancel = jiffies+3; } } @@ -290,12 +292,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, * Flush all queued sends when link went to DOWN or INIT, * to be sure that they don't block SMA and other MAD packets */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, - (unsigned)(dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k) - - dd->ipath_lastport_piobuf); + ipath_cancel_sends(dd); } else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || lstate == IPATH_IBSTATE_ACTIVE) { diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 2a4414b948ee..2e85aeca5eac 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -676,6 +676,7 @@ int ipath_unordered_wc(void); void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first, unsigned cnt); +void ipath_cancel_sends(struct ipath_devdata *); int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); From 06ee109002672ac875558ec699b53cf08a865bd3 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 18 Jun 2007 14:24:42 -0700 Subject: [PATCH 21/76] IB/ipath: Fix RDMA read retry code A RDMA read response or atomic response can ACK earlier sends and RDMA writes. In this case, the wrong work request pointer was being used to store the read first response or atomic result. Also, if a RDMA read request is retried, the code to compute which request to resend was incorrect. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 57 +++++++++++++++++--------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 9ba80d107dcd..014d811d222d 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -806,13 +806,15 @@ static inline void update_last_psn(struct ipath_qp *qp, u32 psn) * Called at interrupt level with the QP s_lock held and interrupts disabled. * Returns 1 if OK, 0 if current operation should be aborted (NAK). */ -static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) +static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, + u64 val) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; struct ipath_swqe *wqe; int ret = 0; u32 ack_psn; + int diff; /* * Remove the QP from the timeout queue (or RNR timeout queue). @@ -840,7 +842,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * The MSN might be for a later WQE than the PSN indicates so * only complete WQEs that the PSN finishes. */ - while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) { + while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) { + /* + * RDMA_READ_RESPONSE_ONLY is a special case since + * we want to generate completion events for everything + * before the RDMA read, copy the data, then generate + * the completion for the read. + */ + if (wqe->wr.opcode == IB_WR_RDMA_READ && + opcode == OP(RDMA_READ_RESPONSE_ONLY) && + diff == 0) { + ret = 1; + goto bail; + } /* * If this request is a RDMA read or atomic, and the ACK is * for a later operation, this ACK NAKs the RDMA read or @@ -851,12 +865,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * is sent but before the response is received. */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && - (opcode != OP(RDMA_READ_RESPONSE_LAST) || - ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || + (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && - (opcode != OP(ATOMIC_ACKNOWLEDGE) || - ipath_cmp24(wqe->psn, psn) != 0))) { + (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { /* * The last valid PSN seen is the previous * request's. @@ -870,6 +882,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ goto bail; } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + *(u64 *) wqe->sg_list[0].vaddr = val; if (qp->s_num_rd_atomic && (wqe->wr.opcode == IB_WR_RDMA_READ || wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || @@ -1079,6 +1094,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, int diff; u32 pad; u32 aeth; + u64 val; spin_lock_irqsave(&qp->s_lock, flags); @@ -1118,8 +1134,6 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, data += sizeof(__be32); } if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - u64 val; - if (!header_in_data) { __be32 *p = ohdr->u.at.atomic_ack_eth; @@ -1127,12 +1141,13 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, be32_to_cpu(p[1]); } else val = be64_to_cpu(((__be64 *) data)[0]); - *(u64 *) wqe->sg_list[0].vaddr = val; - } - if (!do_rc_ack(qp, aeth, psn, opcode) || + } else + val = 0; + if (!do_rc_ack(qp, aeth, psn, opcode, val) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; hdrsize += 4; + wqe = get_swqe_ptr(qp, qp->s_last); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; /* @@ -1176,13 +1191,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto bail; case OP(RDMA_READ_RESPONSE_ONLY): - if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { - dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (!header_in_data) + aeth = be32_to_cpu(ohdr->u.aeth); + else + aeth = be32_to_cpu(((__be32 *) data)[0]); + if (!do_rc_ack(qp, aeth, psn, opcode, 0)) goto ack_done; - } - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; /* @@ -1197,6 +1211,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, * have to be careful to copy the data to the right * location. */ + wqe = get_swqe_ptr(qp, qp->s_last); qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, wqe, psn, pmtu); goto read_last; @@ -1230,7 +1245,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, data += sizeof(__be32); } ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); - (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); + (void) do_rc_ack(qp, aeth, psn, + OP(RDMA_READ_RESPONSE_LAST), 0); goto ack_done; } @@ -1344,8 +1360,11 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, e = NULL; break; } - if (ipath_cmp24(psn, e->psn) >= 0) + if (ipath_cmp24(psn, e->psn) >= 0) { + if (prev == qp->s_tail_ack_queue) + old_req = 0; break; + } } switch (opcode) { case OP(RDMA_READ_REQUEST): { From db5518cd09c21f0fa70af0a4ca38badd90622c9e Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 18 Jun 2007 14:24:43 -0700 Subject: [PATCH 22/76] IB/ipath: Wait for PIO available interrupt The send function is called when posting new send work requests. There is no point in trying to send a packet if the QP is already waiting for a HW send buffer so don't clear the busy bit until the buffer available interrupt happens. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_ruc.c | 6 ++---- drivers/infiniband/hw/ipath/ipath_verbs.c | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 8c5d20a4b5f3..837118676cc7 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -503,11 +503,9 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) * could be called. If we are still in the tasklet function, * tasklet_hi_schedule() will not call us until the next time * tasklet_hi_schedule() is called. - * We clear the tasklet flag now since we are committing to return - * from the tasklet function. + * We leave the busy flag set so that another post send doesn't + * try to put the same QP on the piowait list again. */ - clear_bit(IPATH_S_BUSY, &qp->s_busy); - tasklet_unlock(&qp->s_task); want_buffer(dev->dd); dev->n_piowait++; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 04294ca0a81f..cd79a8ca5104 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -948,6 +948,7 @@ int ipath_ib_piobufavail(struct ipath_ibdev *dev) qp = list_entry(dev->piowait.next, struct ipath_qp, piowait); list_del_init(&qp->piowait); + clear_bit(IPATH_S_BUSY, &qp->s_busy); tasklet_hi_schedule(&qp->s_task); } spin_unlock_irqrestore(&dev->pending_lock, flags); From 30d149ab58cc3ed8e4bc9c4dc45bebbed0e84b6e Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 18 Jun 2007 14:24:44 -0700 Subject: [PATCH 23/76] IB/ipath: Fix possible data corruption if multiple SGEs used for receive The code to copy data from the receive queue buffers to the IB SGEs doesn't check the SGE length, only the memory region/page length when copying data. This could overwrite parts of the user's memory that were not intended to be written. It can only happen if multiple SGEs are used to describe a receive buffer which almost never happens in practice. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_ruc.c | 2 ++ drivers/infiniband/hw/ipath/ipath_ud.c | 2 ++ drivers/infiniband/hw/ipath/ipath_verbs.c | 8 ++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 837118676cc7..1b0e62b761bb 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -396,6 +396,8 @@ again: if (len > sge->length) len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; BUG_ON(len == 0); ipath_copy_sge(&qp->r_sge, sge->vaddr, len); sge->vaddr += len; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index a518f7c8fa83..0b5a6ac1bb83 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -231,6 +231,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, if (len > length) len = length; + if (len > sge->sge_length) + len = sge->sge_length; BUG_ON(len == 0); ipath_copy_sge(&rsge, sge->vaddr, len); sge->vaddr += len; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index cd79a8ca5104..35241b76d766 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -164,9 +164,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) while (length) { u32 len = sge->length; - BUG_ON(len == 0); if (len > length) len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); memcpy(sge->vaddr, data, len); sge->vaddr += len; sge->length -= len; @@ -202,9 +204,11 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) while (length) { u32 len = sge->length; - BUG_ON(len == 0); if (len > length) len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; From d781b129f1e8b3e2f369d8035a61a5233832e65c Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 18 Jun 2007 14:24:44 -0700 Subject: [PATCH 24/76] IB/ipath: Duplicate RDMA reads can cause responder to NAK inappropriately A duplicate RDMA read request can fool the responder into NAKing a new RDMA read request because the responder wasn't keeping track of whether the queue of RDMA read requests had been sent at least once. For example, requester sends 4 2K byte RDMA read requests, times out, and resends the first, then sees the 4 responses, then sends a 5th RDMA read or atomic operation. The responder sees the 4 requests, sends 4 responses, sees the resent 1st request, rewinds the queue, then sees the 5th request but thinks the queue is full and that the requester is invalidly sending a 5th new request. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 38 ++++++++++++++++++++--- drivers/infiniband/hw/ipath/ipath_verbs.h | 1 + 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 014d811d222d..9e7123987ae6 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -125,8 +125,10 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else + } else { qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + e->sent = 1; + } ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_rdma_psn = e->psn; @@ -143,6 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, cpu_to_be32(e->atomic_data); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = e->psn; + e->sent = 1; } bth0 = qp->s_ack_state << 24; break; @@ -158,6 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1; } bth0 = qp->s_ack_state << 24; bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; @@ -1479,6 +1483,22 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) spin_unlock_irqrestore(&qp->s_lock, flags); } +static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) +{ + unsigned long flags; + unsigned next; + + next = n + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + if (n == qp->s_tail_ack_queue) { + qp->s_tail_ack_queue = next; + qp->s_ack_state = OP(ACKNOWLEDGE); + } + spin_unlock_irqrestore(&qp->s_lock, flags); +} + /** * ipath_rc_rcv - process an incoming RC packet * @dev: the device this packet came in on @@ -1741,8 +1761,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - if (unlikely(next == qp->s_tail_ack_queue)) - goto nack_inv; + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv; + ipath_update_ack_queue(qp, next); + } e = &qp->s_ack_queue[qp->r_head_ack_queue]; /* RETH comes after BTH */ if (!header_in_data) @@ -1777,6 +1800,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, e->rdma_sge.sge.sge_length = 0; } e->opcode = opcode; + e->sent = 0; e->psn = psn; /* * We need to increment the MSN here instead of when we @@ -1812,8 +1836,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - if (unlikely(next == qp->s_tail_ack_queue)) - goto nack_inv; + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv; + ipath_update_ack_queue(qp, next); + } if (!header_in_data) ateth = &ohdr->u.atomic_eth; else @@ -1838,6 +1865,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, be64_to_cpu(ateth->compare_data), sdata); e->opcode = opcode; + e->sent = 0; e->psn = psn & IPATH_PSN_MASK; qp->r_msn++; qp->r_psn++; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 088b837ebea8..458f8227fa36 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -321,6 +321,7 @@ struct ipath_sge_state { */ struct ipath_ack_entry { u8 opcode; + u8 sent; u32 psn; union { struct ipath_sge_state rdma_sge; From 0df6291c8af2778d05f278d5738eef2c8fafa2dd Mon Sep 17 00:00:00 2001 From: Mark Debbage Date: Mon, 18 Jun 2007 14:24:45 -0700 Subject: [PATCH 25/76] IB/ipath: Correct checking of swminor version field when using subports When subports are required to run a program, this patch checks that the driver and the userspace library have compatible subport implementations. This is achieved through checks on the swminor version field built into the driver and userspace library. Bad combinations are reported through syslog and result in an error when opening the port. Signed-off-by: Mark Debbage Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_file_ops.c | 64 +++++++++++++++++--- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 931802b55cc5..fc83f40a933f 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1403,6 +1403,38 @@ bail: return pollflag; } +static int ipath_supports_subports(int user_swmajor, int user_swminor) +{ + /* no subport implementation prior to software version 1.3 */ + return (user_swmajor > 1) || (user_swminor >= 3); +} + +static int ipath_compatible_subports(int user_swmajor, int user_swminor) +{ + /* this code is written long-hand for clarity */ + if (IPATH_USER_SWMAJOR != user_swmajor) { + /* no promise of compatibility if major mismatch */ + return 0; + } + if (IPATH_USER_SWMAJOR == 1) { + switch (IPATH_USER_SWMINOR) { + case 0: + case 1: + case 2: + /* no subport implementation so cannot be compatible */ + return 0; + case 3: + /* 3 is only compatible with itself */ + return user_swminor == 3; + default: + /* >= 4 are compatible (or are expected to be) */ + return user_swminor >= 4; + } + } + /* make no promises yet for future major versions */ + return 0; +} + static int init_subports(struct ipath_devdata *dd, struct ipath_portdata *pd, const struct ipath_user_info *uinfo) @@ -1418,14 +1450,26 @@ static int init_subports(struct ipath_devdata *dd, if (uinfo->spu_subport_cnt <= 1) goto bail; - /* Old user binaries don't know about new subport implementation */ - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) { + /* Self-consistency check for ipath_compatible_subports() */ + if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) && + !ipath_compatible_subports(IPATH_USER_SWMAJOR, + IPATH_USER_SWMINOR)) { dev_info(&dd->pcidev->dev, - "Mismatched user minor version (%d) and driver " - "minor version (%d) while port sharing. Ensure " + "Inconsistent ipath_compatible_subports()\n"); + goto bail; + } + + /* Check for subport compatibility */ + if (!ipath_compatible_subports(uinfo->spu_userversion >> 16, + uinfo->spu_userversion & 0xffff)) { + dev_info(&dd->pcidev->dev, + "Mismatched user version (%d.%d) and driver " + "version (%d.%d) while port sharing. Ensure " "that driver and library are from the same " "release.\n", + (int) (uinfo->spu_userversion >> 16), (int) (uinfo->spu_userversion & 0xffff), + IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR); goto bail; } @@ -1729,14 +1773,13 @@ static int ipath_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } - /* Get port early, so can set affinity prior to memory allocation */ static int ipath_assign_port(struct file *fp, const struct ipath_user_info *uinfo) { int ret; int i_minor; - unsigned swminor; + unsigned swmajor, swminor; /* Check to be sure we haven't already initialized this file */ if (port_fp(fp)) { @@ -1745,7 +1788,8 @@ static int ipath_assign_port(struct file *fp, } /* for now, if major version is different, bail */ - if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { + swmajor = uinfo->spu_userversion >> 16; + if (swmajor != IPATH_USER_SWMAJOR) { ipath_dbg("User major version %d not same as driver " "major %d\n", uinfo->spu_userversion >> 16, IPATH_USER_SWMAJOR); @@ -1760,7 +1804,8 @@ static int ipath_assign_port(struct file *fp, mutex_lock(&ipath_mutex); - if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt && + if (ipath_compatible_subports(swmajor, swminor) && + uinfo->spu_subport_cnt && (ret = find_shared_port(fp, uinfo))) { mutex_unlock(&ipath_mutex); if (ret > 0) @@ -2024,7 +2069,8 @@ static int ipath_port_info(struct ipath_portdata *pd, u16 subport, info.port = pd->port_port; info.subport = subport; /* Don't return new fields if old library opened the port. */ - if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) { + if (ipath_supports_subports(pd->userversion >> 16, + pd->userversion & 0xffff)) { /* Number of user ports available for this device. */ info.num_ports = pd->port_dd->ipath_cfgports - 1; info.num_subports = pd->port_subport_cnt; From bacf4013530e7fc230a8aa0c6ea3c17fc2f47665 Mon Sep 17 00:00:00 2001 From: Mark Debbage Date: Mon, 18 Jun 2007 14:24:46 -0700 Subject: [PATCH 26/76] IB/ipath: Make handling of one subport consistent Previously the driver and userspace code handled the case of 1 subport somewhat inconsistently. The new interpretation of this situation is that if one subport is requested, the driver turns on the subport mechanism and arranges for the port to be "shared" by one process. In normal use the userspace library does not use this configuration and instead arranges for the port not to be shared at all. This particular idiom can be useful for testing purposes. Signed-off-by: Mark Debbage Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_file_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index fc83f40a933f..a47479608f48 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1444,10 +1444,10 @@ static int init_subports(struct ipath_devdata *dd, size_t size; /* - * If the user is requesting zero or one port, + * If the user is requesting zero subports, * skip the subport allocation. */ - if (uinfo->spu_subport_cnt <= 1) + if (uinfo->spu_subport_cnt <= 0) goto bail; /* Self-consistency check for ipath_compatible_subports() */ From e8e7ad711509f576b1bffd92c3ae4672fe92ec48 Mon Sep 17 00:00:00 2001 From: Michael Albaugh Date: Mon, 18 Jun 2007 14:24:47 -0700 Subject: [PATCH 27/76] IB/ipath: Add capability to modify PBC word During compliance testing and when debugging some interconnect issues, it is very useful to be able to send malformed packets, without having the device signal them as malformed (drop, or terminate with EBP). The hardware supports this, but the driver "diagnostic packet" interface did not. Extend capability to send specific malformed packets for testing. Signed-off-by: Michael Albaugh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 19 ++++++++++- drivers/infiniband/hw/ipath/ipath_diag.c | 39 +++++++++++++++++++--- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 12e1349cd03e..f70788c25ea6 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -501,13 +501,30 @@ struct __ipath_sendpkt { struct ipath_iovec sps_iov[4]; }; -/* Passed into diag data special file's ->write method. */ +/* + * diagnostics can send a packet by "writing" one of the following + * two structs to diag data special file + * The first is the legacy version for backward compatibility + */ struct ipath_diag_pkt { __u32 unit; __u64 data; __u32 len; }; +/* The second diag_pkt struct is the expanded version that allows + * more control over the packet, specifically, by allowing a custom + * pbc (+ extra) qword, so that special modes and deliberate + * changes to CRCs can be used. The elements were also re-ordered + * for better alignment and to avoid padding issues. + */ +struct ipath_diag_xpkt { + __u64 data; + __u64 pbc_wd; + __u32 unit; + __u32 len; +}; + /* * Data layout in I2C flash (for GUID, etc.) * All fields are little-endian binary unless otherwise stated diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 63e8368b0e95..aab21c1b822d 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -323,13 +323,14 @@ static ssize_t ipath_diagpkt_write(struct file *fp, { u32 __iomem *piobuf; u32 plen, clen, pbufn; - struct ipath_diag_pkt dp; + struct ipath_diag_pkt odp; + struct ipath_diag_xpkt dp; u32 *tmpbuf = NULL; struct ipath_devdata *dd; ssize_t ret = 0; u64 val; - if (count < sizeof(dp)) { + if (count != sizeof(dp)) { ret = -EINVAL; goto bail; } @@ -339,6 +340,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } + /* + * Due to padding/alignment issues (lessened with new struct) + * the old and new structs are the same length. We need to + * disambiguate them, which we can do because odp.len has never + * been less than the total of LRH+BTH+DETH so far, while + * dp.unit (same offset) unit is unlikely to get that high. + * Similarly, dp.data, the pointer to user at the same offset + * as odp.unit, is almost certainly at least one (512byte)page + * "above" NULL. The if-block below can be omitted if compatibility + * between a new driver and older diagnostic code is unimportant. + * compatibility the other direction (new diags, old driver) is + * handled in the diagnostic code, with a warning. + */ + if (dp.unit >= 20 && dp.data < 512) { + /* very probable version mismatch. Fix it up */ + memcpy(&odp, &dp, sizeof(odp)); + /* We got a legacy dp, copy elements to dp */ + dp.unit = odp.unit; + dp.data = odp.data; + dp.len = odp.len; + dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ + } + /* send count must be an exact number of dwords */ if (dp.len & 3) { ret = -EINVAL; @@ -371,9 +395,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp, ret = -ENODEV; goto bail; } + /* Check link state, but not if we have custom PBC */ val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; - if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM && - val != IPATH_IBSTATE_ACTIVE) { + if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT && + val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) { ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", dd->ipath_unit, (unsigned long long) val); ret = -EINVAL; @@ -419,9 +444,13 @@ static ssize_t ipath_diagpkt_write(struct file *fp, ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", dd->ipath_unit, plen - 1, pbufn); + if (dp.pbc_wd == 0) + /* Legacy operation, use computed pbc_wd */ + dp.pbc_wd = plen; + /* we have to flush after the PBC for correctness on some cpus * or WC buffer can be written out of order */ - writeq(plen, piobuf); + writeq(dp.pbc_wd, piobuf); ipath_flush_wc(); /* copy all by the trigger word, then flush, so it's written * to chip before trigger word, then write trigger word, then From b506e1dc59726a1c608f26e7294b9fe186255139 Mon Sep 17 00:00:00 2001 From: Robert Walsh Date: Mon, 18 Jun 2007 14:24:48 -0700 Subject: [PATCH 28/76] IB/ipath: Send ACK invalid where appropriate The IB specification ch. 9.9.3 table 58 says that a QP which isn't set up for the operation should return a NAK invalid request. Signed-off-by: Robert Walsh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 13 +++++++------ drivers/infiniband/hw/ipath/ipath_ruc.c | 22 ++++++++++++++++++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 9e7123987ae6..6423d9ef4175 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1711,6 +1711,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) + goto nack_inv; /* consume RWQE */ /* RETH comes after BTH */ if (!header_in_data) @@ -1740,9 +1743,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_sge.sge.length = 0; qp->r_sge.sge.sge_length = 0; } - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) - goto nack_acc; if (opcode == OP(RDMA_WRITE_FIRST)) goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) @@ -1756,8 +1756,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, u32 len; u8 next; - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) - goto nack_acc; + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_READ))) + goto nack_inv; next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; @@ -1832,7 +1833,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; + goto nack_inv; next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 1b0e62b761bb..d47ad7c8a0d2 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -319,12 +319,22 @@ again: break; case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = wqe->wr.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, @@ -354,8 +364,10 @@ again: case IB_WR_RDMA_READ: if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto acc_err; + IB_ACCESS_REMOTE_READ))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, @@ -369,8 +381,10 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) - goto acc_err; + IB_ACCESS_REMOTE_ATOMIC))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.rkey, From f2d042313e420002b06715675963cfab48ed2597 Mon Sep 17 00:00:00 2001 From: Robert Walsh Date: Mon, 18 Jun 2007 14:24:49 -0700 Subject: [PATCH 29/76] IB/ipath: ipath_poll fixups and enhancements Fix ipath_poll and enhance it so we can poll for urgent packets or regular packets and receive notifications of when a header queue overflows. Signed-off-by: Robert Walsh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 11 +- drivers/infiniband/hw/ipath/ipath_file_ops.c | 143 ++++++++++++------- drivers/infiniband/hw/ipath/ipath_intr.c | 38 +++-- drivers/infiniband/hw/ipath/ipath_kernel.h | 8 ++ 4 files changed, 140 insertions(+), 60 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index f70788c25ea6..b4b786d0dfca 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -431,8 +431,15 @@ struct ipath_user_info { #define IPATH_CMD_UNUSED_1 25 #define IPATH_CMD_UNUSED_2 26 #define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ +#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ -#define IPATH_CMD_MAX 27 +#define IPATH_CMD_MAX 28 + +/* + * Poll types + */ +#define IPATH_POLL_TYPE_URGENT 0x01 +#define IPATH_POLL_TYPE_OVERFLOW 0x02 struct ipath_port_info { __u32 num_active; /* number of active units */ @@ -473,6 +480,8 @@ struct ipath_cmd { __u16 part_key; /* user address of __u32 bitmask of active slaves */ __u64 slave_mask_addr; + /* type of polling we want */ + __u16 poll_type; } cmd; }; diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index a47479608f48..33ab0d6b80ff 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1341,65 +1341,98 @@ bail: return ret; } +static unsigned int ipath_poll_urgent(struct ipath_portdata *pd, + struct file *fp, + struct poll_table_struct *pt) +{ + unsigned pollflag = 0; + struct ipath_devdata *dd; + + dd = pd->port_dd; + + if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) { + pollflag |= POLLERR; + clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag); + } + + if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) { + pollflag |= POLLIN | POLLRDNORM; + clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag); + } + + if (!pollflag) { + set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag); + if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) + set_bit(IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag); + + poll_wait(fp, &pd->port_wait, pt); + } + + return pollflag; +} + +static unsigned int ipath_poll_next(struct ipath_portdata *pd, + struct file *fp, + struct poll_table_struct *pt) +{ + u32 head, tail; + unsigned pollflag = 0; + struct ipath_devdata *dd; + + dd = pd->port_dd; + + head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); + tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr; + + if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) { + pollflag |= POLLERR; + clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag); + } + + if (tail != head || + test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) { + pollflag |= POLLIN | POLLRDNORM; + clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag); + } + + if (!pollflag) { + set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); + if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) + set_bit(IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag); + + set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, + &dd->ipath_rcvctrl); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + + if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ + ipath_write_ureg(dd, ur_rcvhdrhead, + dd->ipath_rhdrhead_intr_off | head, + pd->port_port); + + poll_wait(fp, &pd->port_wait, pt); + } + + return pollflag; +} + static unsigned int ipath_poll(struct file *fp, struct poll_table_struct *pt) { struct ipath_portdata *pd; - u32 head, tail; - int bit; - unsigned pollflag = 0; - struct ipath_devdata *dd; + unsigned pollflag; pd = port_fp(fp); if (!pd) - goto bail; - dd = pd->port_dd; + pollflag = 0; + else if (pd->poll_type & IPATH_POLL_TYPE_URGENT) + pollflag = ipath_poll_urgent(pd, fp, pt); + else + pollflag = ipath_poll_next(pd, fp, pt); - bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; - set_bit(bit, &dd->ipath_rcvctrl); - - /* - * Before blocking, make sure that head is still == tail, - * reading from the chip, so we can be sure the interrupt - * enable has made it to the chip. If not equal, disable - * interrupt again and return immediately. This avoids races, - * and the overhead of the chip read doesn't matter much at - * this point, since we are waiting for something anyway. - */ - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); - tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); - - if (tail == head) { - set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ - (void)ipath_write_ureg(dd, ur_rcvhdrhead, - dd->ipath_rhdrhead_intr_off - | head, pd->port_port); - poll_wait(fp, &pd->port_wait, pt); - - if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - /* timed out, no packets received */ - clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - pd->port_rcvwait_to++; - } - else - pollflag = POLLIN | POLLRDNORM; - } - else { - /* it's already happened; don't do wait_event overhead */ - pollflag = POLLIN | POLLRDNORM; - pd->port_rcvnowait++; - } - - clear_bit(bit, &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - -bail: return pollflag; } @@ -2173,6 +2206,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, src = NULL; dest = NULL; break; + case IPATH_CMD_POLL_TYPE: + copy = sizeof(cmd.cmd.poll_type); + dest = &cmd.cmd.poll_type; + src = &ucmd->cmd.poll_type; + break; default: ret = -EINVAL; goto bail; @@ -2245,6 +2283,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, case IPATH_CMD_PIOAVAILUPD: ret = ipath_force_pio_avail_update(pd->port_dd); break; + case IPATH_CMD_POLL_TYPE: + pd->poll_type = cmd.cmd.poll_type; + break; } if (ret >= 0) diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 948091f7d5ac..f8aac8e932f5 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -680,6 +680,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) chkerrpkts = 1; dd->ipath_lastrcvhdrqtails[i] = tl; pd->port_hdrqfull++; + if (test_bit(IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag)) { + clear_bit( + IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag); + set_bit( + IPATH_PORT_WAITING_OVERFLOW, + &pd->int_flag); + wake_up_interruptible( + &pd->port_wait); + } } } } @@ -877,14 +888,25 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) dd->ipath_i_rcvurg_mask); for (i = 1; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; - if (portr & (1 << i) && pd && pd->port_cnt && - test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - clear_bit(IPATH_PORT_WAITING_RCV, - &pd->port_flag); - clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, - &dd->ipath_rcvctrl); - wake_up_interruptible(&pd->port_wait); - rcvdint = 1; + if (portr & (1 << i) && pd && pd->port_cnt) { + if (test_bit(IPATH_PORT_WAITING_RCV, + &pd->port_flag)) { + clear_bit(IPATH_PORT_WAITING_RCV, + &pd->port_flag); + set_bit(IPATH_PORT_WAITING_RCV, + &pd->int_flag); + clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, + &dd->ipath_rcvctrl); + wake_up_interruptible(&pd->port_wait); + rcvdint = 1; + } else if (test_bit(IPATH_PORT_WAITING_URG, + &pd->port_flag)) { + clear_bit(IPATH_PORT_WAITING_URG, + &pd->port_flag); + set_bit(IPATH_PORT_WAITING_URG, + &pd->int_flag); + wake_up_interruptible(&pd->port_wait); + } } } if (rcvdint) { diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 2e85aeca5eac..034c283990e8 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -127,6 +127,8 @@ struct ipath_portdata { u32 port_tidcursor; /* next expected TID to check */ unsigned long port_flag; + /* what happened */ + unsigned long int_flag; /* WAIT_RCV that timed out, no interrupt */ u32 port_rcvwait_to; /* WAIT_PIO that timed out, no interrupt */ @@ -155,6 +157,8 @@ struct ipath_portdata { u32 userversion; /* Bitmask of active slaves */ u32 active_slaves; + /* Type of packets or conditions we want to poll for */ + u16 poll_type; }; struct sk_buff; @@ -754,6 +758,10 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); #define IPATH_PORT_WAITING_PIO 3 /* master has not finished initializing */ #define IPATH_PORT_MASTER_UNINIT 4 + /* waiting for an urgent packet to arrive */ +#define IPATH_PORT_WAITING_URG 5 + /* waiting for a header overflow */ +#define IPATH_PORT_WAITING_OVERFLOW 6 /* free up any allocated data at closes */ void ipath_free_data(struct ipath_portdata *dd); From 991bda284dcbc1ed0522683320043ac41d70c82c Mon Sep 17 00:00:00 2001 From: Robert Walsh Date: Mon, 4 Jun 2007 09:55:48 -0700 Subject: [PATCH 30/76] IB/ipath: Clean send flags properly on QP reset Signed-off-by: Robert Walsh Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 9e07abba8aa7..bfd39c933b29 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -336,7 +336,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->qkey = 0; qp->qp_access_flags = 0; qp->s_busy = 0; - qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR; + qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_psn = 0; qp->r_psn = 0; From 87427da55bc03dbce7906a5b09ed50279d654d28 Mon Sep 17 00:00:00 2001 From: John Gregor Date: Mon, 11 Jun 2007 10:21:14 -0700 Subject: [PATCH 31/76] IB/ipath: Update copyright dates Now that it's June, it's about time to update the copyright notices of files that have changed. Signed-off-by: John Gregor Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_cq.c | 2 +- drivers/infiniband/hw/ipath/ipath_debug.h | 2 +- drivers/infiniband/hw/ipath/ipath_diag.c | 2 +- drivers/infiniband/hw/ipath/ipath_driver.c | 2 +- drivers/infiniband/hw/ipath/ipath_eeprom.c | 2 +- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/ipath/ipath_iba6110.c | 2 +- drivers/infiniband/hw/ipath/ipath_iba6120.c | 2 +- drivers/infiniband/hw/ipath/ipath_init_chip.c | 2 +- drivers/infiniband/hw/ipath/ipath_intr.c | 2 +- drivers/infiniband/hw/ipath/ipath_kernel.h | 2 +- drivers/infiniband/hw/ipath/ipath_keys.c | 2 +- drivers/infiniband/hw/ipath/ipath_layer.c | 2 +- drivers/infiniband/hw/ipath/ipath_layer.h | 2 +- drivers/infiniband/hw/ipath/ipath_mad.c | 2 +- drivers/infiniband/hw/ipath/ipath_mmap.c | 2 +- drivers/infiniband/hw/ipath/ipath_mr.c | 2 +- drivers/infiniband/hw/ipath/ipath_qp.c | 2 +- drivers/infiniband/hw/ipath/ipath_rc.c | 2 +- drivers/infiniband/hw/ipath/ipath_registers.h | 2 +- drivers/infiniband/hw/ipath/ipath_ruc.c | 2 +- drivers/infiniband/hw/ipath/ipath_srq.c | 2 +- drivers/infiniband/hw/ipath/ipath_stats.c | 2 +- drivers/infiniband/hw/ipath/ipath_sysfs.c | 2 +- drivers/infiniband/hw/ipath/ipath_uc.c | 2 +- drivers/infiniband/hw/ipath/ipath_ud.c | 2 +- drivers/infiniband/hw/ipath/ipath_user_pages.c | 2 +- drivers/infiniband/hw/ipath/ipath_verbs.c | 2 +- drivers/infiniband/hw/ipath/ipath_verbs.h | 2 +- drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 2 +- drivers/infiniband/hw/ipath/ipath_wc_ppc64.c | 2 +- drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 2 +- 32 files changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 3e9241badba0..9014ef63eedc 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index 42bfbdb0d3e6..19c56e6491eb 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index aab21c1b822d..a698f1949d10 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 8b611796f33e..c40a542bebec 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 9be1b9ac55f0..6b9147964a4f 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 40cf1bc90d74..2e689b974e1f 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 34d159ad97b4..87b18e928c79 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 0c34555d4979..e67e4a89fcc4 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 5193d6945caa..1b1af349f194 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index f8aac8e932f5..e86a23a05919 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 034c283990e8..f1f812759ee3 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -1,7 +1,7 @@ #ifndef _IPATH_KERNEL_H #define _IPATH_KERNEL_H /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index dd487c100f5b..85a4aefc6c03 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index 05a1d2b01d9d..82616b779e24 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 3854a4eae684..415709c4d85b 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 2e9e161bfd0a..2aaa0297b20d 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c index 937bc3396b53..fa830e22002f 100644 --- a/drivers/infiniband/hw/ipath/ipath_mmap.c +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index bdeef8d4f279..e442470a2375 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index bfd39c933b29..d317b81360b2 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 6423d9ef4175..46744ea2babd 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index c182bcd62098..708eba3165d7 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index d47ad7c8a0d2..854deb56ac02 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 03acae66ba81..14cbbd633d34 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 2955f368de0c..73ed17d03188 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index ab34d3e8b955..16238cd3a036 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 243d7c61e18d..8380fbc50d2c 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 0b5a6ac1bb83..38ba771b3efe 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 8536aeb96af8..27034d38b3dd 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 35241b76d766..0aecded6af86 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 458f8227fa36..f3d1f2cee6f8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index dd691cfa5079..9e5abf9c309d 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c index 0095bb70f34e..1d7bd82a1fb1 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c index 9f409fd928fb..3428acb0868c 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two From 149983af609e8f5c57157467baf8545d17b8a6a1 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 26 Jun 2007 15:55:28 +0300 Subject: [PATCH 32/76] mlx4_core: Get the maximum message size from reported device capabilities Get the maximum message size from the device capabilities returned from the QUERY_DEV_CAP firmware command, rather than hard-coding 2 GB. Signed-off-by: Dotan Barak Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/net/mlx4/fw.c | 3 +++ drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 1 + include/linux/mlx4/device.h | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c591616dccde..2fc8ccebaac1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -169,7 +169,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; - props->max_msg_sz = 0x80000000; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index d2b065351e45..c45cbe43a0c4 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -138,6 +138,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36 #define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37 +#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38 #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f @@ -220,6 +221,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->local_ca_ack_delay = field & 0x1f; MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->num_ports = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); + dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 296254ac27c1..7e1dd9e25cfb 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -60,6 +60,7 @@ struct mlx4_dev_cap { int max_rdma_global; int local_ca_ack_delay; int num_ports; + u32 max_msg_sz; int max_mtu[MLX4_MAX_PORTS + 1]; int max_port_width[MLX4_MAX_PORTS + 1]; int max_vl[MLX4_MAX_PORTS + 1]; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index c3da2a2f5431..a4f2e0475a71 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -154,6 +154,7 @@ static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev dev->caps.reserved_uars = dev_cap->reserved_uars; dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.mtt_entry_sz = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; dev->caps.stat_rate_support = dev_cap->stat_rate_support; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b372f5910fc1..8209387ee854 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -172,6 +172,7 @@ struct mlx4_caps { int num_pds; int reserved_pds; int mtt_entry_sz; + u32 max_msg_sz; u32 page_size_cap; u32 flags; u16 stat_rate_support; From de3d353072f9342f04112ba0504c3e294220cb8f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 May 2007 13:27:27 -0500 Subject: [PATCH 33/76] RDMA/cxgb3: Streaming -> RDMA mode transition fixes Due to a HW issue, our current scheme to transition the connection from streaming to rdma mode is broken on the passive side. The firmware and driver now support a new transition scheme for the passive side: - driver posts rdma_init_wr (now including the initial receive seqno) - driver posts last streaming message via TX_DATA message (MPA start response) - uP atomically sends the last streaming message and transitions the tcb to rdma mode. - driver waits for wr_ack indicating the last streaming message was ACKed. NOTE: This change also bumps the required firmware version to 4.3. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 2 +- drivers/infiniband/hw/cxgb3/cxio_wr.h | 3 +- drivers/infiniband/hw/cxgb3/iwch_cm.c | 82 ++++++++++---------------- drivers/infiniband/hw/cxgb3/iwch_cm.h | 1 + drivers/infiniband/hw/cxgb3/iwch_qp.c | 1 + drivers/net/cxgb3/version.h | 2 +- 6 files changed, 38 insertions(+), 53 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 76049afc7655..215bbe51047a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -833,7 +833,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) wqe->ird = cpu_to_be32(attr->ird); wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->rsvd = 0; + wqe->irs = cpu_to_be32(attr->irs); skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); } diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index ff7290eacefb..c84d4ac49355 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -294,6 +294,7 @@ struct t3_rdma_init_attr { u64 qp_dma_addr; u32 qp_dma_size; u32 flags; + u32 irs; }; struct t3_rdma_init_wr { @@ -314,7 +315,7 @@ struct t3_rdma_init_wr { __be32 ird; __be64 qp_dma_addr; /* 7 */ __be32 qp_dma_size; /* 8 */ - u32 rsvd; + u32 irs; }; struct t3_genbit { diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index b2faff5abce8..7b8d5aaa2204 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -515,7 +515,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; @@ -566,7 +566,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) req->len = htonl(mpalen); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; @@ -618,7 +618,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); ep->mpa_skb = skb; state_set(&ep->com, MPA_REP_SENT); @@ -641,6 +641,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); set_emss(ep, ntohs(req->tcp_opt)); @@ -1023,6 +1024,9 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); + ep->rcv_seq += dlen; + BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); + switch (state_read(&ep->com)) { case MPA_REQ_SENT: process_mpa_reply(ep, skb); @@ -1060,7 +1064,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *ep = ctx; struct cpl_wr_ack *hdr = cplhdr(skb); unsigned int credits = ntohs(hdr->credits); - enum iwch_qp_attr_mask mask; PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits); @@ -1072,30 +1075,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ep->mpa_skb = NULL; dst_confirm(ep->dst); if (state_read(&ep->com) == MPA_REP_SENT) { - struct iwch_qp_attributes attrs; - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ord; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - - if (!ep->com.rpl_err) { - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - } - ep->com.rpl_done = 1; PDBG("waking up ep %p\n", ep); wake_up(&ep->com.waitq); @@ -1378,6 +1357,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p\n", __FUNCTION__, ep); ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); set_emss(ep, ntohs(req->tcp_opt)); @@ -1732,10 +1712,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); + if (state_read(&ep->com) == DEAD) return -ECONNRESET; - } BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); @@ -1755,17 +1733,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->ird = conn_param->ird; ep->ord = conn_param->ord; PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord); + get_ep(&ep->com); - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) { - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); - abort_connection(ep, NULL, GFP_KERNEL); - put_ep(&ep->com); - return err; - } /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; @@ -1783,16 +1752,29 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); + if (err) + goto err; - if (err) { - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); - abort_connection(ep, NULL, GFP_KERNEL); - } else { - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - } + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err; + + /* wait for wr_ack */ + wait_event(ep->com.waitq, ep->com.rpl_done); + err = ep->com.rpl_err; + if (err) + goto err; + + state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + put_ep(&ep->com); + return 0; +err: + ep->com.cm_id = NULL; + ep->com.qp = NULL; + cm_id->rem_ref(cm_id); + abort_connection(ep, NULL, GFP_KERNEL); put_ep(&ep->com); return err; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 21a388c313cf..6107e7cd9b57 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -175,6 +175,7 @@ struct iwch_ep { unsigned int atid; u32 hwtid; u32 snd_seq; + u32 rcv_seq; struct l2t_entry *l2t; struct dst_entry *dst; struct sk_buff *mpa_skb; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 714dddbc9a98..679b7c179273 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -732,6 +732,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.irs = qhp->ep->rcv_seq; PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " "flags 0x%x qpcaps 0x%x\n", __FUNCTION__, init_attr.rq_addr, init_attr.rq_size, diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h index 8eddd23a3a51..eb508bf8022a 100644 --- a/drivers/net/cxgb3/version.h +++ b/drivers/net/cxgb3/version.h @@ -39,6 +39,6 @@ /* Firmware version */ #define FW_VERSION_MAJOR 4 -#define FW_VERSION_MINOR 1 +#define FW_VERSION_MINOR 3 #define FW_VERSION_MICRO 0 #endif /* __CHELSIO_VERSION_H */ From fb497d726612bb0e50240405aaabcf37f13901ed Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 19 Jun 2007 09:27:48 -0500 Subject: [PATCH 34/76] RDMA/cxgb3: TERMINATE WRs can hang the tx ofld queue Don't set the gen bits nor length bits in the terminate WR. This is done by the LLD driver. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 679b7c179273..dd89b6b91f9c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -628,9 +628,9 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) /* immediate data starts here. */ term = (struct terminate_message *)wqe->send.sgl; build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - build_fw_riwrh((void *)wqe, T3_WR_SEND, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1, - qhp->ep->hwtid, 5); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | + V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); skb->priority = CPL_PRIORITY_DATA; return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); } From 1580367e7b2068d075cd42d04c4b8c274815e6fc Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 19 Jun 2007 09:27:48 -0500 Subject: [PATCH 35/76] RDMA/cxgb3: Don't count neg_adv abort_req_rss messages as real aborts Negative advice messages should _not_ count toward the 2 abort requests needed to indicate an abort request. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 7b8d5aaa2204..4d7c277d7cc4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1465,6 +1465,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) int ret; int state; + if (is_neg_adv_abort(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep, + ep->hwtid); + t3_l2t_send_event(ep->com.tdev, ep->l2t); + return CPL_RET_BUF_DONE; + } + /* * We get 2 peer aborts from the HW. The first one must * be ignored except for scribbling that we need one more. @@ -1474,13 +1481,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - if (is_neg_adv_abort(req->status)) { - PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); - return CPL_RET_BUF_DONE; - } - state = state_read(&ep->com); PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state); switch (state) { From 6eda48d1e82f901d977a405e43a849dc79968d61 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 19 Jun 2007 09:27:48 -0500 Subject: [PATCH 36/76] RDMA/cxgb3: ctrl-qp init/clear shouldn't set the gen bit Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 215bbe51047a..1518b41482ae 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -144,7 +144,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) } wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = qpid << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -548,7 +548,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1, + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, T3_CTL_QP_TID, 7); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; From 96d0e4931e264012f57a2ae8f7c4697bfa55386a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 21 Jun 2007 18:17:57 -0500 Subject: [PATCH 37/76] RDMA/cxgb3: Don't post TID_RELEASE message The LLD does this for us in cxgb3_remove_tid(). Also fixed active open failure cases where we also shouldn't be releasing the TID. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 4d7c277d7cc4..228721f23ce4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -254,8 +254,6 @@ static void release_ep_resources(struct iwch_ep *ep) cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); dst_release(ep->dst); l2t_release(L2DATA(ep->com.tdev), ep->l2t); - if (ep->com.tdev->type == T3B) - release_tid(ep->com.tdev, ep->hwtid, NULL); put_ep(&ep->com); } @@ -1103,6 +1101,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } +/* + * Return whether a failed active open has allocated a TID + */ +static inline int act_open_has_tid(int status) +{ + return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && + status != CPL_ERR_ARP_MISS; +} + static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; @@ -1112,7 +1119,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) status2errno(rpl->status)); connect_reply_upcall(ep, status2errno(rpl->status)); state_set(&ep->com, DEAD); - if (ep->com.tdev->type == T3B) + if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status)) release_tid(ep->com.tdev, GET_TID(rpl), NULL); cxgb3_free_atid(ep->com.tdev, ep->atid); dst_release(ep->dst); From ecc2f0060fa7ff2fc53864ee19e370e5ddd47d5e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 25 Jun 2007 12:46:00 -0500 Subject: [PATCH 38/76] RDMA/cxgb3: Don't abort after failures sending the mpa reply This bug results in an abort request being sent down _after_ the tid has been released. If the tid happens to have been reused, then the subsequent generation of the tid gets incorrectly aborted. The thread running iwch_accecpt_cr() must not abort a connection if an error is returned after being awakened. If any errors did occur while iwch_accept_cr() is blocked, then the connection has already been aborted on the thread processing the error. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 228721f23ce4..3b41dc0c39dd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1781,7 +1781,6 @@ err: ep->com.cm_id = NULL; ep->com.qp = NULL; cm_id->rem_ref(cm_id); - abort_connection(ep, NULL, GFP_KERNEL); put_ep(&ep->com); return err; } From 6abb6ea80b6564c00906aa0727cf59d2be780d82 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 9 Jul 2007 20:12:26 -0700 Subject: [PATCH 39/76] RDMA/cxgb3: Check return of kmalloc() in iwch_register_device() Signed-off-by: WANG Cong [ Also remove cast from void * return of kmalloc() as suggested by Jesper Juhl . ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index e7c2c3948037..f0c777589374 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1163,9 +1163,10 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.iwcm = - (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs), - GFP_KERNEL); + dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + if (!dev->ibdev.iwcm) + return -ENOMEM; + dev->ibdev.iwcm->connect = iwch_connect; dev->ibdev.iwcm->accept = iwch_accept_cr; dev->ibdev.iwcm->reject = iwch_reject_cr; From 06cc85086e6896939f8c68f8518224748f6b0b2f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 23 May 2007 14:21:22 -0700 Subject: [PATCH 40/76] IB: Use menuconfig for InfiniBand menu Change Kconfig objects from "menu, config" into "menuconfig" so that the user can disable the whole feature without having to enter the menu first. Signed-off-by: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: Roland Dreier --- drivers/infiniband/Kconfig | 15 +++++++-------- drivers/infiniband/hw/amso1100/Kconfig | 2 +- drivers/infiniband/hw/cxgb3/Kconfig | 2 +- drivers/infiniband/hw/ehca/Kconfig | 2 +- drivers/infiniband/hw/ipath/Kconfig | 2 +- drivers/infiniband/hw/mlx4/Kconfig | 1 - drivers/infiniband/hw/mthca/Kconfig | 2 +- drivers/infiniband/ulp/ipoib/Kconfig | 2 +- drivers/infiniband/ulp/iser/Kconfig | 2 +- drivers/infiniband/ulp/srp/Kconfig | 2 +- 10 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 994decc7bcf2..a193dfbf99d2 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -1,14 +1,14 @@ -menu "InfiniBand support" - depends on HAS_IOMEM - -config INFINIBAND - depends on PCI || BROKEN +menuconfig INFINIBAND tristate "InfiniBand support" + depends on PCI || BROKEN + depends on HAS_IOMEM ---help--- Core support for InfiniBand (IB). Make sure to also select any protocols you wish to use as well as drivers for your InfiniBand hardware. +if INFINIBAND + config INFINIBAND_USER_MAD tristate "InfiniBand userspace MAD support" depends on INFINIBAND @@ -20,7 +20,6 @@ config INFINIBAND_USER_MAD config INFINIBAND_USER_ACCESS tristate "InfiniBand userspace access (verbs and CM)" - depends on INFINIBAND ---help--- Userspace InfiniBand access support. This enables the kernel side of userspace verbs and the userspace @@ -37,7 +36,7 @@ config INFINIBAND_USER_MEM config INFINIBAND_ADDR_TRANS bool - depends on INFINIBAND && INET + depends on INET default y source "drivers/infiniband/hw/mthca/Kconfig" @@ -54,4 +53,4 @@ source "drivers/infiniband/ulp/srp/Kconfig" source "drivers/infiniband/ulp/iser/Kconfig" -endmenu +endif # INFINIBAND diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig index 809cb14ac6de..e6ce5f209e47 100644 --- a/drivers/infiniband/hw/amso1100/Kconfig +++ b/drivers/infiniband/hw/amso1100/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_AMSO1100 tristate "Ammasso 1100 HCA support" - depends on PCI && INET && INFINIBAND + depends on PCI && INET ---help--- This is a low-level driver for the Ammasso 1100 host channel adapter (HCA). diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig index 77977f55dca3..2acec3fadf69 100644 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ b/drivers/infiniband/hw/cxgb3/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_CXGB3 tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 && INFINIBAND && INET + depends on CHELSIO_T3 && INET select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T3 1GbE and diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig index 1a854598e0e6..59f807d8d58e 100644 --- a/drivers/infiniband/hw/ehca/Kconfig +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_EHCA tristate "eHCA support" - depends on IBMEBUS && INFINIBAND + depends on IBMEBUS ---help--- This driver supports the IBM pSeries eHCA InfiniBand adapter. diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 90c14543677d..044da5828a78 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_IPATH tristate "QLogic InfiniPath Driver" - depends on (PCI_MSI || HT_IRQ) && 64BIT && INFINIBAND && NET + depends on (PCI_MSI || HT_IRQ) && 64BIT && NET ---help--- This is a driver for QLogic InfiniPath host channel adapters, including InfiniBand verbs support. This driver allows these diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig index b8912cdb9663..4175a4bd0c78 100644 --- a/drivers/infiniband/hw/mlx4/Kconfig +++ b/drivers/infiniband/hw/mlx4/Kconfig @@ -1,6 +1,5 @@ config MLX4_INFINIBAND tristate "Mellanox ConnectX HCA support" - depends on INFINIBAND select MLX4_CORE ---help--- This driver provides low-level InfiniBand support for diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig index 9aa5a4468a75..03efc074967e 100644 --- a/drivers/infiniband/hw/mthca/Kconfig +++ b/drivers/infiniband/hw/mthca/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_MTHCA tristate "Mellanox HCA support" - depends on PCI && INFINIBAND + depends on PCI ---help--- This is a low-level driver for Mellanox InfiniHost host channel adapters (HCAs), including the MT23108 PCI-X HCA diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index af78ccc4ce71..1f76bad020f3 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" - depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n) + depends on NETDEVICES && INET && (IPV6 || IPV6=n) ---help--- Support for the IP-over-InfiniBand protocol (IPoIB). This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index aecbb9083f0c..fe604c8d2996 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_ISER tristate "iSCSI Extensions for RDMA (iSER)" - depends on INFINIBAND && SCSI && INET + depends on SCSI && INET select SCSI_ISCSI_ATTRS ---help--- Support for the iSCSI Extensions for RDMA (iSER) Protocol diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig index 8fe3be4e9910..3432dce29520 100644 --- a/drivers/infiniband/ulp/srp/Kconfig +++ b/drivers/infiniband/ulp/srp/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRP tristate "InfiniBand SCSI RDMA Protocol" - depends on INFINIBAND && SCSI + depends on SCSI ---help--- Support for the SCSI RDMA Protocol over InfiniBand. This allows you to access storage devices that speak SRP over From 4fc570bcbe77f823aae183dd824869f79e74cc97 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 6 Jul 2007 12:48:23 -0700 Subject: [PATCH 41/76] IB/ipath: Add barrier before updating WC head in shared memory Add a barrier to make sure the CPU doesn't reorder writes to memory, since user programs can be polling on the head index update and the entry should be written before that. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_cq.c | 5 ++++- drivers/infiniband/hw/ipath/ipath_ruc.c | 2 ++ drivers/infiniband/hw/ipath/ipath_srq.c | 2 ++ drivers/infiniband/hw/ipath/ipath_ud.c | 2 ++ drivers/infiniband/hw/ipath/ipath_verbs.c | 2 ++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 9014ef63eedc..a6f04d27ec57 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -90,6 +90,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) wc->queue[head].sl = entry->sl; wc->queue[head].dlid_path_bits = entry->dlid_path_bits; wc->queue[head].port_num = entry->port_num; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || @@ -139,7 +141,8 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) if (tail == wc->head) break; - + /* Make sure entry is read after head index is read. */ + smp_rmb(); qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table, wc->queue[tail].qp_num); entry->qp = &qp->ibqp; diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 854deb56ac02..85256747d8a1 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -194,6 +194,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) ret = 0; goto bail; } + /* Make sure entry is read after head index is read. */ + smp_rmb(); wqe = get_rwqe_ptr(rq, tail); if (++tail >= rq->size) tail = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 14cbbd633d34..40c36ec19016 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -80,6 +80,8 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); wq->head = next; spin_unlock_irqrestore(&srq->rq.lock, flags); } diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 38ba771b3efe..f9a3338a5fb7 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -176,6 +176,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, dev->n_pkt_drops++; goto bail_sge; } + /* Make sure entry is read after head index is read. */ + smp_rmb(); wqe = get_rwqe_ptr(rq, tail); if (++tail >= rq->size) tail = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 0aecded6af86..c76ea0e0b024 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -327,6 +327,8 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); wq->head = next; spin_unlock_irqrestore(&qp->r_rq.lock, flags); } From 0f4fc5ebd9a68ede20ca365576a6df2df2fefc4c Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Fri, 6 Jul 2007 12:48:33 -0700 Subject: [PATCH 42/76] IB/ipath: Be more cautious about coming out of freeze mode We are more careful to be sure that we don't lose information about changes that occurred while we were in freeze mode, when the chip will not notify us, and try to avoid false error interrupts while doing cleanup. Put all of this logic in a new function ipath_clear_freeze(). Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_iba6110.c | 11 +-- drivers/infiniband/hw/ipath/ipath_iba6120.c | 11 +-- drivers/infiniband/hw/ipath/ipath_intr.c | 77 +++++++++++++++++++++ drivers/infiniband/hw/ipath/ipath_kernel.h | 1 + 4 files changed, 80 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 87b18e928c79..fdfa95d0dd72 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -509,16 +509,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (!hwerrs) { ipath_dbg("Clearing freezemode on ignored or " "recovered hardware error\n"); - /* - * clear all sends, becauase they have may been - * completed by usercode while in freeze mode, and - * therefore would not be sent, and eventually - * might cause the process to run out of bufs - */ - ipath_cancel_sends(dd); - ctrl &= ~INFINIPATH_C_FREEZEMODE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - ctrl); + ipath_clear_freeze(dd); } } diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index e67e4a89fcc4..9868ccda5f26 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -435,16 +435,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, freeze_cnt++; ipath_dbg("Clearing freezemode on ignored or recovered " "hardware error (%u)\n", freeze_cnt); - /* - * clear all sends, becauase they have may been - * completed by usercode while in freeze mode, and - * therefore would not be sent, and eventually - * might cause the process to run out of bufs - */ - ipath_cancel_sends(dd); - ctrl &= ~INFINIPATH_C_FREEZEMODE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); + ipath_clear_freeze(dd); } } diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index e86a23a05919..ce490235c24f 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -132,6 +132,17 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ INFINIPATH_E_INVALIDADDR) +/* + * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore + * errors not related to freeze and cancelling buffers. Can't ignore + * armlaunch because could get more while still cleaning up, and need + * to cancel those as they happen. + */ +#define E_SPKT_ERRS_IGNORE \ + (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ + INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \ + INFINIPATH_E_SPKTLEN) + /* * these are errors that can occur when the link changes state while * a packet is being sent or received. This doesn't cover things @@ -760,6 +771,72 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) return chkerrpkts; } + +/* + * try to cleanup as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it for anything changing while in freeze mode + * (we don't want to wait for the next pio buffer state change). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + */ +void ipath_clear_freeze(struct ipath_devdata *dd) +{ + int i, im; + __le64 val; + + /* disable error interrupts, to avoid confusion */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); + + /* + * clear all sends, because they have may been + * completed by usercode while in freeze mode, and + * therefore would not be sent, and eventually + * might cause the process to run out of bufs + */ + ipath_cancel_sends(dd); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); + + /* ensure pio avail updates continue */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl & ~IPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + + /* + * We just enabled pioavailupdate, so dma copy is almost certainly + * not yet right, so read the registers directly. Similar to init + */ + for (i = 0; i < dd->ipath_pioavregs; i++) { + /* deal with 6110 chip bug */ + im = i > 3 ? ((i&1) ? i-1 : i+1) : i; + val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64))); + dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] + = le64_to_cpu(val); + } + + /* + * force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts. + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, + E_SPKT_ERRS_IGNORE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + ~dd->ipath_maskederrs); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); +} + + /* this is separate to allow for better optimization of ipath_intr() */ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index f1f812759ee3..8bad3e3c5550 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -645,6 +645,7 @@ int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); +void ipath_clear_freeze(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, From f42b6471e9e9082ed4bcba1456eccadf98f1337a Mon Sep 17 00:00:00 2001 From: Arthur Jones Date: Mon, 9 Jul 2007 20:12:26 -0700 Subject: [PATCH 43/76] IB/ipath: Update MAINTAINERS entry Bryan is no longer with QLogic and we now have a public git server and a public email alias for infinipath driver patches. And, as pointed out by Hal Rosenstock, the mailing list has changed as well. Signed-off-by: Arthur Jones Signed-off-by: Roland Dreier --- MAINTAINERS | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2c1dfb271613..2e13aee21589 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1989,9 +1989,10 @@ M: jjciarla@raiz.uncu.edu.ar S: Maintained IPATH DRIVER: -P: Bryan O'Sullivan -M: support@pathscale.com -L: openib-general@openib.org +P: Arthur Jones +M: infinipath@qlogic.com +L: general@lists.openfabrics.org +T: git git://git.qlogic.com/ipath-linux-2.6 S: Supported IPMI SUBSYSTEM From 37a7e9b7f28fbef4b6abda102fa41c1467f6022f Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 6 Jul 2007 12:48:38 -0700 Subject: [PATCH 44/76] IB/ipath: Lower default number of kernel send buffers The default calculation for the number of send buffers to allocate to the kernel was too high for the PCIe version of the chip thus leaving fewer than desired send buffers for user MPI applications. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 1b1af349f194..fa98aabdf76b 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -737,7 +737,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; if (ipath_kpiobufs == 0) { /* not set by user (this is default) */ - if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32) + if (piobufs > 144) kpiobufs = 32; else kpiobufs = 16; From 12f9a49e1bce241c243268c764e1b37391157cd0 Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Fri, 6 Jul 2007 12:48:43 -0700 Subject: [PATCH 45/76] IB/ipath: Change version wording to be less confusing with release number Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index fa98aabdf76b..49951d583804 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -656,7 +656,7 @@ static int init_housekeeping(struct ipath_devdata *dd, ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn); snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion), - "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " "SW Compat %u\n", IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn, (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) & From 9ca48655667214be6ebd191628a3c4b5b529a87e Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 6 Jul 2007 12:48:48 -0700 Subject: [PATCH 46/76] IB/ipath: Remove support for preproduction HTX InfiniPath cards Clean up some code by removing support for some older pre-production HTX InfiniPath cards. Signed-off-by: Ralph Campbell --- drivers/infiniband/hw/ipath/ipath_driver.c | 10 +----- drivers/infiniband/hw/ipath/ipath_iba6110.c | 39 ++++++--------------- drivers/infiniband/hw/ipath/ipath_kernel.h | 4 --- drivers/infiniband/hw/ipath/ipath_verbs.c | 7 ---- 4 files changed, 12 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index c40a542bebec..da4a2cfb61b4 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1021,14 +1021,10 @@ void ipath_kreceive(struct ipath_devdata *dd) goto bail; } - /* There is already a thread processing this queue. */ - if (test_and_set_bit(0, &dd->ipath_rcv_pending)) - goto bail; - l = dd->ipath_port0head; hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr); if (l == hdrqtail) - goto done; + goto bail; reloop: for (i = 0; l != hdrqtail; i++) { @@ -1163,10 +1159,6 @@ reloop: ipath_stats.sps_avgpkts_call = ipath_stats.sps_port0pkts / ++totcalls; -done: - clear_bit(0, &dd->ipath_rcv_pending); - smp_mb__after_clear_bit(); - bail:; } diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index fdfa95d0dd72..650745d83fac 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -677,6 +677,12 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (n) snprintf(name, namelen, "%s", n); + if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 && + dd->ipath_boardrev != 11) { + ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name); + ret = 1; + goto bail; + } if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 4)) { /* @@ -694,36 +700,11 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, * copies */ dd->ipath_flags |= IPATH_32BITCOUNTERS; + dd->ipath_flags |= IPATH_GPIO_INTR; if (dd->ipath_htspeed != 800) ipath_dev_err(dd, "Incorrectly configured for HT @ %uMHz\n", dd->ipath_htspeed); - if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 || - dd->ipath_boardrev == 6) - dd->ipath_flags |= IPATH_GPIO_INTR; - else - dd->ipath_flags |= IPATH_POLL_RX_INTR; - if (dd->ipath_boardrev == 8) { /* LS/X-1 */ - u64 val; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); - if (val & INFINIPATH_EXTS_SERDESSEL) { - /* - * hardware disabled - * - * This means that the chip is hardware disabled, - * and will not be able to bring up the link, - * in any case. We special case this and abort - * early, to avoid later messages. We also set - * the DISABLED status bit - */ - ipath_dbg("Unit %u is hardware-disabled\n", - dd->ipath_unit); - *dd->ipath_statusp |= IPATH_STATUS_DISABLED; - /* this value is handled differently */ - ret = 2; - goto bail; - } - } ret = 0; bail: @@ -1574,8 +1555,10 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) * with 128, rather than 112. */ dd->ipath_flags |= IPATH_GPIO_INTR; - dd->ipath_flags &= ~IPATH_POLL_RX_INTR; - } + } else + ipath_dev_err(dd, "Unsupported InfiniPath serial " + "number %.16s!\n", dd->ipath_serial); + return 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 8bad3e3c5550..a27e06297636 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -391,9 +391,6 @@ struct ipath_devdata { struct class_device *diag_class_dev; /* timer used to prevent stats overflow, error throttling, etc. */ struct timer_list ipath_stats_timer; - /* check for stale messages in rcv queue */ - /* only allow one intr at a time. */ - unsigned long ipath_rcv_pending; void *ipath_dummy_hdrq; /* used after port close */ dma_addr_t ipath_dummy_hdrq_phys; @@ -740,7 +737,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); * are 64bit */ #define IPATH_32BITCOUNTERS 0x20000 /* can miss port0 rx interrupts */ -#define IPATH_POLL_RX_INTR 0x40000 #define IPATH_DISABLED 0x80000 /* administratively disabled */ /* Use GPIO interrupts for new counters */ #define IPATH_GPIO_ERRINTRS 0x100000 diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index c76ea0e0b024..65f7181e9cf8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1375,13 +1375,6 @@ static void __verbs_timer(unsigned long arg) { struct ipath_devdata *dd = (struct ipath_devdata *) arg; - /* - * If port 0 receive packet interrupts are not available, or - * can be missed, poll the receive queue - */ - if (dd->ipath_flags & IPATH_POLL_RX_INTR) - ipath_kreceive(dd); - /* Handle verbs layer timeouts. */ ipath_ib_timer(dd->verbs_dev); From 3588423fbab1cfaf839e67378897e232a054f317 Mon Sep 17 00:00:00 2001 From: Arthur Jones Date: Fri, 6 Jul 2007 12:48:53 -0700 Subject: [PATCH 47/76] IB/ipath: Test interrupts at driver startup All too often, interrupts do not get enabled for our card due to BIOS misconfiguration and other issues. This patch checks for that condition on startup and warns the user. This patch is based on work (check LID availability) by Robert Walsh. Signed-off-by: Arthur Jones Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 23 ++++++++++++++++++++++ drivers/infiniband/hw/ipath/ipath_intr.c | 3 +++ drivers/infiniband/hw/ipath/ipath_kernel.h | 5 +++++ 3 files changed, 31 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index da4a2cfb61b4..e397ec0d388f 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -104,6 +104,9 @@ static int __devinit ipath_init_one(struct pci_dev *, #define PCI_DEVICE_ID_INFINIPATH_HT 0xd #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 +/* Number of seconds before our card status check... */ +#define STATUS_TIMEOUT 60 + static const struct pci_device_id ipath_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, @@ -119,6 +122,18 @@ static struct pci_driver ipath_driver = { .id_table = ipath_pci_tbl, }; +static void ipath_check_status(struct work_struct *work) +{ + struct ipath_devdata *dd = container_of(work, struct ipath_devdata, + status_work.work); + + /* + * If we don't have any interrupts, let the user know and + * don't bother checking again. + */ + if (dd->ipath_int_counter == 0) + dev_err(&dd->pcidev->dev, "No interrupts detected.\n"); +} static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, u32 *bar0, u32 *bar1) @@ -187,6 +202,8 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) dd->pcidev = pdev; pci_set_drvdata(pdev, dd); + INIT_DELAYED_WORK(&dd->status_work, ipath_check_status); + list_add(&dd->ipath_list, &ipath_dev_list); bail_unlock: @@ -511,6 +528,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipath_diag_add(dd); ipath_register_ib_device(dd); + /* Check that card status in STATUS_TIMEOUT seconds. */ + schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT); + goto bail; bail_irqsetup: @@ -638,6 +658,9 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) */ ipath_shutdown_device(dd); + cancel_delayed_work(&dd->status_work); + flush_scheduled_work(); + if (dd->verbs_dev) ipath_unregister_ib_device(dd->verbs_dev); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index ce490235c24f..47aa43428fbf 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -1009,6 +1009,9 @@ irqreturn_t ipath_intr(int irq, void *data) ipath_stats.sps_ints++; + if (dd->ipath_int_counter != (u32) -1) + dd->ipath_int_counter++; + if (!(dd->ipath_flags & IPATH_PRESENT)) { /* * This return value is not great, but we do not want the diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index a27e06297636..3105005fc9d2 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -297,6 +297,8 @@ struct ipath_devdata { u32 ipath_lastport_piobuf; /* is a stats timer active */ u32 ipath_stats_timer_active; + /* number of interrupts for this device -- saturates... */ + u32 ipath_int_counter; /* dwords sent read from counter */ u32 ipath_lastsword; /* dwords received read from counter */ @@ -571,6 +573,9 @@ struct ipath_devdata { u32 ipath_overrun_thresh_errs; u32 ipath_lli_errs; + /* status check work */ + struct delayed_work status_work; + /* * Not all devices managed by a driver instance are the same * type, so these fields must be per-device. From 4f5973fd3bed9dbff0940ad6cc918be64a17b3e4 Mon Sep 17 00:00:00 2001 From: Arthur Jones Date: Fri, 6 Jul 2007 12:48:58 -0700 Subject: [PATCH 48/76] IB/ipath: Remove bogus RD_ATOMIC checks from modify_qp The changeset 3859e39d ("IB/ipath: Support larger IB_QP_MAX_DEST_RD_ATOMIC and IB_QP_MAX_QP_RD_ATOMIC") added support for larger RD_ATOMIC values, but it failed to take out the stricter checks that were before these and hence had no effect. This patch takes out the bogus checks.... Signed-off-by: Arthur Jones Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index d317b81360b2..1324b35ff1f8 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -516,14 +516,6 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->path_mtu > IB_MTU_2048) goto inval; - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - if (attr->max_dest_rd_atomic > 1) - goto inval; - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) - if (attr->max_rd_atomic > 1) - goto inval; - if (attr_mask & IB_QP_PATH_MIG_STATE) if (attr->path_mig_state != IB_MIG_MIGRATED && attr->path_mig_state != IB_MIG_REARM) From 78526821bee8506ab903f596e8e6187fa5919877 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 9 Jul 2007 20:12:26 -0700 Subject: [PATCH 49/76] IB: Update mailing list address The InfiniBand / RDMA discussion list has moved. Signed-off-by: Roland Dreier --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2e13aee21589..96a174b64e10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -371,7 +371,7 @@ P: Tom Tucker M: tom@opengridcomputing.com P: Steve Wise M: swise@opengridcomputing.com -L: openib-general@openib.org +L: general@lists.openfabrics.org S: Maintained AOA (Apple Onboard Audio) ALSA DRIVER @@ -1396,7 +1396,7 @@ P: Hoang-Nam Nguyen M: hnguyen@de.ibm.com P: Christoph Raisch M: raisch@de.ibm.com -L: openib-general@openib.org +L: general@lists.openfabrics.org S: Supported EMU10K1 SOUND DRIVER @@ -1851,7 +1851,7 @@ P: Sean Hefty M: mshefty@ichips.intel.com P: Hal Rosenstock M: halr@voltaire.com -L: openib-general@openib.org +L: general@lists.openfabrics.org W: http://www.openib.org/ T: git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git S: Supported From f41d229865c984015914221959675b1c8723f6a7 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 28 Jun 2007 19:16:20 -0700 Subject: [PATCH 50/76] IB/ipath: return correct PortGUID in NodeInfo Return the PortGUID of the correct port when responding to a NodeInfo query. Returning the SystemImageGUID causes issues when there are multiple HCAs in a single system. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 2aaa0297b20d..d61c03044545 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -103,7 +103,7 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, /* This is already in network order */ nip->sys_guid = to_idev(ibdev)->sys_image_guid; nip->node_guid = dd->ipath_guid; - nip->port_guid = nip->sys_guid; + nip->port_guid = dd->ipath_guid; nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); nip->device_id = cpu_to_be16(dd->ipath_deviceid); majrev = dd->ipath_majrev; From b8a3ba551369982180917a999d32fcedbba34115 Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Mon, 9 Jul 2007 15:20:55 +0200 Subject: [PATCH 51/76] IB/ehca: Change scaling_code parameter description to match default value Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c3f99f33b49c..fea199f01eaa 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -94,7 +94,7 @@ MODULE_PARM_DESC(poll_all_eqs, MODULE_PARM_DESC(static_rate, "set permanent static rate (default: disabled)"); MODULE_PARM_DESC(scaling_code, - "set scaling code (0: disabled, 1: enabled/default)"); + "set scaling code (0: disabled/default, 1: enabled)"); spinlock_t ehca_qp_idr_lock; spinlock_t ehca_cq_idr_lock; From 91f13aa3fc22e357b494c5b8270e94543870928d Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:21:45 +0200 Subject: [PATCH 52/76] IB/ehca: HW level, HW caps and MTU autodetection In preparation for support of new eHCA2 features, change adapter probing: - Hardware level is changed to encode major and minor chip version - Hardware capabilities are queried from the firmware - The maximum MTU is queried from the firmware instead of assuming a fixed value Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_av.c | 6 ++- drivers/infiniband/hw/ehca/ehca_classes.h | 2 + drivers/infiniband/hw/ehca/ehca_hca.c | 27 ++++++++-- drivers/infiniband/hw/ehca/ehca_main.c | 62 ++++++++++++++++++++--- drivers/infiniband/hw/ehca/hipz_hw.h | 18 +++++++ 5 files changed, 104 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 0d6e2c4bb245..3cd6bf3402d1 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -118,7 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); } - av->av.pmtu = EHCA_MAX_MTU; + av->av.pmtu = shca->max_mtu; /* dgid comes in grh.word_3 */ memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, @@ -137,6 +137,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) struct ehca_av *av; struct ehca_ud_av new_ehca_av; struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, + ib_device); u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && @@ -192,7 +194,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); } - new_ehca_av.pmtu = EHCA_MAX_MTU; + new_ehca_av.pmtu = shca->max_mtu; memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, sizeof(ah_attr->grh.dgid)); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1d286d3cc2d5..35d948f2502c 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -107,6 +107,8 @@ struct ehca_shca { struct ehca_pd *pd; struct h_galpas galpas; struct mutex modify_mutex; + u64 hca_cap; + int max_mtu; }; struct ehca_pd { diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 32b55a4f0e5b..b310de5c94ae 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -45,11 +45,25 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { - int ret = 0; + int i, ret = 0; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_hca *rblock; + static const u32 cap_mapping[] = { + IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, + IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, + IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, + IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, + IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, + IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, + IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, + IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, + IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, + }; + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); @@ -96,6 +110,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) props->max_total_mcast_qp_attach = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); + /* translate device capabilities */ + props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; + for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) + if (rblock->hca_cap_indicators & cap_mapping[i + 1]) + props->device_cap_flags |= cap_mapping[i]; + query_device1: ehca_free_fw_ctrlblock(rblock); @@ -261,7 +282,7 @@ int ehca_modify_port(struct ib_device *ibdev, } if (mutex_lock_interruptible(&shca->modify_mutex)) - return -ERESTARTSYS; + return -ERESTARTSYS; rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { @@ -290,7 +311,7 @@ modify_port2: ehca_free_fw_ctrlblock(rblock); modify_port1: - mutex_unlock(&shca->modify_mutex); + mutex_unlock(&shca->modify_mutex); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index fea199f01eaa..eb22a6b296d9 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -205,11 +205,35 @@ static void ehca_destroy_slab_caches(void) #define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) #define EHCA_REVID EHCA_BMASK_IBM(40,63) +static struct cap_descr { + u64 mask; + char *descr; +} hca_cap_descr[] = { + { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, + { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, + { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, + { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, + { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, + { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, + { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, + { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" }, + { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, + { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, + { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, + { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, + { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, + { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, + { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, + { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, + { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, +}; + int ehca_sense_attributes(struct ehca_shca *shca) { - int ret = 0; + int i, ret = 0; u64 h_ret; struct hipz_query_hca *rblock; + struct hipz_query_port *port; rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { @@ -222,7 +246,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_err("Cannot query device properties. h_ret=%lx", h_ret); ret = -EPERM; - goto num_ports1; + goto sense_attributes1; } if (ehca_nr_ports == 1) @@ -242,18 +266,44 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); if ((hcaaver == 1) && (revid == 0)) - shca->hw_level = 0; + shca->hw_level = 0x11; else if ((hcaaver == 1) && (revid == 1)) - shca->hw_level = 1; + shca->hw_level = 0x12; else if ((hcaaver == 1) && (revid == 2)) - shca->hw_level = 2; + shca->hw_level = 0x13; + else if ((hcaaver == 2) && (revid == 0)) + shca->hw_level = 0x21; + else if ((hcaaver == 2) && (revid == 0x10)) + shca->hw_level = 0x22; + else { + ehca_gen_warn("unknown hardware version" + " - assuming default level"); + shca->hw_level = 0x22; + } } ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); shca->sport[0].rate = IB_RATE_30_GBPS; shca->sport[1].rate = IB_RATE_30_GBPS; -num_ports1: + shca->hca_cap = rblock->hca_cap_indicators; + ehca_gen_dbg(" ... HCA capabilities:"); + for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) + if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) + ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + + port = (struct hipz_query_port *) rblock; + h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query port properties. h_ret=%lx", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + shca->max_mtu = port->max_mtu; + +sense_attributes1: ehca_free_fw_ctrlblock(rblock); return ret; } diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index fad91368dc5a..9116fc943fee 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -360,6 +360,24 @@ struct hipz_query_hca { u32 max_neq; } __attribute__ ((packed)); +#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) +#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) +#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) +#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) +#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) +#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) +#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) +#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) +#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) +#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) +#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) +#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) +#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) +#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) +#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) +#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) +#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) + /* query port response block */ struct hipz_query_port { u32 state; From 9a79fc0a1b815cbd05a8e37ea838acfccb7235cc Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:23:15 +0200 Subject: [PATCH 53/76] IB/ehca: QP code restructuring in preparation for SRQ - Replace init_qp_queues() by a shorter init_qp_queue(), eliminating duplicate code. - hipz_h_alloc_resource_qp() doesn't need a pointer to struct ehca_qp any longer. All input and output data is transferred through the parms parameter. - Change the interface to also support SRQ. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 46 +++- drivers/infiniband/hw/ehca/ehca_qp.c | 254 ++++++++++------------ drivers/infiniband/hw/ehca/hcp_if.c | 35 ++- drivers/infiniband/hw/ehca/hcp_if.h | 1 - 4 files changed, 166 insertions(+), 170 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 35d948f2502c..6e75db68996e 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -322,14 +322,49 @@ struct ehca_alloc_cq_parms { struct ipz_eq_handle eq_handle; }; +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3, +}; + +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, +}; + +enum ehca_ll_comp_flags { + LLQP_SEND_COMP = 0x20, + LLQP_RECV_COMP = 0x40, + LLQP_COMP_MASK = 0x60, +}; + struct ehca_alloc_qp_parms { - int servicetype; +/* input parameters */ + enum ehca_service_type servicetype; int sigtype; - int daqp_ctrl; - int max_send_sge; - int max_recv_sge; + enum ehca_ext_qp_type ext_type; + enum ehca_ll_comp_flags ll_comp_flags; + + int max_send_wr, max_recv_wr; + int max_send_sge, max_recv_sge; int ud_av_l_key_ctl; + u32 token; + struct ipz_eq_handle eq_handle; + struct ipz_pd pd; + struct ipz_cq_handle send_cq_handle, recv_cq_handle; + + u32 srq_qpn, srq_token, srq_limit; + +/* output parameters */ + u32 real_qp_num; + struct ipz_qp_handle qp_handle; + struct h_galpas galpas; + u16 act_nr_send_wqes; u16 act_nr_recv_wqes; u8 act_nr_recv_sges; @@ -337,9 +372,6 @@ struct ehca_alloc_qp_parms { u32 nr_rq_pages; u32 nr_sq_pages; - - struct ipz_eq_handle ipz_eq_handle; - struct ipz_pd pd; }; int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index b5bc787c77b6..513471a7bffa 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -234,13 +234,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, return index; } -enum ehca_service_type { - ST_RC = 0, - ST_UC = 1, - ST_RD = 2, - ST_UD = 3 -}; - /* * ibqptype2servicetype returns hcp service type corresponding to given * ib qp type used by create_qp() @@ -268,15 +261,16 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) } /* - * init_qp_queues initializes/constructs r/squeue and registers queue pages. + * init_qp_queue initializes/constructs r/squeue and registers queue pages. */ -static inline int init_qp_queues(struct ehca_shca *shca, - struct ehca_qp *my_qp, - int nr_sq_pages, - int nr_rq_pages, - int swqe_size, - int rwqe_size, - int nr_send_sges, int nr_receive_sges) +static inline int init_qp_queue(struct ehca_shca *shca, + struct ehca_qp *my_qp, + struct ipz_queue *queue, + int q_type, + u64 expected_hret, + int nr_q_pages, + int wqe_size, + int nr_sges) { int ret, cnt, ipz_rc; void *vpage; @@ -284,104 +278,63 @@ static inline int init_qp_queues(struct ehca_shca *shca, struct ib_device *ib_dev = &shca->ib_device; struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, - nr_sq_pages, - EHCA_PAGESIZE, swqe_size, nr_send_sges); + if (!nr_q_pages) + return 0; + + ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE, + wqe_size, nr_sges); if (!ipz_rc) { - ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x", + ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x", ipz_rc); return -EBUSY; } - ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, - nr_rq_pages, - EHCA_PAGESIZE, rwqe_size, nr_receive_sges); - if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x", - ipz_rc); - ret = -EBUSY; - goto init_qp_queues0; - } - /* register SQ pages */ - for (cnt = 0; cnt < nr_sq_pages; cnt++) { - vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue); + /* register queue pages */ + for (cnt = 0; cnt < nr_q_pages; cnt++) { + vpage = ipz_qpageit_get_inc(queue); if (!vpage) { - ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() " + ehca_err(ib_dev, "ipz_qpageit_get_inc() " "failed p_vpage= %p", vpage); ret = -EINVAL; - goto init_qp_queues1; + goto init_qp_queue1; } rpage = virt_to_abs(vpage); h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, - &my_qp->pf, 0, 0, + NULL, 0, q_type, rpage, 1, my_qp->galpas.kernel); - if (h_ret < H_SUCCESS) { - ehca_err(ib_dev, "SQ hipz_qp_register_rpage()" - " failed rc=%lx", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; - } - } - - ipz_qeit_reset(&my_qp->ipz_squeue); - - /* register RQ pages */ - for (cnt = 0; cnt < nr_rq_pages; cnt++) { - vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); - if (!vpage) { - ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() " - "failed p_vpage = %p", vpage); - ret = -EINVAL; - goto init_qp_queues1; - } - - rpage = virt_to_abs(vpage); - - h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, 0, 1, - rpage, 1,my_qp->galpas.kernel); - if (h_ret < H_SUCCESS) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed " - "rc=%lx", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; - } - if (cnt == (nr_rq_pages - 1)) { /* last page! */ - if (h_ret != H_SUCCESS) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + if (cnt == (nr_q_pages - 1)) { /* last page! */ + if (h_ret != expected_hret) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " "h_ret= %lx ", h_ret); ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; + goto init_qp_queue1; } vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); if (vpage) { ehca_err(ib_dev, "ipz_qpageit_get_inc() " "should not succeed vpage=%p", vpage); ret = -EINVAL; - goto init_qp_queues1; + goto init_qp_queue1; } } else { if (h_ret != H_PAGE_REGISTERED) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + ehca_err(ib_dev, "hipz_qp_register_rpage() " "h_ret= %lx ", h_ret); ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; + goto init_qp_queue1; } } } - ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(queue); return 0; -init_qp_queues1: - ipz_queue_dtor(&my_qp->ipz_rqueue); -init_qp_queues0: - ipz_queue_dtor(&my_qp->ipz_squeue); +init_qp_queue1: + ipz_queue_dtor(queue); return ret; } @@ -397,14 +350,17 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int max_send_sge, max_recv_sge, ret; + int is_llqp = 0, has_srq = 0; + int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ struct ehca_alloc_qp_parms parms; u32 swqe_size = 0, rwqe_size = 0; - u8 daqp_completion, isdaqp; unsigned long flags; + memset(&parms, 0, sizeof(parms)); + qp_type = init_attr->qp_type; + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", @@ -412,38 +368,47 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - /* save daqp completion bits */ - daqp_completion = init_attr->qp_type & 0x60; - /* save daqp bit */ - isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0; - init_attr->qp_type = init_attr->qp_type & 0x1F; + /* save LLQP info */ + if (qp_type & 0x80) { + is_llqp = 1; + parms.ext_type = EQPT_LLQP; + parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; + } + qp_type &= 0x1F; - if (init_attr->qp_type != IB_QPT_UD && - init_attr->qp_type != IB_QPT_SMI && - init_attr->qp_type != IB_QPT_GSI && - init_attr->qp_type != IB_QPT_UC && - init_attr->qp_type != IB_QPT_RC) { - ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type); + /* check for SRQ */ + has_srq = !!(init_attr->srq); + if (is_llqp && has_srq) { + ehca_err(pd->device, "LLQPs can't have an SRQ"); return ERR_PTR(-EINVAL); } - if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD) - && isdaqp) { - ehca_err(pd->device, "unsupported LL QP Type=%x", - init_attr->qp_type); + + /* check QP type */ + if (qp_type != IB_QPT_UD && + qp_type != IB_QPT_UC && + qp_type != IB_QPT_RC && + qp_type != IB_QPT_SMI && + qp_type != IB_QPT_GSI) { + ehca_err(pd->device, "wrong QP Type=%x", qp_type); return ERR_PTR(-EINVAL); - } else if (init_attr->qp_type == IB_QPT_RC && isdaqp && + } + + if (is_llqp && (qp_type != IB_QPT_RC && qp_type != IB_QPT_UD)) { + ehca_err(pd->device, "unsupported LL QP Type=%x", qp_type); + return ERR_PTR(-EINVAL); + } else if (is_llqp && qp_type == IB_QPT_RC && (init_attr->cap.max_send_wr > 255 || init_attr->cap.max_recv_wr > 255 )) { - ehca_err(pd->device, "Invalid Number of max_sq_wr =%x " - "or max_rq_wr=%x for QP Type=%x", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr,init_attr->qp_type); - return ERR_PTR(-EINVAL); - } else if (init_attr->qp_type == IB_QPT_UD && isdaqp && - init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, "Invalid Number of max_sq_wr=%x " + "or max_rq_wr=%x for RC LLQP", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr); + return ERR_PTR(-EINVAL); + } else if (is_llqp && qp_type == IB_QPT_UD && + init_attr->cap.max_send_wr > 255) { ehca_err(pd->device, "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, init_attr->qp_type); + init_attr->cap.max_send_wr, qp_type); return ERR_PTR(-EINVAL); } @@ -456,7 +421,6 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, return ERR_PTR(-ENOMEM); } - memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms)); spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); @@ -465,8 +429,6 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, my_qp->send_cq = container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - my_qp->init_attr = *init_attr; - do { if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { ret = -ENOMEM; @@ -486,10 +448,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit0; } - parms.servicetype = ibqptype2servicetype(init_attr->qp_type); + parms.servicetype = ibqptype2servicetype(qp_type); if (parms.servicetype < 0) { ret = -EINVAL; - ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type); + ehca_err(pd->device, "Invalid qp_type=%x", qp_type); goto create_qp_exit0; } @@ -501,21 +463,23 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, /* UD_AV CIRCUMVENTION */ max_send_sge = init_attr->cap.max_send_sge; max_recv_sge = init_attr->cap.max_recv_sge; - if (IB_QPT_UD == init_attr->qp_type || - IB_QPT_GSI == init_attr->qp_type || - IB_QPT_SMI == init_attr->qp_type) { + if (parms.servicetype == ST_UD) { max_send_sge += 2; max_recv_sge += 2; } - parms.ipz_eq_handle = shca->eq.ipz_eq_handle; - parms.daqp_ctrl = isdaqp | daqp_completion; + parms.token = my_qp->token; + parms.eq_handle = shca->eq.ipz_eq_handle; parms.pd = my_pd->fw_pd; - parms.max_recv_sge = max_recv_sge; + parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; + parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; + + parms.max_send_wr = init_attr->cap.max_send_wr; + parms.max_recv_wr = init_attr->cap.max_recv_wr; parms.max_send_sge = max_send_sge; + parms.max_recv_sge = max_recv_sge; - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms); - + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", h_ret); @@ -523,16 +487,18 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit1; } - my_qp->ib_qp.qp_num = my_qp->real_qp_num; + my_qp->ib_qp.qp_num = my_qp->real_qp_num = parms.real_qp_num; + my_qp->ipz_qp_handle = parms.qp_handle; + my_qp->galpas = parms.galpas; - switch (init_attr->qp_type) { + switch (qp_type) { case IB_QPT_RC: - if (isdaqp == 0) { + if (!is_llqp) { swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ (parms.act_nr_send_sges)]); rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ (parms.act_nr_recv_sges)]); - } else { /* for daqp we need to use msg size, not wqe size */ + } else { /* for LLQP we need to use msg size, not wqe size */ swqe_size = da_rc_msg_size[max_send_sge]; rwqe_size = da_rc_msg_size[max_recv_sge]; parms.act_nr_send_sges = 1; @@ -552,7 +518,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, /* UD circumvention */ parms.act_nr_recv_sges -= 2; parms.act_nr_send_sges -= 2; - if (isdaqp) { + if (is_llqp) { swqe_size = da_ud_sq_msg_size[max_send_sge]; rwqe_size = da_rc_msg_size[max_recv_sge]; parms.act_nr_send_sges = 1; @@ -564,14 +530,12 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, u.ud_av.sg_list[parms.act_nr_recv_sges]); } - if (IB_QPT_GSI == init_attr->qp_type || - IB_QPT_SMI == init_attr->qp_type) { + if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { parms.act_nr_send_wqes = init_attr->cap.max_send_wr; parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; parms.act_nr_send_sges = init_attr->cap.max_send_sge; parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; - my_qp->ib_qp.qp_num = - (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; + my_qp->ib_qp.qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; } break; @@ -580,26 +544,33 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, break; } - /* initializes r/squeue and registers queue pages */ - ret = init_qp_queues(shca, my_qp, - parms.nr_sq_pages, parms.nr_rq_pages, - swqe_size, rwqe_size, - parms.act_nr_send_sges, parms.act_nr_recv_sges); + /* initialize r/squeue and register queue pages */ + ret = init_qp_queue(shca, my_qp, &my_qp->ipz_squeue, 0, + has_srq ? H_SUCCESS : H_PAGE_REGISTERED, + parms.nr_sq_pages, swqe_size, + parms.act_nr_send_sges); if (ret) { ehca_err(pd->device, - "Couldn't initialize r/squeue and pages ret=%x", ret); + "Couldn't initialize squeue and pages ret=%x", ret); goto create_qp_exit2; } + ret = init_qp_queue(shca, my_qp, &my_qp->ipz_rqueue, 1, H_SUCCESS, + parms.nr_rq_pages, rwqe_size, + parms.act_nr_recv_sges); + if (ret) { + ehca_err(pd->device, + "Couldn't initialize rqueue and pages ret=%x", ret); + goto create_qp_exit3; + } + my_qp->ib_qp.pd = &my_pd->ib_pd; my_qp->ib_qp.device = my_pd->ib_pd.device; my_qp->ib_qp.recv_cq = init_attr->recv_cq; my_qp->ib_qp.send_cq = init_attr->send_cq; - my_qp->ib_qp.qp_type = init_attr->qp_type; - - my_qp->qp_type = init_attr->qp_type; + my_qp->ib_qp.qp_type = my_qp->qp_type = qp_type; my_qp->ib_qp.srq = init_attr->srq; my_qp->ib_qp.qp_context = init_attr->qp_context; @@ -610,15 +581,16 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; init_attr->cap.max_send_sge = parms.act_nr_send_sges; init_attr->cap.max_send_wr = parms.act_nr_send_wqes; + my_qp->init_attr = *init_attr; /* NOTE: define_apq0() not supported yet */ - if (init_attr->qp_type == IB_QPT_GSI) { + if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx", h_ret); ret = ehca2ib_return_code(h_ret); - goto create_qp_exit3; + goto create_qp_exit4; } } if (init_attr->send_cq) { @@ -628,7 +600,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", ret); - goto create_qp_exit3; + goto create_qp_exit4; } my_qp->send_cq = cq; } @@ -659,14 +631,16 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit3; + goto create_qp_exit4; } } return &my_qp->ib_qp; -create_qp_exit3: +create_qp_exit4: ipz_queue_dtor(&my_qp->ipz_rqueue); + +create_qp_exit3: ipz_queue_dtor(&my_qp->ipz_squeue); create_qp_exit2: diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 5766ae3a2029..7efc4a2ad2b9 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -74,11 +74,6 @@ #define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) #define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) -/* direct access qp controls */ -#define DAQP_CTRL_ENABLE 0x01 -#define DAQP_CTRL_SEND_COMP 0x20 -#define DAQP_CTRL_RECV_COMP 0x40 - static u32 get_longbusy_msecs(int longbusy_rc) { switch (longbusy_rc) { @@ -284,36 +279,31 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp, struct ehca_alloc_qp_parms *parms) { u64 ret; u64 allocate_controls; u64 max_r10_reg; u64 outs[PLPAR_HCALL9_BUFSIZE]; - u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1; - u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1; - int daqp_ctrl = parms->daqp_ctrl; allocate_controls = - EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, - (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0) + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, - (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0) + !!(parms->ll_comp_flags & LLQP_RECV_COMP)) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, - (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0) + !!(parms->ll_comp_flags & LLQP_SEND_COMP)) | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, parms->ud_av_l_key_ctl) | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); max_r10_reg = EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - max_nr_send_wqes) + parms->max_send_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - max_nr_receive_wqes) + parms->max_recv_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, parms->max_send_sge) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, @@ -322,15 +312,16 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ allocate_controls, /* r5 */ - qp->send_cq->ipz_cq_handle.handle, - qp->recv_cq->ipz_cq_handle.handle, - parms->ipz_eq_handle.handle, - ((u64)qp->token << 32) | parms->pd.value, + parms->send_cq_handle.handle, + parms->recv_cq_handle.handle, + parms->eq_handle.handle, + ((u64)parms->token << 32) | parms->pd.value, max_r10_reg, /* r10 */ parms->ud_av_l_key_ctl, /* r11 */ 0); - qp->ipz_qp_handle.handle = outs[0]; - qp->real_qp_num = (u32)outs[1]; + + parms->qp_handle.handle = outs[0]; + parms->real_qp_num = (u32)outs[1]; parms->act_nr_send_wqes = (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); parms->act_nr_recv_wqes = @@ -345,7 +336,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); if (ret == H_SUCCESS) - hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]); + hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lx", ret); diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 2869f7dd6196..60ce02b70663 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -78,7 +78,6 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). */ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp, struct ehca_alloc_qp_parms *parms); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, From a6a12947fbf4a1782535468d756b0d44babf9760 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:25:10 +0200 Subject: [PATCH 54/76] IB/ehca: add Shared Receive Queue support Support SRQs on eHCA2. Since an SRQ is a QP for eHCA2, a lot of code (structures, create, destroy, post_recv) can be shared between QP and SRQ. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 26 +- .../infiniband/hw/ehca/ehca_classes_pSeries.h | 4 +- drivers/infiniband/hw/ehca/ehca_iverbs.h | 15 + drivers/infiniband/hw/ehca/ehca_main.c | 16 +- drivers/infiniband/hw/ehca/ehca_qp.c | 451 +++++++++++++++--- drivers/infiniband/hw/ehca/ehca_reqs.c | 47 +- drivers/infiniband/hw/ehca/ehca_uverbs.c | 4 +- drivers/infiniband/hw/ehca/hcp_if.c | 23 +- drivers/infiniband/hw/ehca/hipz_hw.h | 1 + 9 files changed, 480 insertions(+), 107 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 6e75db68996e..9d689aeb928a 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -5,6 +5,7 @@ * * Authors: Heiko J Schick * Christoph Raisch + * Joachim Fenkes * * Copyright (c) 2005 IBM Corporation * @@ -117,9 +118,20 @@ struct ehca_pd { u32 ownpid; }; +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, +}; + struct ehca_qp { - struct ib_qp ib_qp; + union { + struct ib_qp ib_qp; + struct ib_srq ib_srq; + }; u32 qp_type; + enum ehca_ext_qp_type ext_type; struct ipz_queue ipz_squeue; struct ipz_queue ipz_rqueue; struct h_galpas galpas; @@ -142,6 +154,10 @@ struct ehca_qp { u32 mm_count_galpa; }; +#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) +#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) +#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) + /* must be power of 2 */ #define QP_HASHTAB_LEN 8 @@ -307,6 +323,7 @@ struct ehca_create_qp_resp { u32 qp_num; u32 token; u32 qp_type; + u32 ext_type; u32 qkey; /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ u32 real_qp_num; @@ -329,13 +346,6 @@ enum ehca_service_type { ST_UD = 3, }; -enum ehca_ext_qp_type { - EQPT_NORMAL = 0, - EQPT_LLQP = 1, - EQPT_SRQBASE = 2, - EQPT_SRQ = 3, -}; - enum ehca_ll_comp_flags { LLQP_SEND_COMP = 0x20, LLQP_RECV_COMP = 0x40, diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 5665f213b81a..fb3df5c271e7 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -228,8 +228,8 @@ struct hcp_modify_qp_control_block { #define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) #define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) #define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49) -#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49,49) +#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16,31) #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) #define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 37e7fe0908cf..fd84a804814c 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -154,6 +154,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + +int ehca_destroy_srq(struct ib_srq *srq); + u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, struct ib_qp_init_attr *qp_init_attr); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index eb22a6b296d9..ca215bac9742 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -343,7 +343,7 @@ int ehca_init_device(struct ehca_shca *shca) strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 6; + shca->ib_device.uverbs_abi_ver = 7; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -411,6 +411,20 @@ int ehca_init_device(struct ehca_shca *shca) /* shca->ib_device.process_mad = ehca_process_mad; */ shca->ib_device.mmap = ehca_mmap; + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + shca->ib_device.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + shca->ib_device.create_srq = ehca_create_srq; + shca->ib_device.modify_srq = ehca_modify_srq; + shca->ib_device.query_srq = ehca_query_srq; + shca->ib_device.destroy_srq = ehca_destroy_srq; + shca->ib_device.post_srq_recv = ehca_post_srq_recv; + } + return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 513471a7bffa..6f35f07dc02c 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -3,7 +3,9 @@ * * QP functions * - * Authors: Waleri Fomin + * Authors: Joachim Fenkes + * Stefan Roscher + * Waleri Fomin * Hoang-Nam Nguyen * Reinhard Ernst * Heiko J Schick @@ -260,6 +262,19 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) } } +/* + * init userspace queue info from ipz_queue data + */ +static inline void queue2resp(struct ipzu_queue_resp *resp, + struct ipz_queue *queue) +{ + resp->qe_size = queue->qe_size; + resp->act_nr_of_sg = queue->act_nr_of_sg; + resp->queue_length = queue->queue_length; + resp->pagesize = queue->pagesize; + resp->toggle_state = queue->toggle_state; +} + /* * init_qp_queue initializes/constructs r/squeue and registers queue pages. */ @@ -338,11 +353,17 @@ init_qp_queue1: return ret; } -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +/* + * Create an ib_qp struct that is either a QP or an SRQ, depending on + * the value of the is_srq parameter. If init_attr and srq_init_attr share + * fields, the field out of init_attr is used. + */ +struct ehca_qp *internal_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata, int is_srq) { - static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 }; + static int da_rc_msg_size[] = { 128, 256, 512, 1024, 2048, 4096 }; static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 }; struct ehca_qp *my_qp; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); @@ -355,7 +376,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, /* h_call's out parameters */ struct ehca_alloc_qp_parms parms; - u32 swqe_size = 0, rwqe_size = 0; + u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; memset(&parms, 0, sizeof(parms)); @@ -376,13 +397,34 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, } qp_type &= 0x1F; - /* check for SRQ */ - has_srq = !!(init_attr->srq); + /* handle SRQ base QPs */ + if (init_attr->srq) { + struct ehca_qp *my_srq = + container_of(init_attr->srq, struct ehca_qp, ib_srq); + + has_srq = 1; + parms.ext_type = EQPT_SRQBASE; + parms.srq_qpn = my_srq->real_qp_num; + parms.srq_token = my_srq->token; + } + if (is_llqp && has_srq) { ehca_err(pd->device, "LLQPs can't have an SRQ"); return ERR_PTR(-EINVAL); } + /* handle SRQs */ + if (is_srq) { + parms.ext_type = EQPT_SRQ; + parms.srq_limit = srq_init_attr->attr.srq_limit; + if (init_attr->cap.max_recv_sge > 3) { + ehca_err(pd->device, "no more than three SGEs " + "supported for SRQ pd=%p max_sge=%x", + pd, init_attr->cap.max_recv_sge); + return ERR_PTR(-EINVAL); + } + } + /* check QP type */ if (qp_type != IB_QPT_UD && qp_type != IB_QPT_UC && @@ -423,11 +465,15 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); + my_qp->qp_type = qp_type; + my_qp->ext_type = parms.ext_type; - my_qp->recv_cq = - container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); - my_qp->send_cq = - container_of(init_attr->send_cq, struct ehca_cq, ib_cq); + if (init_attr->recv_cq) + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + if (init_attr->send_cq) + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); do { if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { @@ -471,8 +517,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, parms.token = my_qp->token; parms.eq_handle = shca->eq.ipz_eq_handle; parms.pd = my_pd->fw_pd; - parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; - parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; + if (my_qp->send_cq) + parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; + if (my_qp->recv_cq) + parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; parms.max_send_wr = init_attr->cap.max_send_wr; parms.max_recv_wr = init_attr->cap.max_recv_wr; @@ -487,7 +535,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit1; } - my_qp->ib_qp.qp_num = my_qp->real_qp_num = parms.real_qp_num; + ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; my_qp->ipz_qp_handle = parms.qp_handle; my_qp->galpas = parms.galpas; @@ -535,7 +583,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; parms.act_nr_send_sges = init_attr->cap.max_send_sge; parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; - my_qp->ib_qp.qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; + ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; } break; @@ -545,36 +593,51 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, } /* initialize r/squeue and register queue pages */ - ret = init_qp_queue(shca, my_qp, &my_qp->ipz_squeue, 0, - has_srq ? H_SUCCESS : H_PAGE_REGISTERED, - parms.nr_sq_pages, swqe_size, - parms.act_nr_send_sges); - if (ret) { - ehca_err(pd->device, - "Couldn't initialize squeue and pages ret=%x", ret); - goto create_qp_exit2; + if (HAS_SQ(my_qp)) { + ret = init_qp_queue( + shca, my_qp, &my_qp->ipz_squeue, 0, + HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS, + parms.nr_sq_pages, swqe_size, + parms.act_nr_send_sges); + if (ret) { + ehca_err(pd->device, "Couldn't initialize squeue " + "and pages ret=%x", ret); + goto create_qp_exit2; + } } - ret = init_qp_queue(shca, my_qp, &my_qp->ipz_rqueue, 1, H_SUCCESS, - parms.nr_rq_pages, rwqe_size, - parms.act_nr_recv_sges); - if (ret) { - ehca_err(pd->device, - "Couldn't initialize rqueue and pages ret=%x", ret); - goto create_qp_exit3; + if (HAS_RQ(my_qp)) { + ret = init_qp_queue( + shca, my_qp, &my_qp->ipz_rqueue, 1, + H_SUCCESS, parms.nr_rq_pages, rwqe_size, + parms.act_nr_recv_sges); + if (ret) { + ehca_err(pd->device, "Couldn't initialize rqueue " + "and pages ret=%x", ret); + goto create_qp_exit3; + } } - my_qp->ib_qp.pd = &my_pd->ib_pd; - my_qp->ib_qp.device = my_pd->ib_pd.device; + if (is_srq) { + my_qp->ib_srq.pd = &my_pd->ib_pd; + my_qp->ib_srq.device = my_pd->ib_pd.device; - my_qp->ib_qp.recv_cq = init_attr->recv_cq; - my_qp->ib_qp.send_cq = init_attr->send_cq; + my_qp->ib_srq.srq_context = init_attr->qp_context; + my_qp->ib_srq.event_handler = init_attr->event_handler; + } else { + my_qp->ib_qp.qp_num = ib_qp_num; + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; - my_qp->ib_qp.qp_type = my_qp->qp_type = qp_type; - my_qp->ib_qp.srq = init_attr->srq; + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; - my_qp->ib_qp.qp_context = init_attr->qp_context; - my_qp->ib_qp.event_handler = init_attr->event_handler; + my_qp->ib_qp.qp_type = qp_type; + my_qp->ib_qp.srq = init_attr->srq; + + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + } init_attr->cap.max_inline_data = 0; /* not supported yet */ init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; @@ -593,41 +656,32 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit4; } } - if (init_attr->send_cq) { - struct ehca_cq *cq = container_of(init_attr->send_cq, - struct ehca_cq, ib_cq); - ret = ehca_cq_assign_qp(cq, my_qp); + + if (my_qp->send_cq) { + ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", ret); goto create_qp_exit4; } - my_qp->send_cq = cq; } + /* copy queues, galpa data to user space */ if (context && udata) { - struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; - struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; struct ehca_create_qp_resp resp; memset(&resp, 0, sizeof(resp)); resp.qp_num = my_qp->real_qp_num; resp.token = my_qp->token; resp.qp_type = my_qp->qp_type; + resp.ext_type = my_qp->ext_type; resp.qkey = my_qp->qkey; resp.real_qp_num = my_qp->real_qp_num; - /* rqueue properties */ - resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size; - resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg; - resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; - resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; - resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; - /* squeue properties */ - resp.ipz_squeue.qe_size = ipz_squeue->qe_size; - resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; - resp.ipz_squeue.queue_length = ipz_squeue->queue_length; - resp.ipz_squeue.pagesize = ipz_squeue->pagesize; - resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; + if (HAS_SQ(my_qp)) + queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; @@ -635,13 +689,15 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, } } - return &my_qp->ib_qp; + return my_qp; create_qp_exit4: - ipz_queue_dtor(&my_qp->ipz_rqueue); + if (HAS_RQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_rqueue); create_qp_exit3: - ipz_queue_dtor(&my_qp->ipz_squeue); + if (HAS_SQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_squeue); create_qp_exit2: hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); @@ -656,6 +712,114 @@ create_qp_exit0: return ERR_PTR(ret); } +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) +{ + struct ehca_qp *ret; + + ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); + return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp; +} + +int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct ib_qp_init_attr qp_init_attr; + struct ehca_qp *my_qp; + struct ib_srq *ret; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 hret, update_mask; + + /* For common attributes, internal_create_qp() takes its info + * out of qp_init_attr, so copy all common attrs there. + */ + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + qp_init_attr.event_handler = srq_init_attr->event_handler; + qp_init_attr.qp_context = srq_init_attr->srq_context; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; + qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; + + my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); + if (IS_ERR(my_qp)) + return (struct ib_srq *) my_qp; + + /* copy back return values */ + srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; + srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge; + + /* drive SRQ into RTR state */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(pd->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + ret = ERR_PTR(-ENOMEM); + goto create_srq1; + } + + mqpcb->qp_state = EHCA_QPS_INIT; + mqpcb->prim_phys_port = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to INIT" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_enable = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not enable SRQ" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_state = EHCA_QPS_RTR; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to RTR" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + return &my_qp->ib_srq; + +create_srq2: + ret = ERR_PTR(ehca2ib_return_code(hret)); + ehca_free_fw_ctrlblock(mqpcb); + +create_srq1: + internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); + + return ret; +} + /* * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe @@ -1341,42 +1505,159 @@ query_qp_exit1: return ret; } -int ehca_destroy_qp(struct ib_qp *ibqp) +int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + struct ehca_qp *my_qp = + container_of(ibsrq, struct ehca_qp, ib_srq); + struct ehca_pd *my_pd = + container_of(ibsrq->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(ibsrq->pd->device, struct ehca_shca, ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 update_mask; + u64 h_ret; + int ret = 0; + + u32 cur_pid = current->tgid; + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + update_mask = 0; + if (attr_mask & IB_SRQ_LIMIT) { + attr_mask &= ~IB_SRQ_LIMIT; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); + mqpcb->curr_srq_limit = + EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->qp_aff_asyn_ev_log_reg = + EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); + } + + /* by now, all bits in attr_mask should have been cleared */ + if (attr_mask) { + ehca_err(ibsrq->device, "invalid attribute mask bits set " + "attr_mask=%x", attr_mask); + ret = -EINVAL; + goto modify_srq_exit0; + } + + if (ehca_debug_level) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, + NULL, update_mask, mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, my_qp->real_qp_num); + } + +modify_srq_exit0: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) +{ + struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); + struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + u32 cur_pid = current->tgid; + int ret = 0; + u64 h_ret; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(srq->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, + NULL, qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(srq->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, my_qp->real_qp_num, h_ret); + goto query_srq_exit1; + } + + srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; + srq_attr->srq_limit = EHCA_BMASK_GET( + MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + + if (ehca_debug_level) + ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + +query_srq_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) +{ + struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); u32 cur_pid = current->tgid; - u32 qp_num = ibqp->qp_num; + u32 qp_num = my_qp->real_qp_num; int ret; u64 h_ret; u8 port_num; enum ib_qp_type qp_type; unsigned long flags; - if (ibqp->uobject) { + if (uobject) { if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { - ehca_err(ibqp->device, "Resources still referenced in " - "user space qp_num=%x", ibqp->qp_num); + ehca_err(dev, "Resources still referenced in " + "user space qp_num=%x", qp_num); return -EINVAL; } if (my_pd->ownpid != cur_pid) { - ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + ehca_err(dev, "Invalid caller pid=%x ownpid=%x", cur_pid, my_pd->ownpid); return -EINVAL; } } if (my_qp->send_cq) { - ret = ehca_cq_unassign_qp(my_qp->send_cq, - my_qp->real_qp_num); + ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); if (ret) { - ehca_err(ibqp->device, "Couldn't unassign qp from " + ehca_err(dev, "Couldn't unassign qp from " "send_cq ret=%x qp_num=%x cq_num=%x", ret, - my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number); + qp_num, my_qp->send_cq->cq_number); return ret; } } @@ -1387,7 +1668,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp) h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { - ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " + ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx " "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); return ehca2ib_return_code(h_ret); } @@ -1398,7 +1679,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp) /* no support for IB_QPT_SMI yet */ if (qp_type == IB_QPT_GSI) { struct ib_event event; - ehca_info(ibqp->device, "device %s: port %x is inactive.", + ehca_info(dev, "device %s: port %x is inactive.", shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; @@ -1407,12 +1688,28 @@ int ehca_destroy_qp(struct ib_qp *ibqp) ib_dispatch_event(&event); } - ipz_queue_dtor(&my_qp->ipz_rqueue); - ipz_queue_dtor(&my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_rqueue); + if (HAS_SQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_squeue); kmem_cache_free(qp_cache, my_qp); return 0; } +int ehca_destroy_qp(struct ib_qp *qp) +{ + return internal_destroy_qp(qp->device, + container_of(qp, struct ehca_qp, ib_qp), + qp->uobject); +} + +int ehca_destroy_srq(struct ib_srq *srq) +{ + return internal_destroy_qp(srq->device, + container_of(srq, struct ehca_qp, ib_srq), + srq->uobject); +} + int ehca_init_qp_cache(void) { qp_cache = kmem_cache_create("ehca_cache_qp", diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 56c4527c884f..b5664fa34de3 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -3,8 +3,9 @@ * * post_send/recv, poll_cq, req_notify * - * Authors: Waleri Fomin - * Hoang-Nam Nguyen + * Authors: Hoang-Nam Nguyen + * Waleri Fomin + * Joachim Fenkes * Reinhard Ernst * * Copyright (c) 2005 IBM Corporation @@ -413,17 +414,23 @@ post_send_exit0: return ret; } -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) { - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); struct ib_recv_wr *cur_recv_wr; struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; unsigned long spl_flags; + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + return -ENODEV; + } + /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); @@ -439,8 +446,8 @@ int ehca_post_recv(struct ib_qp *qp, *bad_recv_wr = cur_recv_wr; if (wqe_cnt == 0) { ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); } goto post_recv_exit0; } @@ -455,14 +462,14 @@ int ehca_post_recv(struct ib_qp *qp, *bad_recv_wr = cur_recv_wr; if (wqe_cnt == 0) { ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); } goto post_recv_exit0; } wqe_cnt++; - ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, my_qp->real_qp_num, wqe_cnt); } /* eof for cur_recv_wr */ post_recv_exit0: @@ -472,6 +479,22 @@ post_recv_exit0: return ret; } +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), + qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + /* * ib_wc_opcode table converts ehca wc opcode to ib * Since we use zero to indicate invalid opcode, the actual ib opcode must diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 73db920b6945..d8fe37d56f1a 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -257,6 +257,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) struct ehca_cq *cq; struct ehca_qp *qp; struct ehca_pd *pd; + struct ib_uobject *uobject; switch (q_type) { case 1: /* CQ */ @@ -304,7 +305,8 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return -ENOMEM; } - if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) + uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; + if (!uobject || uobject->context != context) return -EINVAL; ret = ehca_mmap_qp(vma, qp, rsrc_type); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 7efc4a2ad2b9..b0783773f1c8 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -5,6 +5,7 @@ * * Authors: Christoph Raisch * Hoang-Nam Nguyen + * Joachim Fenkes * Gerd Bayer * Waleri Fomin * @@ -62,6 +63,12 @@ #define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) #define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) +#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) +#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) + #define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) #define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) @@ -150,7 +157,7 @@ static long ehca_plpar_hcall9(unsigned long opcode, { long ret; int i, sleep_msecs, lock_is_set = 0; - unsigned long flags; + unsigned long flags = 0; ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", @@ -282,8 +289,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, struct ehca_alloc_qp_parms *parms) { u64 ret; - u64 allocate_controls; - u64 max_r10_reg; + u64 allocate_controls, max_r10_reg, r11, r12; u64 outs[PLPAR_HCALL9_BUFSIZE]; allocate_controls = @@ -309,6 +315,13 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, parms->max_recv_sge); + r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); + + if (parms->ext_type == EQPT_SRQ) + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); + else + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ allocate_controls, /* r5 */ @@ -316,9 +329,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, parms->recv_cq_handle.handle, parms->eq_handle.handle, ((u64)parms->token << 32) | parms->pd.value, - max_r10_reg, /* r10 */ - parms->ud_av_l_key_ctl, /* r11 */ - 0); + max_r10_reg, r11, r12); parms->qp_handle.handle = outs[0]; parms->real_qp_num = (u32)outs[1]; diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index 9116fc943fee..dad6dea5636b 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -163,6 +163,7 @@ struct hipz_qptemm { #define QPX_SQADDER EHCA_BMASK_IBM(48,63) #define QPX_RQADDER EHCA_BMASK_IBM(48,63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3) #define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) From 472803dab844c0a8a5d757d4c67fa5e76013dcbd Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Mon, 9 Jul 2007 15:26:31 +0200 Subject: [PATCH 55/76] IB/ehca: Support UD low-latency QPs Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_qp.c | 84 +++++++++++++++++++--------- 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 6f35f07dc02c..fa3e03050347 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -275,6 +275,11 @@ static inline void queue2resp(struct ipzu_queue_resp *resp, resp->toggle_state = queue->toggle_state; } +static inline int ll_qp_msg_size(int nr_sge) +{ + return 128 << nr_sge; +} + /* * init_qp_queue initializes/constructs r/squeue and registers queue pages. */ @@ -363,8 +368,6 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata, int is_srq) { - static int da_rc_msg_size[] = { 128, 256, 512, 1024, 2048, 4096 }; - static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 }; struct ehca_qp *my_qp; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, @@ -396,6 +399,7 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; } qp_type &= 0x1F; + init_attr->qp_type &= 0x1F; /* handle SRQ base QPs */ if (init_attr->srq) { @@ -435,23 +439,49 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - if (is_llqp && (qp_type != IB_QPT_RC && qp_type != IB_QPT_UD)) { - ehca_err(pd->device, "unsupported LL QP Type=%x", qp_type); - return ERR_PTR(-EINVAL); - } else if (is_llqp && qp_type == IB_QPT_RC && - (init_attr->cap.max_send_wr > 255 || - init_attr->cap.max_recv_wr > 255 )) { - ehca_err(pd->device, "Invalid Number of max_sq_wr=%x " - "or max_rq_wr=%x for RC LLQP", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr); - return ERR_PTR(-EINVAL); - } else if (is_llqp && qp_type == IB_QPT_UD && - init_attr->cap.max_send_wr > 255) { - ehca_err(pd->device, - "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, qp_type); - return ERR_PTR(-EINVAL); + if (is_llqp) { + switch (qp_type) { + case IB_QPT_RC: + if ((init_attr->cap.max_send_wr > 255) || + (init_attr->cap.max_recv_wr > 255)) { + ehca_err(pd->device, + "Invalid Number of max_sq_wr=%x " + "or max_rq_wr=%x for RC LLQP", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr); + return ERR_PTR(-EINVAL); + } + break; + case IB_QPT_UD: + if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { + ehca_err(pd->device, "UD LLQP not supported " + "by this adapter"); + return ERR_PTR(-ENOSYS); + } + if (!(init_attr->cap.max_send_sge <= 5 + && init_attr->cap.max_send_sge >= 1 + && init_attr->cap.max_recv_sge <= 5 + && init_attr->cap.max_recv_sge >= 1)) { + ehca_err(pd->device, + "Invalid Number of max_send_sge=%x " + "or max_recv_sge=%x for UD LLQP", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge); + return ERR_PTR(-EINVAL); + } else if (init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of " + "ax_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, qp_type); + return ERR_PTR(-EINVAL); + } + break; + default: + ehca_err(pd->device, "unsupported LL QP Type=%x", + qp_type); + return ERR_PTR(-EINVAL); + break; + } } if (pd->uobject && udata) @@ -509,7 +539,7 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, /* UD_AV CIRCUMVENTION */ max_send_sge = init_attr->cap.max_send_sge; max_recv_sge = init_attr->cap.max_recv_sge; - if (parms.servicetype == ST_UD) { + if (parms.servicetype == ST_UD && !is_llqp) { max_send_sge += 2; max_recv_sge += 2; } @@ -547,8 +577,8 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ (parms.act_nr_recv_sges)]); } else { /* for LLQP we need to use msg size, not wqe size */ - swqe_size = da_rc_msg_size[max_send_sge]; - rwqe_size = da_rc_msg_size[max_recv_sge]; + swqe_size = ll_qp_msg_size(max_send_sge); + rwqe_size = ll_qp_msg_size(max_recv_sge); parms.act_nr_send_sges = 1; parms.act_nr_recv_sges = 1; } @@ -563,15 +593,15 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, case IB_QPT_UD: case IB_QPT_GSI: case IB_QPT_SMI: - /* UD circumvention */ - parms.act_nr_recv_sges -= 2; - parms.act_nr_send_sges -= 2; if (is_llqp) { - swqe_size = da_ud_sq_msg_size[max_send_sge]; - rwqe_size = da_rc_msg_size[max_recv_sge]; + swqe_size = ll_qp_msg_size(parms.act_nr_send_sges); + rwqe_size = ll_qp_msg_size(parms.act_nr_recv_sges); parms.act_nr_send_sges = 1; parms.act_nr_recv_sges = 1; } else { + /* UD circumvention */ + parms.act_nr_send_sges -= 2; + parms.act_nr_recv_sges -= 2; swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[parms.act_nr_send_sges]); rwqe_size = offsetof(struct ehca_wqe, From 85f003172fc130626261730141ed021258e81f85 Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Mon, 9 Jul 2007 15:27:13 +0200 Subject: [PATCH 56/76] IB/ehca: Set SEND_GRH flag for all non-LL UD QPs on eHCA2 Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_qp.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index fa3e03050347..3a4b2bfdb06b 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1054,6 +1054,17 @@ static int internal_modify_qp(struct ib_qp *ibqp, "ehca_qp=%p qp_num=%x qp_state_xsit=%x", my_qp, ibqp->qp_num, statetrans); + /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set + * in non-LL UD QPs. + */ + if ((my_qp->qp_type == IB_QPT_UD) && + (my_qp->ext_type != EQPT_LLQP) && + (statetrans == IB_QPST_INIT2RTR) && + (shca->hw_level >= 0x22)) { + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->send_grh_flag = 1; + } + /* sqe -> rts: set purge bit of bad wqe before actual trans */ if ((my_qp->qp_type == IB_QPT_UD || my_qp->qp_type == IB_QPT_GSI || From 15f001ec47b049051f679f8b8c965ac9aae95b3e Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Mon, 9 Jul 2007 15:28:18 +0200 Subject: [PATCH 57/76] IB/ehca: Report RDMA atomic attributes in query_qp() Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_qp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 3a4b2bfdb06b..ec34ff432900 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1491,6 +1491,9 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; + qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; + qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; + /* primary av */ qp_attr->ah_attr.sl = qpcb->service_level; From 9844b71baa60270110eabaa9589d3260443d1a71 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:29:03 +0200 Subject: [PATCH 58/76] IB/ehca: Lock renaming, static initializers - Rename all spinlock flags to "flags", matching the vast majority of kernel code. - Move hcall_lock into the only file it's used in. - Replaced spin_lock_init() and friends with static initializers for global variables. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 1 - drivers/infiniband/hw/ehca/ehca_cq.c | 12 ++++++------ drivers/infiniband/hw/ehca/ehca_main.c | 18 ++++------------- drivers/infiniband/hw/ehca/ehca_qp.c | 6 +++--- drivers/infiniband/hw/ehca/ehca_reqs.c | 24 +++++++++++------------ drivers/infiniband/hw/ehca/hcp_if.c | 2 ++ 6 files changed, 27 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 9d689aeb928a..3550047c1375 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -295,7 +295,6 @@ void ehca_cleanup_mrmw_cache(void); extern spinlock_t ehca_qp_idr_lock; extern spinlock_t ehca_cq_idr_lock; -extern spinlock_t hcall_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 67f0670fe3b1..94bad273b34c 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) { unsigned int qp_num = qp->real_qp_num; unsigned int key = qp_num & (QP_HASHTAB_LEN-1); - unsigned long spl_flags; + unsigned long flags; - spin_lock_irqsave(&cq->spinlock, spl_flags); + spin_lock_irqsave(&cq->spinlock, flags); hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); - spin_unlock_irqrestore(&cq->spinlock, spl_flags); + spin_unlock_irqrestore(&cq->spinlock, flags); ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", cq->cq_number, qp_num); @@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); struct hlist_node *iter; struct ehca_qp *qp; - unsigned long spl_flags; + unsigned long flags; - spin_lock_irqsave(&cq->spinlock, spl_flags); + spin_lock_irqsave(&cq->spinlock, flags); hlist_for_each(iter, &cq->qp_hashtab[key]) { qp = hlist_entry(iter, struct ehca_qp, list_entries); if (qp->real_qp_num == real_qp_num) { @@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) break; } } - spin_unlock_irqrestore(&cq->spinlock, spl_flags); + spin_unlock_irqrestore(&cq->spinlock, flags); if (ret) ehca_err(cq->ib_cq.device, "qp not found cq_num=%x real_qp_num=%x", diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index ca215bac9742..32396b203f14 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -96,15 +96,13 @@ MODULE_PARM_DESC(static_rate, MODULE_PARM_DESC(scaling_code, "set scaling code (0: disabled/default, 1: enabled)"); -spinlock_t ehca_qp_idr_lock; -spinlock_t ehca_cq_idr_lock; -spinlock_t hcall_lock; +DEFINE_SPINLOCK(ehca_qp_idr_lock); +DEFINE_SPINLOCK(ehca_cq_idr_lock); DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); - -static struct list_head shca_list; /* list of all registered ehcas */ -static spinlock_t shca_list_lock; +static LIST_HEAD(shca_list); /* list of all registered ehcas */ +static DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; @@ -864,14 +862,6 @@ int __init ehca_module_init(void) printk(KERN_INFO "eHCA Infiniband Device Driver " "(Rel.: SVNEHCA_0023)\n"); - idr_init(&ehca_qp_idr); - idr_init(&ehca_cq_idr); - spin_lock_init(&ehca_qp_idr_lock); - spin_lock_init(&ehca_cq_idr_lock); - spin_lock_init(&hcall_lock); - - INIT_LIST_HEAD(&shca_list); - spin_lock_init(&shca_list_lock); if ((ret = ehca_create_comp_pool())) { ehca_gen_err("Cannot create comp pool."); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index ec34ff432900..31d21526df5e 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -933,7 +933,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 h_ret; int bad_wqe_cnt = 0; int squeue_locked = 0; - unsigned long spl_flags = 0; + unsigned long flags = 0; /* do query_qp to obtain current attr values */ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); @@ -1074,7 +1074,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject) { struct ehca_wqe *wqe; /* lock send queue */ - spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_s, flags); squeue_locked = 1; /* mark next free wqe */ wqe = (struct ehca_wqe*) @@ -1360,7 +1360,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, modify_qp_exit2: if (squeue_locked) { /* this means: sqe -> rts */ - spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); my_qp->sqerr_purgeflag = 1; } diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index b5664fa34de3..73f0c0652a00 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -363,10 +363,10 @@ int ehca_post_send(struct ib_qp *qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + unsigned long flags; /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_s, flags); /* loop processes list of send reqs */ for (cur_send_wr = send_wr; cur_send_wr != NULL; @@ -408,7 +408,7 @@ int ehca_post_send(struct ib_qp *qp, post_send_exit0: /* UNLOCK the QUEUE */ - spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); return ret; @@ -423,7 +423,7 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + unsigned long flags; if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", @@ -432,7 +432,7 @@ static int internal_post_recv(struct ehca_qp *my_qp, } /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_r, flags); /* loop processes list of send reqs */ for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; @@ -473,7 +473,7 @@ static int internal_post_recv(struct ehca_qp *my_qp, } /* eof for cur_recv_wr */ post_recv_exit0: - spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); iosync(); /* serialize GAL register access */ hipz_update_rqa(my_qp, wqe_cnt); return ret; @@ -536,7 +536,7 @@ poll_cq_one_read_cqe: if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); int purgeflag; - unsigned long spl_flags; + unsigned long flags; if (!qp) { ehca_err(cq->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", @@ -546,9 +546,9 @@ poll_cq_one_read_cqe: /* ignore this purged cqe */ goto poll_cq_one_read_cqe; } - spin_lock_irqsave(&qp->spinlock_s, spl_flags); + spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&qp->spinlock_s, flags); if (purgeflag) { ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " @@ -633,7 +633,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) int nr; struct ib_wc *current_wc = wc; int ret = 0; - unsigned long spl_flags; + unsigned long flags; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -642,14 +642,14 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) goto poll_cq_exit0; } - spin_lock_irqsave(&my_cq->spinlock, spl_flags); + spin_lock_irqsave(&my_cq->spinlock, flags); for (nr = 0; nr < num_entries; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) ret = nr; diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index b0783773f1c8..4776a8b0feec 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -81,6 +81,8 @@ #define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) #define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) +static DEFINE_SPINLOCK(hcall_lock); + static u32 get_longbusy_msecs(int longbusy_rc) { switch (longbusy_rc) { From 28db6beb420c756c61dd44d9f2786a0677159e74 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:30:39 +0200 Subject: [PATCH 59/76] IB/ehca: Refactor sync between completions and destroy_cq using atomic_t - ehca_cq.nr_events is made an atomic_t, eliminating a lot of locking. - The CQ is removed from the CQ idr first now to make sure no more completions are scheduled on that CQ. The "wait for all completions to end" code becomes much simpler this way. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 4 +-- drivers/infiniband/hw/ehca/ehca_cq.c | 26 ++++++---------- drivers/infiniband/hw/ehca/ehca_irq.c | 36 +++++++++++------------ drivers/infiniband/hw/ehca/ehca_irq.h | 1 - drivers/infiniband/hw/ehca/ehca_tools.h | 1 + 5 files changed, 29 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 3550047c1375..8580f2a0ea57 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -174,8 +174,8 @@ struct ehca_cq { spinlock_t cb_lock; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; - u32 nr_callbacks; /* #events assigned to cpu by scaling code */ - u32 nr_events; /* #events seen */ + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + atomic_t nr_events; /* #events seen */ wait_queue_head_t wait_completion; spinlock_t task_lock; u32 ownpid; diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 94bad273b34c..3729997457ca 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, spin_lock_init(&my_cq->spinlock); spin_lock_init(&my_cq->cb_lock); spin_lock_init(&my_cq->task_lock); + atomic_set(&my_cq->nr_events, 0); init_waitqueue_head(&my_cq->wait_completion); my_cq->ownpid = current->tgid; @@ -303,16 +304,6 @@ create_cq_exit1: return cq; } -static int get_cq_nr_events(struct ehca_cq *my_cq) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = my_cq->nr_events; - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - return ret; -} - int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; @@ -339,17 +330,18 @@ int ehca_destroy_cq(struct ib_cq *cq) } } + /* + * remove the CQ from the idr first to make sure + * no more interrupt tasklets will touch this CQ + */ spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_events) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq)); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - /* recheck nr_events to assure no cqe has just arrived */ - } - idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + /* now wait until all pending events have completed */ + wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); + + /* nobody's using our CQ any longer -- we can destroy it */ h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 100329ba3343..3e790a326d97 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -5,6 +5,8 @@ * * Authors: Heiko J Schick * Khadija Souissi + * Hoang-Nam Nguyen + * Joachim Fenkes * * Copyright (c) 2005 IBM Corporation * @@ -212,6 +214,8 @@ static void cq_event_callback(struct ehca_shca *shca, spin_lock_irqsave(&ehca_cq_idr_lock, flags); cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); if (!cq) @@ -219,6 +223,9 @@ static void cq_event_callback(struct ehca_shca *shca, ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + return; } @@ -414,25 +421,22 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); spin_lock_irqsave(&ehca_cq_idr_lock, flags); cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); if (cq == NULL) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq token=%x", token); return; } reset_eq_pending(cq); - cq->nr_events++; - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); if (ehca_scaling_code) queue_comp_task(cq); else { comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -478,15 +482,15 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); spin_lock(&ehca_cq_idr_lock); eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + spin_unlock(&ehca_cq_idr_lock); if (!eqe_cache[eqe_cnt].cq) { - spin_unlock(&ehca_cq_idr_lock); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq " "token=%x", token); continue; } - eqe_cache[eqe_cnt].cq->nr_events++; - spin_unlock(&ehca_cq_idr_lock); } else eqe_cache[eqe_cnt].cq = NULL; eqe_cnt++; @@ -517,11 +521,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) else { struct ehca_cq *cq = eq->eqe_cache[i].cq; comp_event_callback(cq); - spin_lock(&ehca_cq_idr_lock); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock(&ehca_cq_idr_lock); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -621,13 +622,10 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); spin_unlock_irqrestore(&cct->task_lock, flags); - comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); spin_lock_irqsave(&cct->task_lock, flags); spin_lock(&cq->task_lock); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 6ed06ee033ed..3346cb06cea6 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -47,7 +47,6 @@ struct ehca_shca; #include #include -#include int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 973c4b591545..03b185f873da 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -59,6 +59,7 @@ #include #include +#include #include #include #include From 26ed687fdd541c2542b79dcd75fb2c82eb36f189 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:31:10 +0200 Subject: [PATCH 60/76] IB/ehca: Change idr spinlocks into rwlocks This eliminates lock contention among IRQs as well as the need to disable IRQs around idr_find, because there are no IRQ writers. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 4 ++-- drivers/infiniband/hw/ehca/ehca_cq.c | 12 ++++++------ drivers/infiniband/hw/ehca/ehca_irq.c | 19 ++++++++----------- drivers/infiniband/hw/ehca/ehca_main.c | 4 ++-- drivers/infiniband/hw/ehca/ehca_qp.c | 12 ++++++------ drivers/infiniband/hw/ehca/ehca_uverbs.c | 9 ++++----- 6 files changed, 28 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 8580f2a0ea57..f1e0db2ff16c 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -293,8 +293,8 @@ void ehca_cleanup_av_cache(void); int ehca_init_mrmw_cache(void); void ehca_cleanup_mrmw_cache(void); -extern spinlock_t ehca_qp_idr_lock; -extern spinlock_t ehca_cq_idr_lock; +extern rwlock_t ehca_qp_idr_lock; +extern rwlock_t ehca_cq_idr_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 3729997457ca..01d4a148bd71 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -163,9 +163,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, goto create_cq_exit1; } - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + write_lock_irqsave(&ehca_cq_idr_lock, flags); ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); } while (ret == -EAGAIN); @@ -294,9 +294,9 @@ create_cq_exit3: "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); create_cq_exit2: - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + write_lock_irqsave(&ehca_cq_idr_lock, flags); idr_remove(&ehca_cq_idr, my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); create_cq_exit1: kmem_cache_free(cq_cache, my_cq); @@ -334,9 +334,9 @@ int ehca_destroy_cq(struct ib_cq *cq) * remove the CQ from the idr first to make sure * no more interrupt tasklets will touch this CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + write_lock_irqsave(&ehca_cq_idr_lock, flags); idr_remove(&ehca_cq_idr, my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); /* now wait until all pending events have completed */ wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 3e790a326d97..02b73c84c49b 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -180,12 +180,11 @@ static void qp_event_callback(struct ehca_shca *shca, { struct ib_event event; struct ehca_qp *qp; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + read_unlock(&ehca_qp_idr_lock); if (!qp) @@ -209,14 +208,13 @@ static void cq_event_callback(struct ehca_shca *shca, u64 eqe) { struct ehca_cq *cq; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); if (cq) atomic_inc(&cq->nr_events); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + read_unlock(&ehca_cq_idr_lock); if (!cq) return; @@ -411,7 +409,6 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) { u64 eqe_value; u32 token; - unsigned long flags; struct ehca_cq *cq; eqe_value = eqe->entry; @@ -419,11 +416,11 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { ehca_dbg(&shca->ib_device, "Got completion event"); token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); if (cq) atomic_inc(&cq->nr_events); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + read_unlock(&ehca_cq_idr_lock); if (cq == NULL) { ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq token=%x", @@ -480,11 +477,11 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_value = eqe_cache[eqe_cnt].eqe->entry; if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock(&ehca_cq_idr_lock); + read_lock(&ehca_cq_idr_lock); eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); if (eqe_cache[eqe_cnt].cq) atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); - spin_unlock(&ehca_cq_idr_lock); + read_unlock(&ehca_cq_idr_lock); if (!eqe_cache[eqe_cnt].cq) { ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq " diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 32396b203f14..28ba2dd24216 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -96,8 +96,8 @@ MODULE_PARM_DESC(static_rate, MODULE_PARM_DESC(scaling_code, "set scaling code (0: disabled/default, 1: enabled)"); -DEFINE_SPINLOCK(ehca_qp_idr_lock); -DEFINE_SPINLOCK(ehca_cq_idr_lock); +DEFINE_RWLOCK(ehca_qp_idr_lock); +DEFINE_RWLOCK(ehca_cq_idr_lock); DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 31d21526df5e..74671250303f 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -512,9 +512,9 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, goto create_qp_exit0; } - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); } while (ret == -EAGAIN); @@ -733,9 +733,9 @@ create_qp_exit2: hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); create_qp_exit1: - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); idr_remove(&ehca_qp_idr, my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); create_qp_exit0: kmem_cache_free(qp_cache, my_qp); @@ -1706,9 +1706,9 @@ int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, } } - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); idr_remove(&ehca_qp_idr, my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index d8fe37d56f1a..3031b3bb56f9 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -253,7 +253,6 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ u32 cur_pid = current->tgid; u32 ret; - unsigned long flags; struct ehca_cq *cq; struct ehca_qp *qp; struct ehca_pd *pd; @@ -261,9 +260,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) switch (q_type) { case 1: /* CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, idr_handle); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + read_unlock(&ehca_cq_idr_lock); /* make sure this mmap really belongs to the authorized user */ if (!cq) @@ -289,9 +288,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) break; case 2: /* QP */ - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, idr_handle); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + read_unlock(&ehca_qp_idr_lock); /* make sure this mmap really belongs to the authorized user */ if (!qp) From b1cfe43d4b24144596d277133e0e5b715eea1347 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:31:53 +0200 Subject: [PATCH 61/76] IB/ehca: Return QP pointer in poll_cq() Also add two unlikely() statements. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_reqs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 73f0c0652a00..fd3ba22467e8 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -517,6 +517,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) int ret = 0; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; + struct ehca_qp *my_qp; int cqe_count = 0; poll_cq_one_read_cqe: @@ -568,7 +569,7 @@ poll_cq_one_read_cqe: } /* tracing cqe */ - if (ehca_debug_level) { + if (unlikely(ehca_debug_level)) { ehca_dbg(cq->device, "Received COMPLETION ehca_cq=%p cq_num=%x -----", my_cq, my_cq->cq_number); @@ -602,7 +603,11 @@ poll_cq_one_read_cqe: } else wc->status = IB_WC_SUCCESS; - wc->qp = NULL; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + wc->qp = &my_qp->ib_qp; + read_unlock(&ehca_qp_idr_lock); + wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; @@ -612,7 +617,7 @@ poll_cq_one_read_cqe: wc->imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; - if (wc->status != IB_WC_SUCCESS) + if (unlikely(wc->status != IB_WC_SUCCESS)) ehca_dbg(cq->device, "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " From 8705ce5b90118be93eb8b0ed6f49ca5ff377df24 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 9 Jul 2007 15:32:22 +0200 Subject: [PATCH 62/76] IB/ehca: Notify consumers of LID/PKEY/SM changes after nondisruptive events When firmware reports a nondisruptive port configuration change event, previous versions of the eHCA driver didn't forward the event to consumers like IPoIB. Add code that determines the type of configuration change by comparing old and new port attributes and reports it. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 6 ++ drivers/infiniband/hw/ehca/ehca_hca.c | 34 +++++++++ drivers/infiniband/hw/ehca/ehca_irq.c | 85 +++++++++++++++-------- drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 + 4 files changed, 99 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index f1e0db2ff16c..daf823ea1ace 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -87,11 +87,17 @@ struct ehca_eq { struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; }; +struct ehca_sma_attr { + u16 lid, lmc, sm_sl, sm_lid; + u16 pkey_tbl_len, pkeys[16]; +}; + struct ehca_sport { struct ib_cq *ibcq_aqp1; struct ib_qp *ibqp_aqp1; enum ib_rate rate; enum ib_port_state port_state; + struct ehca_sma_attr saved_attr; }; struct ehca_shca { diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index b310de5c94ae..bbd3c6a5822f 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -193,6 +193,40 @@ query_port1: return ret; } +int ehca_query_sma_attr(struct ehca_shca *shca, + u8 port, struct ehca_sma_attr *attr) +{ + int ret = 0; + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_sma_attr1; + } + + memset(attr, 0, sizeof(struct ehca_sma_attr)); + + attr->lid = rblock->lid; + attr->lmc = rblock->lmc; + attr->sm_sl = rblock->sm_sl; + attr->sm_lid = rblock->sm_lid; + + attr->pkey_tbl_len = rblock->pkey_tbl_len; + memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); + +query_sma_attr1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { int ret = 0; diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 02b73c84c49b..96eba3830754 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -61,6 +61,7 @@ #define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) #define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16) #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) #define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) @@ -286,30 +287,61 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) return; } -static void parse_ec(struct ehca_shca *shca, u64 eqe) +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) { struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &shca->sport[port - 1].saved_attr); } else { - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); } break; case 0x31: @@ -317,24 +349,19 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) * disruptive change is caused by * LID, PKEY or SM change */ - ehca_warn(&shca->ib_device, - "disruptive port %x configuration change", port); + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); + shca->sport[port - 1].port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + } else + notify_port_conf_change(shca, port); break; case 0x32: /* adapter malfunction */ ehca_err(&shca->ib_device, "Adapter malfunction."); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index fd84a804814c..77aeca6a2c2f 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); +int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, + struct ehca_sma_attr *attr); + int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, From f72d2f081453d8b1ea36e47478c3463042746ddc Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Mon, 9 Jul 2007 15:33:52 +0200 Subject: [PATCH 63/76] IB/ehca: Improve latency by unlocking after triggering the hardware Kick the hardware before unlocking the send/receive queue to overlap processing a little more. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_reqs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index fd3ba22467e8..61da65e6e5e0 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -407,10 +407,9 @@ int ehca_post_send(struct ib_qp *qp, } /* eof for cur_send_wr */ post_send_exit0: - /* UNLOCK the QUEUE */ - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); return ret; } @@ -473,9 +472,9 @@ static int internal_post_recv(struct ehca_qp *my_qp, } /* eof for cur_recv_wr */ post_recv_exit0: - spin_unlock_irqrestore(&my_qp->spinlock_r, flags); iosync(); /* serialize GAL register access */ hipz_update_rqa(my_qp, wqe_cnt); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); return ret; } From 856c52a741950dc0be2c5c231efec626e9a0a3fa Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 10 Jul 2007 16:55:57 +0300 Subject: [PATCH 64/76] IB/core: Take sizeof the correct pointer when calling kmalloc() When allocating out_mad in show_pma_counter(), take sizeof *out_mad instead of sizeof *in_mad. It is true that today the type of in_mad and out_mad are the same, but this patch will give us a cleaner code. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 08c299ebf4a8..6265a3fee076 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -311,7 +311,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, return sprintf(buf, "N/A (no PMA)\n"); in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; From 20089ca55786a243c7b72becd1bf670f4e2c2028 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 10 Jul 2007 11:18:34 -0700 Subject: [PATCH 65/76] IPoIB/cm: Fix warning if IPV6 is not enabled Fix drivers/infiniband/ulp/ipoib/ipoib_cm.c:1151: warning: unused variable 'dev' by getting rid of the variable dev, which is only used if CONFIG_IPV6 is enabled, and replacing the one use of it with the value it is assigned, namely priv->dev. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index ea74d1eaf004..6764d216c887 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1148,7 +1148,6 @@ static void ipoib_cm_skb_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.skb_task); - struct net_device *dev = priv->dev; struct sk_buff *skb; unsigned mtu = priv->mcast_mtu; @@ -1162,7 +1161,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev); #endif dev_kfree_skb_any(skb); spin_lock_irq(&priv->tx_lock); From 8909c571fa1e62e254c4045394e6eaccfadec6f4 Mon Sep 17 00:00:00 2001 From: Shani Moideen Date: Mon, 18 Jun 2007 08:46:41 +0530 Subject: [PATCH 66/76] IB/mthca: Replace memset(, 0, PAGE_SIZE) with clear_page() Signed-off-by: Shani Moideen Signed-off-by: Roland Dreier ---- --- drivers/infiniband/hw/mthca/mthca_allocator.c | 2 +- drivers/infiniband/hw/mthca/mthca_eq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index f930e55b58fc..a76306709618 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -255,7 +255,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, dma_list[i] = t; pci_unmap_addr_set(&buf->page_list[i], mapping, t); - memset(buf->page_list[i].buf, 0, PAGE_SIZE); + clear_page(buf->page_list[i].buf); } } diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8ec9fa1ff9ea..8592b26dc4e1 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -522,7 +522,7 @@ static int mthca_create_eq(struct mthca_dev *dev, dma_list[i] = t; pci_unmap_addr_set(&eq->page_list[i], mapping, t); - memset(eq->page_list[i].buf, 0, PAGE_SIZE); + clear_page(eq->page_list[i].buf); } for (i = 0; i < eq->nent; ++i) From 1b844afe9e67d6cd441ae6df71051b4004f31dd2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 10 Jul 2007 13:43:53 -0700 Subject: [PATCH 67/76] IPoIB: Recycle loopback skbs instead of freeing and reallocating InfiniBand HCAs replicate multicast packets back to the QP that sent them if that QP is attached to the destination multicast group. This means that IPoIB multicasts are often replicated back to the receive queue of the interface that generated them. To avoid confusing the network stack, we drop these duplicates within the IPoIB driver. However, there's no reason to free the skb that received the duplicate and then immediately allocate a new skb to post to the receive queue. We can be more efficient and just repost the same skb. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 33 +++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 8404f05b2b6e..10944888cffd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -196,6 +196,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) return; } + /* + * Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. + */ + if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) + goto repost; + /* * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. @@ -213,24 +220,18 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); - if (wc->slid != priv->local_lid || - wc->src_qp != priv->qp->qp_num) { - skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb_reset_mac_header(skb); - skb_pull(skb, IPOIB_ENCAP_LEN); + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + skb_reset_mac_header(skb); + skb_pull(skb, IPOIB_ENCAP_LEN); - dev->last_rx = jiffies; - ++priv->stats.rx_packets; - priv->stats.rx_bytes += skb->len; + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; - skb->dev = dev; - /* XXX get correct PACKET_ type here */ - skb->pkt_type = PACKET_HOST; - netif_receive_skb(skb); - } else { - ipoib_dbg_data(priv, "dropping loopback packet\n"); - dev_kfree_skb_any(skb); - } + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_receive_skb(skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) From 2aec5c602c6a44e2a3a173339a9ab94549658e4b Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Jun 2007 11:03:58 -0700 Subject: [PATCH 68/76] IB/sa: Make sure SA queries use default P_Key MADs sent to the SA should use the the default P_Key (0x7fff/0xffff). There's no requirement that the default P_Key is stored at index 0 in the local P_Key table, so add code to the sa_query module to look up the index of the default P_Key when creating an address handle for the SA (which is done any time the P_Key table might change), and use this index for all SA queries. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 85 ++++++++++++++++++------------ include/rdma/ib_mad.h | 3 ++ 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 9d3797fcc37e..20ab6b3e484d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -56,6 +56,7 @@ MODULE_LICENSE("Dual BSD/GPL"); struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; + u16 pkey_index; u8 src_path_mask; }; @@ -382,6 +383,13 @@ static void update_sm_ah(struct work_struct *work) kref_init(&new_ah->ref); new_ah->src_path_mask = (1 << port_attr.lmc) - 1; + new_ah->pkey_index = 0; + if (ib_find_pkey(port->agent->device, port->port_num, + IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index) && + ib_find_pkey(port->agent->device, port->port_num, + IB_DEFAULT_PKEY_PARTIAL, &new_ah->pkey_index)) + printk(KERN_ERR "Couldn't find index for default PKey\n"); + memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = port_attr.sm_lid; ah_attr.sl = port_attr.sm_sl; @@ -512,6 +520,35 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_init_ah_from_path); +static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) +{ + unsigned long flags; + + spin_lock_irqsave(&query->port->ah_lock, flags); + kref_get(&query->port->sm_ah->ref); + query->sm_ah = query->port->sm_ah; + spin_unlock_irqrestore(&query->port->ah_lock, flags); + + query->mad_buf = ib_create_send_mad(query->port->agent, 1, + query->sm_ah->pkey_index, + 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, + gfp_mask); + if (!query->mad_buf) { + kref_put(&query->sm_ah->ref, free_sm_ah); + return -ENOMEM; + } + + query->mad_buf->ah = query->sm_ah->ah; + + return 0; +} + +static void free_mad(struct ib_sa_query *query) +{ + ib_free_send_mad(query->mad_buf); + kref_put(&query->sm_ah->ref, free_sm_ah); +} + static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) { unsigned long flags; @@ -548,20 +585,11 @@ retry: query->mad_buf->context[0] = query; query->id = id; - spin_lock_irqsave(&query->port->ah_lock, flags); - kref_get(&query->port->sm_ah->ref); - query->sm_ah = query->port->sm_ah; - spin_unlock_irqrestore(&query->port->ah_lock, flags); - - query->mad_buf->ah = query->sm_ah->ah; - ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { spin_lock_irqsave(&idr_lock, flags); idr_remove(&query_idr, id); spin_unlock_irqrestore(&idr_lock, flags); - - kref_put(&query->sm_ah->ref, free_sm_ah); } /* @@ -647,13 +675,10 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, if (!query) return -ENOMEM; - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) goto err1; - } ib_sa_client_get(client); query->sa_query.client = client; @@ -665,7 +690,6 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; query->sa_query.release = ib_sa_path_rec_release; - query->sa_query.port = port; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); mad->sa_hdr.comp_mask = comp_mask; @@ -683,7 +707,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); - ib_free_send_mad(query->sa_query.mad_buf); + free_mad(&query->sa_query); err1: kfree(query); @@ -773,13 +797,10 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, if (!query) return -ENOMEM; - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) goto err1; - } ib_sa_client_get(client); query->sa_query.client = client; @@ -791,7 +812,6 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; query->sa_query.release = ib_sa_service_rec_release; - query->sa_query.port = port; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); mad->sa_hdr.comp_mask = comp_mask; @@ -810,7 +830,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); - ib_free_send_mad(query->sa_query.mad_buf); + free_mad(&query->sa_query); err1: kfree(query); @@ -869,13 +889,10 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, if (!query) return -ENOMEM; - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) goto err1; - } ib_sa_client_get(client); query->sa_query.client = client; @@ -887,7 +904,6 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; query->sa_query.release = ib_sa_mcmember_rec_release; - query->sa_query.port = port; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad->sa_hdr.comp_mask = comp_mask; @@ -906,7 +922,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); - ib_free_send_mad(query->sa_query.mad_buf); + free_mad(&query->sa_query); err1: kfree(query); @@ -939,8 +955,7 @@ static void send_handler(struct ib_mad_agent *agent, idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); - ib_free_send_mad(mad_send_wc->send_buf); - kref_put(&query->sm_ah->ref, free_sm_ah); + free_mad(query); ib_sa_client_put(query->client); query->release(query); } diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 739fa4d0e539..30712ddd8a5e 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -111,6 +111,9 @@ #define IB_QP1_QKEY 0x80010000 #define IB_QP_SET_QKEY 0x80000000 +#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF +#define IB_DEFAULT_PKEY_FULL 0xFFFF + enum { IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, From 24be6e81c78314c91a47200272eb4bc31284bd7b Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Jun 2007 11:09:36 -0700 Subject: [PATCH 69/76] IB/cm: Use spin_lock_irq() instead of spin_lock_irqsave() when possible The ib_cm is a little over zealous about using spin_lock_irqsave, when spin_lock_irq would do. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 171 +++++++++++++++-------------------- 1 file changed, 75 insertions(+), 96 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 40c004a2697e..16181d655854 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -318,12 +318,10 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) static void cm_free_id(__be32 local_id) { - unsigned long flags; - - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); idr_remove(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); } static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) @@ -345,11 +343,10 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - unsigned long flags; - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); cm_id_priv = cm_get_id(local_id, remote_id); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); return cm_id_priv; } @@ -713,31 +710,30 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; - unsigned long flags; cm_id_priv = container_of(cm_id, struct cm_id_private, id); retest: - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id->state) { case IB_CM_LISTEN: cm_id->state = IB_CM_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - spin_lock_irqsave(&cm.lock, flags); + spin_unlock_irq(&cm_id_priv->lock); + spin_lock_irq(&cm.lock); rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_SIDR_REQ_RCVD: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); break; case IB_CM_REQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof cm_id_priv->id.device->node_guid, @@ -747,9 +743,9 @@ retest: if (err == -ENOMEM) { /* Do not reject to allow future retries. */ cm_reset_to_idle(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); } else { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); } @@ -762,25 +758,25 @@ retest: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); break; case IB_CM_ESTABLISHED: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_DREQ_RCVD: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_drep(cm_id, NULL, 0); break; default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); break; } @@ -1169,7 +1165,6 @@ static void cm_format_req_event(struct cm_work *work, static void cm_process_work(struct cm_id_private *cm_id_priv, struct cm_work *work) { - unsigned long flags; int ret; /* We will typically only have the current event to report. */ @@ -1177,9 +1172,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv, cm_free_work(work); while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); work = cm_dequeue_work(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); BUG_ON(!work); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); @@ -1250,7 +1245,6 @@ static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; /* Quick state check to discard duplicate REQs. */ @@ -1261,7 +1255,7 @@ static void cm_dup_req_handler(struct cm_work *work, if (ret) return; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_MRA_REQ_SENT: cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, @@ -1276,14 +1270,14 @@ static void cm_dup_req_handler(struct cm_work *work, default: goto unlock; } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); } @@ -1293,17 +1287,16 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; - unsigned long flags; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; /* Check for possible duplicate REQ. */ - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); cm_deref_id(cur_cm_id_priv); @@ -1315,7 +1308,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); @@ -1328,7 +1321,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, req_msg->private_data); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); @@ -1338,7 +1331,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); out: return listen_cm_id_priv; } @@ -1591,7 +1584,6 @@ static void cm_dup_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; @@ -1604,7 +1596,7 @@ static void cm_dup_rep_handler(struct cm_work *work) if (ret) goto deref; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state == IB_CM_ESTABLISHED) cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, @@ -1616,14 +1608,14 @@ static void cm_dup_rep_handler(struct cm_work *work) cm_id_priv->private_data_len); else goto unlock; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); deref: cm_deref_id(cm_id_priv); } @@ -1632,7 +1624,6 @@ static int cm_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; - unsigned long flags; int ret; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1644,13 +1635,13 @@ static int cm_rep_handler(struct cm_work *work) cm_format_rep_event(work); - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: break; default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } @@ -1663,7 +1654,7 @@ static int cm_rep_handler(struct cm_work *work) /* Check for duplicate REP. */ if (cm_insert_remote_id(cm_id_priv->timewait_info)) { spin_unlock(&cm.lock); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } @@ -1673,7 +1664,7 @@ static int cm_rep_handler(struct cm_work *work) &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; spin_unlock(&cm.lock); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); @@ -1696,7 +1687,7 @@ static int cm_rep_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1712,7 +1703,6 @@ error: static int cm_establish_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; - unsigned long flags; int ret; /* See comment in cm_establish about lookup. */ @@ -1720,9 +1710,9 @@ static int cm_establish_handler(struct cm_work *work) if (!cm_id_priv) return -EINVAL; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } @@ -1730,7 +1720,7 @@ static int cm_establish_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1746,7 +1736,6 @@ static int cm_rtu_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rtu_msg *rtu_msg; - unsigned long flags; int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1757,10 +1746,10 @@ static int cm_rtu_handler(struct cm_work *work) work->cm_event.private_data = &rtu_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; @@ -1769,7 +1758,7 @@ static int cm_rtu_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1932,7 +1921,6 @@ static int cm_dreq_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1945,7 +1933,7 @@ static int cm_dreq_handler(struct cm_work *work) work->cm_event.private_data = &dreq_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) goto unlock; @@ -1964,7 +1952,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); @@ -1977,7 +1965,7 @@ static int cm_dreq_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1985,7 +1973,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } @@ -1994,7 +1982,6 @@ static int cm_drep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_drep_msg *drep_msg; - unsigned long flags; int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2005,10 +1992,10 @@ static int cm_drep_handler(struct cm_work *work) work->cm_event.private_data = &drep_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_DREQ_SENT && cm_id_priv->id.state != IB_CM_DREQ_RCVD) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_enter_timewait(cm_id_priv); @@ -2017,7 +2004,7 @@ static int cm_drep_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2107,17 +2094,16 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; - unsigned long flags; __be32 remote_id; remote_id = rej_msg->local_comm_id; if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), remote_id); if (!timewait_info) { - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); return NULL; } cm_id_priv = idr_find(&cm.local_id_table, (__force int) @@ -2129,7 +2115,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) else cm_id_priv = NULL; } - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); else @@ -2142,7 +2128,6 @@ static int cm_rej_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rej_msg *rej_msg; - unsigned long flags; int ret; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2152,7 +2137,7 @@ static int cm_rej_handler(struct cm_work *work) cm_format_rej_event(work); - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: @@ -2176,7 +2161,7 @@ static int cm_rej_handler(struct cm_work *work) cm_enter_timewait(cm_id_priv); break; default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto out; } @@ -2184,7 +2169,7 @@ static int cm_rej_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2295,7 +2280,6 @@ static int cm_mra_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_mra_msg *mra_msg; - unsigned long flags; int timeout, ret; mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2309,7 +2293,7 @@ static int cm_mra_handler(struct cm_work *work) timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + cm_convert_to_ms(cm_id_priv->av.packet_life_time); - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || @@ -2342,7 +2326,7 @@ static int cm_mra_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2350,7 +2334,7 @@ static int cm_mra_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; out: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_deref_id(cm_id_priv); return -EINVAL; } @@ -2465,7 +2449,6 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; /* todo: verify LAP request and send reject APR if invalid. */ @@ -2480,7 +2463,7 @@ static int cm_lap_handler(struct cm_work *work) cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; @@ -2497,7 +2480,7 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); @@ -2515,7 +2498,7 @@ static int cm_lap_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2523,7 +2506,7 @@ static int cm_lap_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } @@ -2598,7 +2581,6 @@ static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_apr_msg *apr_msg; - unsigned long flags; int ret; apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2612,11 +2594,11 @@ static int cm_apr_handler(struct cm_work *work) work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; work->cm_event.private_data = &apr_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED || (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; @@ -2626,7 +2608,7 @@ static int cm_apr_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2761,7 +2743,6 @@ static int cm_sidr_req_handler(struct cm_work *work) struct cm_id_private *cm_id_priv, *cur_cm_id_priv; struct cm_sidr_req_msg *sidr_req_msg; struct ib_wc *wc; - unsigned long flags; cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) @@ -2782,10 +2763,10 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); goto out; /* Duplicate message. */ } cur_cm_id_priv = cm_find_listen(cm_id->device, @@ -2793,12 +2774,12 @@ static int cm_sidr_req_handler(struct cm_work *work) sidr_req_msg->private_data); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); /* todo: reply with no match */ goto out; /* No match. */ } atomic_inc(&cur_cm_id_priv->refcount); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; @@ -2899,7 +2880,6 @@ static int cm_sidr_rep_handler(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct cm_id_private *cm_id_priv; - unsigned long flags; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; @@ -2907,14 +2887,14 @@ static int cm_sidr_rep_handler(struct cm_work *work) if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work); cm_process_work(cm_id_priv, work); @@ -2930,14 +2910,13 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, struct cm_id_private *cm_id_priv; struct ib_cm_event cm_event; enum ib_cm_state state; - unsigned long flags; int ret; memset(&cm_event, 0, sizeof cm_event); cm_id_priv = msg->context[0]; /* Discard old sends or ones without a response. */ - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); state = (enum ib_cm_state) (unsigned long) msg->context[1]; if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) goto discard; @@ -2964,7 +2943,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, default: goto discard; } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ @@ -2974,7 +2953,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, ib_destroy_cm_id(&cm_id_priv->id); return; discard: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_free_msg(msg); } From 1d84612649427a85e1f311baa7215f9a6252d856 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Jun 2007 11:09:37 -0700 Subject: [PATCH 70/76] IB/cm: Include HCA ACK delay in local ACK timeout The IB CM should include the HCA ACK delay when calculating the local ACK timeout value to use for RC QPs. If the HCA ACK delay is large enough relative to the packet life time, then if it is not taken into account, the calculated timeout value ends up being too small, which can result in "retry exceeded" errors. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 71 ++++++++++++++++++++----- drivers/infiniband/core/cma.c | 1 - drivers/infiniband/core/ucm.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 - include/rdma/ib_cm.h | 1 - 5 files changed, 57 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 16181d655854..c7007c45889a 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -87,6 +87,7 @@ struct cm_port { struct cm_device { struct list_head list; struct ib_device *device; + u8 ack_delay; struct cm_port port[0]; }; @@ -95,7 +96,7 @@ struct cm_av { union ib_gid dgid; struct ib_ah_attr ah_attr; u16 pkey_index; - u8 packet_life_time; + u8 timeout; }; struct cm_work { @@ -154,6 +155,7 @@ struct cm_id_private { u8 retry_count; u8 rnr_retry_count; u8 service_timeout; + u8 target_ack_delay; struct list_head work_list; atomic_t work_count; @@ -293,7 +295,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) av->port = port; ib_init_ah_from_path(cm_dev->device, port->port_num, path, &av->ah_attr); - av->packet_life_time = path->packet_life_time; + av->timeout = path->packet_life_time + 1; return 0; } @@ -643,6 +645,25 @@ static inline int cm_convert_to_ms(int iba_time) return 1 << max(iba_time - 8, 0); } +/* + * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time + * Because of how ack_timeout is stored, adding one doubles the timeout. + * To avoid large timeouts, select the max(ack_delay, life_time + 1), and + * increment it (round up) only if the other is within 50%. + */ +static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) +{ + int ack_timeout = packet_life_time + 1; + + if (ack_timeout >= ca_ack_delay) + ack_timeout += (ca_ack_delay >= (ack_timeout - 1)); + else + ack_timeout = ca_ack_delay + + (ack_timeout >= (ca_ack_delay - 1)); + + return min(31, ack_timeout); +} + static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { if (timewait_info->inserted_remote_id) { @@ -686,7 +707,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) * timewait before notifying the user that we've exited timewait. */ cm_id_priv->id.state = IB_CM_TIMEWAIT; - wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1); + wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); cm_id_priv->timewait_info = NULL; @@ -908,7 +929,8 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_primary_sl(req_msg, param->primary_path->sl); cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */ cm_req_set_primary_local_ack_timeout(req_msg, - min(31, param->primary_path->packet_life_time + 1)); + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + param->primary_path->packet_life_time)); if (param->alternate_path) { req_msg->alt_local_lid = param->alternate_path->slid; @@ -923,7 +945,8 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_alt_sl(req_msg, param->alternate_path->sl); cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */ cm_req_set_alt_local_ack_timeout(req_msg, - min(31, param->alternate_path->packet_life_time + 1)); + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + param->alternate_path->packet_life_time)); } if (param->private_data && param->private_data_len) @@ -1433,7 +1456,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; rep_msg->initiator_depth = param->initiator_depth; - cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay); + cm_rep_set_target_ack_delay(rep_msg, + cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); @@ -1680,6 +1704,13 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + cm_id_priv->av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->av.timeout - 1); + cm_id_priv->alt_av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->alt_av.timeout - 1); /* todo: handle peer_to_peer */ @@ -2291,7 +2322,7 @@ static int cm_mra_handler(struct cm_work *work) work->cm_event.param.mra_rcvd.service_timeout = cm_mra_get_service_timeout(mra_msg); timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + - cm_convert_to_ms(cm_id_priv->av.packet_life_time); + cm_convert_to_ms(cm_id_priv->av.timeout); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { @@ -2363,7 +2394,8 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, cm_lap_set_sl(lap_msg, alternate_path->sl); cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ cm_lap_set_local_ack_timeout(lap_msg, - min(31, alternate_path->packet_life_time + 1)); + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + alternate_path->packet_life_time)); if (private_data && private_data_len) memcpy(lap_msg->private_data, private_data, private_data_len); @@ -2394,6 +2426,9 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); if (ret) goto out; + cm_id_priv->alt_av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->alt_av.timeout - 1); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) @@ -3248,8 +3283,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; - qp_attr->alt_timeout = - cm_id_priv->alt_av.packet_life_time + 1; + qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; } ret = 0; @@ -3287,8 +3321,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = - cm_id_priv->av.packet_life_time + 1; + qp_attr->timeout = cm_id_priv->av.timeout; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; qp_attr->max_rd_atomic = @@ -3302,8 +3335,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; - qp_attr->alt_timeout = - cm_id_priv->alt_av.packet_life_time + 1; + qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->path_mig_state = IB_MIG_REARM; } @@ -3343,6 +3375,16 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); +void cm_get_ack_delay(struct cm_device *cm_dev) +{ + struct ib_device_attr attr; + + if (ib_query_device(cm_dev->device, &attr)) + cm_dev->ack_delay = 0; /* acks will rely on packet life time */ + else + cm_dev->ack_delay = attr.local_ca_ack_delay; +} + static void cm_add_one(struct ib_device *device) { struct cm_device *cm_dev; @@ -3367,6 +3409,7 @@ static void cm_add_one(struct ib_device *device) return; cm_dev->device = device; + cm_get_ack_delay(cm_dev); set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= device->phys_port_cnt; i++) { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 32a0e66d2a23..23af7a032a03 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2326,7 +2326,6 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.private_data_len = conn_param->private_data_len; rep.responder_resources = conn_param->responder_resources; rep.initiator_depth = conn_param->initiator_depth; - rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = conn_param->rnr_retry_count; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 2586a3ee8eba..424983f5b1ee 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -823,7 +823,6 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, param.private_data_len = cmd.len; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; - param.target_ack_delay = cmd.target_ack_delay; param.failover_accepted = cmd.failover_accepted; param.flow_control = cmd.flow_control; param.rnr_retry_count = cmd.rnr_retry_count; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6764d216c887..08b4676a3820 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -281,7 +281,6 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, rep.private_data_len = sizeof data; rep.flow_control = 0; rep.rnr_retry_count = req->rnr_retry_count; - rep.target_ack_delay = 20; /* FIXME */ rep.srq = 1; rep.qp_num = qp->qp_num; rep.starting_psn = psn; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 5c070176d9ab..12243e80c706 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -385,7 +385,6 @@ struct ib_cm_rep_param { u8 private_data_len; u8 responder_resources; u8 initiator_depth; - u8 target_ack_delay; u8 failover_accepted; u8 flow_control; u8 rnr_retry_count; From 5d861be8c87b8a8f8e0f58b3b8864f31da7ee9c3 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 28 Jun 2007 19:16:51 -0700 Subject: [PATCH 71/76] IB/cm: cm_msgs.h should include ib_cm.h cm_msgs.h uses definitions from ib_cm.h. Include it directly, rather than depending on a specific include order. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm_msgs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 4d3aee90c249..aec9c7af825d 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -35,6 +35,7 @@ #define CM_MSGS_H #include +#include /* * Parameters to routines below should be in network-byte order, and values From 29c2731cbfa9d5b560a73461775f2be03b46dd4c Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 2 Jul 2007 14:36:19 -0700 Subject: [PATCH 72/76] IB/cm: Fix handling of duplicate SIDR REQs Fix handling to duplicate SIDR REQs to avoid sending a reject if a duplicate is detected. Duplicates should just be silently discarded. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c7007c45889a..9135a8c1d4ac 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2794,7 +2794,6 @@ static int cm_sidr_req_handler(struct cm_work *work) work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->id.remote_id = sidr_req_msg->request_id; - cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); @@ -2804,6 +2803,7 @@ static int cm_sidr_req_handler(struct cm_work *work) spin_unlock_irq(&cm.lock); goto out; /* Duplicate message. */ } + cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cur_cm_id_priv = cm_find_listen(cm_id->device, sidr_req_msg->service_id, sidr_req_msg->private_data); From 6164c8cd1333403a28202f7c7e64ff9086d8f1aa Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 2 Jul 2007 16:14:14 -0700 Subject: [PATCH 73/76] IB/cm: Send no match if a SIDR REQ does not match a listen If a SIDR REQ does not match a listen, we should reply with status value 1 (service ID not supported), rather than dropping through to the default case of status 2 (rejected by service provider). Doing this also fixes a bug where the cm_id_priv is removed from the remote_sidr_table twice. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 9135a8c1d4ac..9820c67ba47d 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2808,9 +2808,8 @@ static int cm_sidr_req_handler(struct cm_work *work) sidr_req_msg->service_id, sidr_req_msg->private_data); if (!cur_cm_id_priv) { - rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irq(&cm.lock); - /* todo: reply with no match */ + cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } atomic_inc(&cur_cm_id_priv->refcount); From 6a775e2ba4f7635849ade628e64723ab2beef0bc Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 21 Jun 2007 12:27:47 +0300 Subject: [PATCH 74/76] IB/mlx4: Implement query QP Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 + drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 + drivers/infiniband/hw/mlx4/qp.c | 137 +++++++++++++++++++++++++++ drivers/net/mlx4/qp.c | 21 ++++ include/linux/mlx4/qp.h | 3 + 5 files changed, 165 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2fc8ccebaac1..6b9870a50bea 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -523,6 +523,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | @@ -550,6 +551,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; ibdev->ib_dev.create_qp = mlx4_ib_create_qp; ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; + ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 40b83914b7b2..d6dc57c5ccca 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -267,6 +267,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, int mlx4_ib_destroy_qp(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 28a08bdd1800..40042184ad58 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1455,3 +1455,140 @@ out: return err; } + +static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) +{ + switch (mlx4_state) { + case MLX4_QP_STATE_RST: return IB_QPS_RESET; + case MLX4_QP_STATE_INIT: return IB_QPS_INIT; + case MLX4_QP_STATE_RTR: return IB_QPS_RTR; + case MLX4_QP_STATE_RTS: return IB_QPS_RTS; + case MLX4_QP_STATE_SQ_DRAINING: + case MLX4_QP_STATE_SQD: return IB_QPS_SQD; + case MLX4_QP_STATE_SQER: return IB_QPS_SQE; + case MLX4_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) +{ + switch (mlx4_mig_state) { + case MLX4_QP_PM_ARMED: return IB_MIG_ARMED; + case MLX4_QP_PM_REARM: return IB_MIG_REARM; + case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mlx4_flags) +{ + int ib_flags = 0; + + if (mlx4_flags & MLX4_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mlx4_flags & MLX4_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mlx4_flags & MLX4_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, + struct mlx4_qp_path *path) +{ + memset(ib_ah_attr, 0, sizeof *path); + ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + return; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; + ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; + ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index; + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->tclass_flowlabel) & 0xffffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof ib_ah_attr->grh.dgid.raw); + } +} + +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_qp_context context; + int mlx4_state; + int err; + + if (qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + err = mlx4_qp_query(dev->dev, &qp->mqp, &context); + if (err) + return -EINVAL; + + mlx4_state = be32_to_cpu(context.flags) >> 28; + + qp_attr->qp_state = to_ib_qp_state(mlx4_state); + qp_attr->path_mtu = context.mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context.qkey); + qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context.params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context.pri_path.ackto >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; + qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; + qp_attr->alt_timeout = context.alt_path.ackto >> 3; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) - + send_wqe_overhead(qp->ibqp.qp_type) - + sizeof (struct mlx4_wqe_inline_seg); + qp_init_attr->cap = qp_attr->cap; + } + + return 0; +} + diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index 492cfaaaa75c..19b48c71cf7f 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -277,3 +277,24 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev) mlx4_CONF_SPECIAL_QP(dev, 0); mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap); } + +int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, + struct mlx4_qp_context *context) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0, + MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A); + if (!err) + memcpy(context, mailbox->buf + 8, sizeof *context); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_qp_query); + diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 10c57d279144..3968b943259a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -282,6 +282,9 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, int sqd_event, struct mlx4_qp *qp); +int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, + struct mlx4_qp_context *context); + static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1)); From 65541cb7cf353946ecd78016a453b453b8830656 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 21 Jun 2007 13:03:11 +0300 Subject: [PATCH 75/76] IB/mlx4: Implement query SRQ Signed-off-by: Dotan Barak Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 ++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/infiniband/hw/mlx4/srq.c | 18 +++++++++++++++++ drivers/net/mlx4/srq.c | 30 ++++++++++++++++++++++++++++ include/linux/mlx4/device.h | 1 + 5 files changed, 52 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 6b9870a50bea..dde8fe9af47e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -529,6 +529,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); ibdev->ib_dev.query_device = mlx4_ib_query_device; @@ -547,6 +548,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; ibdev->ib_dev.create_srq = mlx4_ib_create_srq; ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; + ibdev->ib_dev.query_srq = mlx4_ib_query_srq; ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; ibdev->ib_dev.create_qp = mlx4_ib_create_qp; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d6dc57c5ccca..705ff2fa237e 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -256,6 +256,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mlx4_ib_destroy_srq(struct ib_srq *srq); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 12fac1c8989d..408748fb5285 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -240,6 +240,24 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return 0; } +int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + int ret; + int limit_watermark; + + ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark); + if (ret) + return ret; + + srq_attr->srq_limit = be16_to_cpu(limit_watermark); + srq_attr->max_wr = srq->msrq.max - 1; + srq_attr->max_sge = srq->msrq.max_gs; + + return 0; +} + int mlx4_ib_destroy_srq(struct ib_srq *srq) { struct mlx4_ib_dev *dev = to_mdev(srq->device); diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c index 2134f83aed87..b061c86d6839 100644 --- a/drivers/net/mlx4/srq.c +++ b/drivers/net/mlx4/srq.c @@ -102,6 +102,13 @@ static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark) MLX4_CMD_TIME_CLASS_B); } +static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, + int srq_num) +{ + return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ, + MLX4_CMD_TIME_CLASS_A); +} + int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq) { @@ -205,6 +212,29 @@ int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark } EXPORT_SYMBOL_GPL(mlx4_srq_arm); +int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_srq_context *srq_context; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + + err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn); + if (err) + goto err_out; + *limit_watermark = srq_context->limit_watermark; + +err_out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_srq_query); + int __devinit mlx4_init_srq_table(struct mlx4_dev *dev) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8209387ee854..cfb78fb2c046 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -323,6 +323,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq); void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); +int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark); int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); From cec7c893d8654723028f09d33341e42673558057 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 12 Jul 2007 15:59:36 -0700 Subject: [PATCH 76/76] IB: Update MAINTAINERS with Hal's new email address Signed-off-by: Roland Dreier --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96a174b64e10..336edd9cb11b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1850,7 +1850,7 @@ M: rolandd@cisco.com P: Sean Hefty M: mshefty@ichips.intel.com P: Hal Rosenstock -M: halr@voltaire.com +M: hal.rosenstock@gmail.com L: general@lists.openfabrics.org W: http://www.openib.org/ T: git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git