diff --git a/.clang-format b/.clang-format index bc2ffb2a0b53..335ce29ab813 100644 --- a/.clang-format +++ b/.clang-format @@ -240,6 +240,7 @@ ForEachMacros: - 'for_each_set_bit' - 'for_each_set_bit_from' - 'for_each_sg' + - 'for_each_sg_dma_page' - 'for_each_sg_page' - 'for_each_sibling_event' - '__for_each_thread' diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 31786b200afc..a3357ff7540d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -311,7 +311,13 @@ static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter) static dma_addr_t __vmw_piter_sg_addr(struct vmw_piter *viter) { - return sg_page_iter_dma_address(&viter->iter); + /* + * FIXME: This driver wrongly mixes DMA and CPU SG list iteration and + * needs revision. See + * https://lore.kernel.org/lkml/20190104223531.GA1705@ziepe.ca/ + */ + return sg_page_iter_dma_address( + container_of(&viter->iter, struct sg_dma_page_iter, base)); } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 7b04590f307f..2338d0b3a0ca 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1460,6 +1460,9 @@ void ib_cache_release_one(struct ib_device *device) { int p; + if (!device->cache.ports) + return; + /* * The release function frees all the cache elements. * This function should be called as part of freeing diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e15546ae4d0f..c43512752b8a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -888,6 +888,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; id_priv->tos_set = false; + id_priv->timeout_set = false; id_priv->gid_type = IB_GID_TYPE_IB; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); @@ -1130,6 +1131,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } else ret = -ENOSYS; + if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set) + qp_attr->timeout = id_priv->timeout; + return ret; } EXPORT_SYMBOL(rdma_init_qp_attr); @@ -2410,6 +2414,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) return PTR_ERR(id); id->tos = id_priv->tos; + id->tos_set = id_priv->tos_set; id_priv->cm_id.iw = id; memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), @@ -2462,6 +2467,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; + dev_id_priv->tos_set = id_priv->tos_set; + dev_id_priv->tos = id_priv->tos; ret = rdma_listen(id, id_priv->backlog); if (ret) @@ -2490,6 +2497,34 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos) } EXPORT_SYMBOL(rdma_set_service_type); +/** + * rdma_set_ack_timeout() - Set the ack timeout of QP associated + * with a connection identifier. + * @id: Communication identifier to associated with service type. + * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec. + * + * This function should be called before rdma_connect() on active side, + * and on passive side before rdma_accept(). It is applicable to primary + * path only. The timeout will affect the local side of the QP, it is not + * negotiated with remote side and zero disables the timer. + * + * Return: 0 for success + */ +int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) +{ + struct rdma_id_private *id_priv; + + if (id->qp_type != IB_QPT_RC) + return -EINVAL; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->timeout = timeout; + id_priv->timeout_set = true; + + return 0; +} +EXPORT_SYMBOL(rdma_set_ack_timeout); + static void cma_query_handler(int status, struct sa_path_rec *path_rec, void *context) { @@ -3809,6 +3844,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, return PTR_ERR(cm_id); cm_id->tos = id_priv->tos; + cm_id->tos_set = id_priv->tos_set; id_priv->cm_id.iw = cm_id; memcpy(&cm_id->local_addr, cma_src_addr(id_priv), diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index cf47c69436a7..ca7307277518 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -84,9 +84,11 @@ struct rdma_id_private { u32 options; u8 srq; u8 tos; - bool tos_set; + u8 tos_set:1; + u8 timeout_set:1; u8 reuseaddr; u8 afonly; + u8 timeout; enum ib_gid_type gid_type; /* diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index bcb3e3029a9b..a1826f4c2e23 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -181,7 +181,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device, u64 *sn_pfx); #ifdef CONFIG_SECURITY_INFINIBAND -void ib_security_destroy_port_pkey_list(struct ib_device *device); +void ib_security_release_port_pkey_list(struct ib_device *device); void ib_security_cache_change(struct ib_device *device, u8 port_num, @@ -202,8 +202,9 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, enum ib_qp_type qp_type); void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent); int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index); +void ib_mad_agent_security_change(void); #else -static inline void ib_security_destroy_port_pkey_list(struct ib_device *device) +static inline void ib_security_release_port_pkey_list(struct ib_device *device) { } @@ -267,6 +268,10 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, { return 0; } + +static inline void ib_mad_agent_security_change(void) +{ +} #endif struct ib_device *ib_device_get_by_index(u32 ifindex); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 55221990d946..3325be4f91a5 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -51,39 +50,88 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("core kernel InfiniBand API"); MODULE_LICENSE("Dual BSD/GPL"); -struct ib_client_data { - struct list_head list; - struct ib_client *client; - void * data; - /* The device or client is going down. Do not call client or device - * callbacks other than remove(). */ - bool going_down; -}; - struct workqueue_struct *ib_comp_wq; struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); -/* The device_list and client_list contain devices and clients after their - * registration has completed, and the devices and clients are removed - * during unregistration. */ -static LIST_HEAD(device_list); -static LIST_HEAD(client_list); +/* + * Each of the three rwsem locks (devices, clients, client_data) protects the + * xarray of the same name. Specifically it allows the caller to assert that + * the MARK will/will not be changing under the lock, and for devices and + * clients, that the value in the xarray is still a valid pointer. Change of + * the MARK is linked to the object state, so holding the lock and testing the + * MARK also asserts that the contained object is in a certain state. + * + * This is used to build a two stage register/unregister flow where objects + * can continue to be in the xarray even though they are still in progress to + * register/unregister. + * + * The xarray itself provides additional locking, and restartable iteration, + * which is also relied on. + * + * Locks should not be nested, with the exception of client_data, which is + * allowed to nest under the read side of the other two locks. + * + * The devices_rwsem also protects the device name list, any change or + * assignment of device name must also hold the write side to guarantee unique + * names. + */ /* - * device_mutex and lists_rwsem protect access to both device_list and - * client_list. device_mutex protects writer access by device and client - * registration / de-registration. lists_rwsem protects reader access to - * these lists. Iterators of these lists must lock it for read, while updates - * to the lists must be done with a write lock. A special case is when the - * device_mutex is locked. In this case locking the lists for read access is - * not necessary as the device_mutex implies it. + * devices contains devices that have had their names assigned. The + * devices may not be registered. Users that care about the registration + * status need to call ib_device_try_get() on the device to ensure it is + * registered, and keep it registered, for the required duration. * - * lists_rwsem also protects access to the client data list. */ -static DEFINE_MUTEX(device_mutex); -static DECLARE_RWSEM(lists_rwsem); +static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); +static DECLARE_RWSEM(devices_rwsem); +#define DEVICE_REGISTERED XA_MARK_1 + +static LIST_HEAD(client_list); +#define CLIENT_REGISTERED XA_MARK_1 +static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); +static DECLARE_RWSEM(clients_rwsem); + +/* + * If client_data is registered then the corresponding client must also still + * be registered. + */ +#define CLIENT_DATA_REGISTERED XA_MARK_1 +/* + * xarray has this behavior where it won't iterate over NULL values stored in + * allocated arrays. So we need our own iterator to see all values stored in + * the array. This does the same thing as xa_for_each except that it also + * returns NULL valued entries if the array is allocating. Simplified to only + * work on simple xarrays. + */ +static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, + xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp); + void *entry; + + rcu_read_lock(); + do { + entry = xas_find_marked(&xas, ULONG_MAX, filter); + if (xa_is_zero(entry)) + break; + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + if (entry) { + *indexp = xas.xa_index; + if (xa_is_zero(entry)) + return NULL; + return entry; + } + return XA_ERROR(-ENOENT); +} +#define xan_for_each_marked(xa, index, entry, filter) \ + for (index = 0, entry = xan_find_marked(xa, &(index), filter); \ + !xa_is_err(entry); \ + (index)++, entry = xan_find_marked(xa, &(index), filter)) static int ib_security_change(struct notifier_block *nb, unsigned long event, void *lsm_data); @@ -133,17 +181,6 @@ static int ib_device_check_mandatory(struct ib_device *device) return 0; } -static struct ib_device *__ib_device_get_by_index(u32 index) -{ - struct ib_device *device; - - list_for_each_entry(device, &device_list, core_list) - if (device->index == index) - return device; - - return NULL; -} - /* * Caller must perform ib_device_put() to return the device reference count * when ib_device_get_by_index() returns valid device pointer. @@ -152,13 +189,13 @@ struct ib_device *ib_device_get_by_index(u32 index) { struct ib_device *device; - down_read(&lists_rwsem); - device = __ib_device_get_by_index(index); + down_read(&devices_rwsem); + device = xa_load(&devices, index); if (device) { if (!ib_device_try_get(device)) device = NULL; } - up_read(&lists_rwsem); + up_read(&devices_rwsem); return device; } @@ -179,8 +216,9 @@ EXPORT_SYMBOL(ib_device_put); static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; + unsigned long index; - list_for_each_entry(device, &device_list, core_list) + xa_for_each (&devices, index, device) if (!strcmp(name, dev_name(&device->dev))) return device; @@ -189,12 +227,14 @@ static struct ib_device *__ib_device_get_by_name(const char *name) int ib_device_rename(struct ib_device *ibdev, const char *name) { - int ret = 0; + int ret; - if (!strcmp(name, dev_name(&ibdev->dev))) - return ret; + down_write(&devices_rwsem); + if (!strcmp(name, dev_name(&ibdev->dev))) { + ret = 0; + goto out; + } - mutex_lock(&device_mutex); if (__ib_device_get_by_name(name)) { ret = -EEXIST; goto out; @@ -205,52 +245,56 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) goto out; strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); out: - mutex_unlock(&device_mutex); + up_write(&devices_rwsem); return ret; } static int alloc_name(struct ib_device *ibdev, const char *name) { - unsigned long *inuse; struct ib_device *device; + unsigned long index; + struct ida inuse; + int rc; int i; - inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!inuse) - return -ENOMEM; - - list_for_each_entry(device, &device_list, core_list) { + lockdep_assert_held_exclusive(&devices_rwsem); + ida_init(&inuse); + xa_for_each (&devices, index, device) { char buf[IB_DEVICE_NAME_MAX]; if (sscanf(dev_name(&device->dev), name, &i) != 1) continue; - if (i < 0 || i >= PAGE_SIZE * 8) + if (i < 0 || i >= INT_MAX) continue; snprintf(buf, sizeof buf, name, i); - if (!strcmp(buf, dev_name(&device->dev))) - set_bit(i, inuse); + if (strcmp(buf, dev_name(&device->dev)) != 0) + continue; + + rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL); + if (rc < 0) + goto out; } - i = find_first_zero_bit(inuse, PAGE_SIZE * 8); - free_page((unsigned long) inuse); + rc = ida_alloc(&inuse, GFP_KERNEL); + if (rc < 0) + goto out; - return dev_set_name(&ibdev->dev, name, i); + rc = dev_set_name(&ibdev->dev, name, rc); +out: + ida_destroy(&inuse); + return rc; } static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); - WARN_ON(dev->reg_state == IB_DEV_REGISTERED); - if (dev->reg_state == IB_DEV_UNREGISTERED) { - /* - * In IB_DEV_UNINITIALIZED state, cache or port table - * is not even created. Free cache and port table only when - * device reaches UNREGISTERED state. - */ - ib_cache_release_one(dev); - kfree(dev->port_immutable); - } + WARN_ON(refcount_read(&dev->refcount)); + ib_cache_release_one(dev); + ib_security_release_port_pkey_list(dev); + kfree(dev->port_pkey_list); + kfree(dev->port_immutable); + xa_destroy(&dev->client_data); kfree(dev); } @@ -301,8 +345,12 @@ struct ib_device *_ib_alloc_device(size_t size) INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); - rwlock_init(&device->client_data_lock); - INIT_LIST_HEAD(&device->client_data_list); + /* + * client_data needs to be alloc because we don't want our mark to be + * destroyed if the user stores NULL in the client data. + */ + xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); + init_rwsem(&device->client_data_rwsem); INIT_LIST_HEAD(&device->port_list); init_completion(&device->unreg_completion); @@ -318,36 +366,93 @@ EXPORT_SYMBOL(_ib_alloc_device); */ void ib_dealloc_device(struct ib_device *device) { - WARN_ON(!list_empty(&device->client_data_list)); - WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && - device->reg_state != IB_DEV_UNINITIALIZED); + WARN_ON(!xa_empty(&device->client_data)); + WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); put_device(&device->dev); } EXPORT_SYMBOL(ib_dealloc_device); -static int add_client_context(struct ib_device *device, struct ib_client *client) +/* + * add_client_context() and remove_client_context() must be safe against + * parallel calls on the same device - registration/unregistration of both the + * device and client can be occurring in parallel. + * + * The routines need to be a fence, any caller must not return until the add + * or remove is fully completed. + */ +static int add_client_context(struct ib_device *device, + struct ib_client *client) { - struct ib_client_data *context; + int ret = 0; if (!device->kverbs_provider && !client->no_kverbs_req) - return -EOPNOTSUPP; + return 0; - context = kmalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return -ENOMEM; + down_write(&device->client_data_rwsem); + /* + * Another caller to add_client_context got here first and has already + * completely initialized context. + */ + if (xa_get_mark(&device->client_data, client->client_id, + CLIENT_DATA_REGISTERED)) + goto out; - context->client = client; - context->data = NULL; - context->going_down = false; - - down_write(&lists_rwsem); - write_lock_irq(&device->client_data_lock); - list_add(&context->list, &device->client_data_list); - write_unlock_irq(&device->client_data_lock); - up_write(&lists_rwsem); + ret = xa_err(xa_store(&device->client_data, client->client_id, NULL, + GFP_KERNEL)); + if (ret) + goto out; + downgrade_write(&device->client_data_rwsem); + if (client->add) + client->add(device); + /* Readers shall not see a client until add has been completed */ + xa_set_mark(&device->client_data, client->client_id, + CLIENT_DATA_REGISTERED); + up_read(&device->client_data_rwsem); return 0; + +out: + up_write(&device->client_data_rwsem); + return ret; +} + +static void remove_client_context(struct ib_device *device, + unsigned int client_id) +{ + struct ib_client *client; + void *client_data; + + down_write(&device->client_data_rwsem); + if (!xa_get_mark(&device->client_data, client_id, + CLIENT_DATA_REGISTERED)) { + up_write(&device->client_data_rwsem); + return; + } + client_data = xa_load(&device->client_data, client_id); + xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); + client = xa_load(&clients, client_id); + downgrade_write(&device->client_data_rwsem); + + /* + * Notice we cannot be holding any exclusive locks when calling the + * remove callback as the remove callback can recurse back into any + * public functions in this module and thus try for any locks those + * functions take. + * + * For this reason clients and drivers should not call the + * unregistration functions will holdling any locks. + * + * It tempting to drop the client_data_rwsem too, but this is required + * to ensure that unregister_client does not return until all clients + * are completely unregistered, which is required to avoid module + * unloading races. + */ + if (client->remove) + client->remove(device, client_data); + + xa_erase(&device->client_data, client_id); + up_read(&device->client_data_rwsem); } static int verify_immutable(const struct ib_device *dev, u8 port) @@ -424,9 +529,10 @@ static int setup_port_pkey_list(struct ib_device *device) static void ib_policy_change_task(struct work_struct *work) { struct ib_device *dev; + unsigned long index; - down_read(&lists_rwsem); - list_for_each_entry(dev, &device_list, core_list) { + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { int i; for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) { @@ -442,7 +548,7 @@ static void ib_policy_change_task(struct work_struct *work) ib_security_cache_change(dev, i, sp); } } - up_read(&lists_rwsem); + up_read(&devices_rwsem); } static int ib_security_change(struct notifier_block *nb, unsigned long event, @@ -452,32 +558,58 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; schedule_work(&ib_policy_change_work); + ib_mad_agent_security_change(); return NOTIFY_OK; } -/** - * __dev_new_index - allocate an device index - * - * Returns a suitable unique value for a new device interface - * number. It assumes that there are less than 2^32-1 ib devices - * will be present in the system. +/* + * Assign the unique string device name and the unique device index. */ -static u32 __dev_new_index(void) +static int assign_name(struct ib_device *device, const char *name) { - /* - * The device index to allow stable naming. - * Similar to struct net -> ifindex. - */ - static u32 index; + static u32 last_id; + int ret; - for (;;) { - if (!(++index)) - index = 1; + down_write(&devices_rwsem); + /* Assign a unique name to the device */ + if (strchr(name, '%')) + ret = alloc_name(device, name); + else + ret = dev_set_name(&device->dev, name); + if (ret) + goto out; - if (!__ib_device_get_by_index(index)) - return index; + if (__ib_device_get_by_name(dev_name(&device->dev))) { + ret = -ENFILE; + goto out; } + strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); + + /* Cyclically allocate a user visible ID for the device */ + device->index = last_id; + ret = xa_alloc(&devices, &device->index, INT_MAX, device, GFP_KERNEL); + if (ret == -ENOSPC) { + device->index = 0; + ret = xa_alloc(&devices, &device->index, INT_MAX, device, + GFP_KERNEL); + } + if (ret) + goto out; + last_id = device->index + 1; + + ret = 0; + +out: + up_write(&devices_rwsem); + return ret; +} + +static void release_name(struct ib_device *device) +{ + down_write(&devices_rwsem); + xa_erase(&devices, device->index); + up_write(&devices_rwsem); } static void setup_dma_device(struct ib_device *device) @@ -515,19 +647,18 @@ static void setup_dma_device(struct ib_device *device) } } -static void cleanup_device(struct ib_device *device) -{ - ib_cache_cleanup_one(device); - ib_cache_release_one(device); - kfree(device->port_pkey_list); - kfree(device->port_immutable); -} - +/* + * setup_device() allocates memory and sets up data that requires calling the + * device ops, this is the only reason these actions are not done during + * ib_alloc_device. It is undone by ib_dealloc_device(). + */ static int setup_device(struct ib_device *device) { struct ib_udata uhw = {.outlen = 0, .inlen = 0}; int ret; + setup_dma_device(device); + ret = ib_device_check_mandatory(device); if (ret) return ret; @@ -544,28 +675,64 @@ static int setup_device(struct ib_device *device) if (ret) { dev_warn(&device->dev, "Couldn't query the device attributes\n"); - goto port_cleanup; + return ret; } ret = setup_port_pkey_list(device); if (ret) { dev_warn(&device->dev, "Couldn't create per port_pkey_list\n"); - goto port_cleanup; + return ret; } - ret = ib_cache_setup_one(device); - if (ret) { - dev_warn(&device->dev, - "Couldn't set up InfiniBand P_Key/GID cache\n"); - goto pkey_cleanup; - } return 0; +} -pkey_cleanup: - kfree(device->port_pkey_list); -port_cleanup: - kfree(device->port_immutable); - return ret; +static void disable_device(struct ib_device *device) +{ + struct ib_client *client; + + WARN_ON(!refcount_read(&device->refcount)); + + down_write(&devices_rwsem); + xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); + up_write(&devices_rwsem); + + down_read(&clients_rwsem); + list_for_each_entry_reverse(client, &client_list, list) + remove_client_context(device, client->client_id); + up_read(&clients_rwsem); + + /* Pairs with refcount_set in enable_device */ + ib_device_put(device); + wait_for_completion(&device->unreg_completion); +} + +/* + * An enabled device is visible to all clients and to all the public facing + * APIs that return a device pointer. + */ +static int enable_device(struct ib_device *device) +{ + struct ib_client *client; + unsigned long index; + int ret; + + refcount_set(&device->refcount, 1); + down_write(&devices_rwsem); + xa_set_mark(&devices, device->index, DEVICE_REGISTERED); + up_write(&devices_rwsem); + + down_read(&clients_rwsem); + xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { + ret = add_client_context(device, client); + if (ret) { + up_read(&clients_rwsem); + disable_device(device); + return ret; + } + } + up_read(&clients_rwsem); + return 0; } /** @@ -580,32 +747,21 @@ port_cleanup: int ib_register_device(struct ib_device *device, const char *name) { int ret; - struct ib_client *client; - setup_dma_device(device); - - mutex_lock(&device_mutex); - - if (strchr(name, '%')) { - ret = alloc_name(device, name); - if (ret) - goto out; - } else { - ret = dev_set_name(&device->dev, name); - if (ret) - goto out; - } - if (__ib_device_get_by_name(dev_name(&device->dev))) { - ret = -ENFILE; - goto out; - } - strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); + ret = assign_name(device, name); + if (ret) + return ret; ret = setup_device(device); if (ret) goto out; - device->index = __dev_new_index(); + ret = ib_cache_setup_one(device); + if (ret) { + dev_warn(&device->dev, + "Couldn't set up InfiniBand P_Key/GID cache\n"); + goto out; + } ib_device_register_rdmacg(device); @@ -616,24 +772,19 @@ int ib_register_device(struct ib_device *device, const char *name) goto cg_cleanup; } - refcount_set(&device->refcount, 1); - device->reg_state = IB_DEV_REGISTERED; + ret = enable_device(device); + if (ret) + goto sysfs_cleanup; - list_for_each_entry(client, &client_list, list) - if (!add_client_context(device, client) && client->add) - client->add(device); - - down_write(&lists_rwsem); - list_add_tail(&device->core_list, &device_list); - up_write(&lists_rwsem); - mutex_unlock(&device_mutex); return 0; +sysfs_cleanup: + ib_device_unregister_sysfs(device); cg_cleanup: ib_device_unregister_rdmacg(device); - cleanup_device(device); + ib_cache_cleanup_one(device); out: - mutex_unlock(&device_mutex); + release_name(device); return ret; } EXPORT_SYMBOL(ib_register_device); @@ -646,56 +797,45 @@ EXPORT_SYMBOL(ib_register_device); */ void ib_unregister_device(struct ib_device *device) { - struct ib_client_data *context, *tmp; - unsigned long flags; - - /* - * Wait for all netlink command callers to finish working on the - * device. - */ - ib_device_put(device); - wait_for_completion(&device->unreg_completion); - - mutex_lock(&device_mutex); - - down_write(&lists_rwsem); - list_del(&device->core_list); - write_lock_irq(&device->client_data_lock); - list_for_each_entry(context, &device->client_data_list, list) - context->going_down = true; - write_unlock_irq(&device->client_data_lock); - downgrade_write(&lists_rwsem); - - list_for_each_entry(context, &device->client_data_list, list) { - if (context->client->remove) - context->client->remove(device, context->data); - } - up_read(&lists_rwsem); - + disable_device(device); ib_device_unregister_sysfs(device); ib_device_unregister_rdmacg(device); - - mutex_unlock(&device_mutex); - ib_cache_cleanup_one(device); - - ib_security_destroy_port_pkey_list(device); - kfree(device->port_pkey_list); - - down_write(&lists_rwsem); - write_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, - list) { - list_del(&context->list); - kfree(context); - } - write_unlock_irqrestore(&device->client_data_lock, flags); - up_write(&lists_rwsem); - - device->reg_state = IB_DEV_UNREGISTERED; + release_name(device); } EXPORT_SYMBOL(ib_unregister_device); +static int assign_client_id(struct ib_client *client) +{ + int ret; + + down_write(&clients_rwsem); + /* + * The add/remove callbacks must be called in FIFO/LIFO order. To + * achieve this we assign client_ids so they are sorted in + * registration order, and retain a linked list we can reverse iterate + * to get the LIFO order. The extra linked list can go away if xarray + * learns to reverse iterate. + */ + if (list_empty(&client_list)) + client->client_id = 0; + else + client->client_id = + list_last_entry(&client_list, struct ib_client, list) + ->client_id; + ret = xa_alloc(&clients, &client->client_id, INT_MAX, client, + GFP_KERNEL); + if (ret) + goto out; + + xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); + list_add_tail(&client->list, &client_list); + +out: + up_write(&clients_rwsem); + return ret; +} + /** * ib_register_client - Register an IB client * @client:Client to register @@ -712,19 +852,23 @@ EXPORT_SYMBOL(ib_unregister_device); int ib_register_client(struct ib_client *client) { struct ib_device *device; + unsigned long index; + int ret; - mutex_lock(&device_mutex); - - list_for_each_entry(device, &device_list, core_list) - if (!add_client_context(device, client) && client->add) - client->add(device); - - down_write(&lists_rwsem); - list_add_tail(&client->list, &client_list); - up_write(&lists_rwsem); - - mutex_unlock(&device_mutex); + ret = assign_client_id(client); + if (ret) + return ret; + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { + ret = add_client_context(device, client); + if (ret) { + up_read(&devices_rwsem); + ib_unregister_client(client); + return ret; + } + } + up_read(&devices_rwsem); return 0; } EXPORT_SYMBOL(ib_register_client); @@ -736,108 +880,56 @@ EXPORT_SYMBOL(ib_register_client); * Upper level users use ib_unregister_client() to remove their client * registration. When ib_unregister_client() is called, the client * will receive a remove callback for each IB device still registered. + * + * This is a full fence, once it returns no client callbacks will be called, + * or are running in another thread. */ void ib_unregister_client(struct ib_client *client) { - struct ib_client_data *context; struct ib_device *device; + unsigned long index; - mutex_lock(&device_mutex); + down_write(&clients_rwsem); + xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); + up_write(&clients_rwsem); + /* + * Every device still known must be serialized to make sure we are + * done with the client callbacks before we return. + */ + down_read(&devices_rwsem); + xa_for_each (&devices, index, device) + remove_client_context(device, client->client_id); + up_read(&devices_rwsem); - down_write(&lists_rwsem); + down_write(&clients_rwsem); list_del(&client->list); - up_write(&lists_rwsem); - - list_for_each_entry(device, &device_list, core_list) { - struct ib_client_data *found_context = NULL; - - down_write(&lists_rwsem); - write_lock_irq(&device->client_data_lock); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - context->going_down = true; - found_context = context; - break; - } - write_unlock_irq(&device->client_data_lock); - up_write(&lists_rwsem); - - if (client->remove) - client->remove(device, found_context ? - found_context->data : NULL); - - if (!found_context) { - dev_warn(&device->dev, - "No client context found for %s\n", - client->name); - continue; - } - - down_write(&lists_rwsem); - write_lock_irq(&device->client_data_lock); - list_del(&found_context->list); - write_unlock_irq(&device->client_data_lock); - up_write(&lists_rwsem); - kfree(found_context); - } - - mutex_unlock(&device_mutex); + xa_erase(&clients, client->client_id); + up_write(&clients_rwsem); } EXPORT_SYMBOL(ib_unregister_client); -/** - * ib_get_client_data - Get IB client context - * @device:Device to get context for - * @client:Client to get context for - * - * ib_get_client_data() returns client context set with - * ib_set_client_data(). - */ -void *ib_get_client_data(struct ib_device *device, struct ib_client *client) -{ - struct ib_client_data *context; - void *ret = NULL; - unsigned long flags; - - read_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - ret = context->data; - break; - } - read_unlock_irqrestore(&device->client_data_lock, flags); - - return ret; -} -EXPORT_SYMBOL(ib_get_client_data); - /** * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set * - * ib_set_client_data() sets client context that can be retrieved with - * ib_get_client_data(). + * ib_set_client_data() sets client context data that can be retrieved with + * ib_get_client_data(). This can only be called while the client is + * registered to the device, once the ib_client remove() callback returns this + * cannot be called. */ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data) { - struct ib_client_data *context; - unsigned long flags; + void *rc; - write_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - context->data = data; - goto out; - } + if (WARN_ON(IS_ERR(data))) + data = NULL; - dev_warn(&device->dev, "No client context found for %s\n", - client->name); - -out: - write_unlock_irqrestore(&device->client_data_lock, flags); + rc = xa_store(&device->client_data, client->client_id, data, + GFP_KERNEL); + WARN_ON(xa_is_err(rc)); } EXPORT_SYMBOL(ib_set_client_data); @@ -995,11 +1087,12 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, void *cookie) { struct ib_device *dev; + unsigned long index; - down_read(&lists_rwsem); - list_for_each_entry(dev, &device_list, core_list) + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); - up_read(&lists_rwsem); + up_read(&devices_rwsem); } /** @@ -1011,19 +1104,19 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, struct netlink_callback *cb) { + unsigned long index; struct ib_device *dev; unsigned int idx = 0; int ret = 0; - down_read(&lists_rwsem); - list_for_each_entry(dev, &device_list, core_list) { + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { ret = nldev_cb(dev, skb, cb, idx); if (ret) break; idx++; } - - up_read(&lists_rwsem); + up_read(&devices_rwsem); return ret; } @@ -1181,6 +1274,7 @@ EXPORT_SYMBOL(ib_find_pkey); * @gid: A GID that the net_dev uses to communicate. * @addr: Contains the IP address that the request specified as its * destination. + * */ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, @@ -1189,29 +1283,30 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, const struct sockaddr *addr) { struct net_device *net_dev = NULL; - struct ib_client_data *context; + unsigned long index; + void *client_data; if (!rdma_protocol_ib(dev, port)) return NULL; - down_read(&lists_rwsem); + /* + * Holding the read side guarantees that the client will not become + * unregistered while we are calling get_net_dev_by_params() + */ + down_read(&dev->client_data_rwsem); + xan_for_each_marked (&dev->client_data, index, client_data, + CLIENT_DATA_REGISTERED) { + struct ib_client *client = xa_load(&clients, index); - list_for_each_entry(context, &dev->client_data_list, list) { - struct ib_client *client = context->client; - - if (context->going_down) + if (!client || !client->get_net_dev_by_params) continue; - if (client->get_net_dev_by_params) { - net_dev = client->get_net_dev_by_params(dev, port, pkey, - gid, addr, - context->data); - if (net_dev) - break; - } + net_dev = client->get_net_dev_by_params(dev, port, pkey, gid, + addr, client_data); + if (net_dev) + break; } - - up_read(&lists_rwsem); + up_read(&dev->client_data_rwsem); return net_dev; } @@ -1227,6 +1322,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) (ptr)->name = ops->name; \ } while (0) +#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name) + SET_DEVICE_OP(dev_ops, add_gid); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); @@ -1316,6 +1413,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, set_vf_guid); SET_DEVICE_OP(dev_ops, set_vf_link_state); SET_DEVICE_OP(dev_ops, unmap_fmr); + + SET_OBJ_SIZE(dev_ops, ib_pd); } EXPORT_SYMBOL(ib_set_device_ops); @@ -1434,6 +1533,8 @@ static void __exit ib_core_cleanup(void) destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); + WARN_ON(!xa_empty(&clients)); + WARN_ON(!xa_empty(&devices)); } MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1efadbccf394..dad6a94a43f3 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -39,6 +39,10 @@ #include "core_priv.h" #include "mad_priv.h" +static LIST_HEAD(mad_agent_list); +/* Lock to protect mad_agent_list */ +static DEFINE_SPINLOCK(mad_agent_list_lock); + static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp) { struct pkey_index_qp_list *pkey = NULL; @@ -554,13 +558,12 @@ void ib_security_cache_change(struct ib_device *device, } } -void ib_security_destroy_port_pkey_list(struct ib_device *device) +void ib_security_release_port_pkey_list(struct ib_device *device) { struct pkey_index_qp_list *pkey, *tmp_pkey; int i; for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { - spin_lock(&device->port_pkey_list[i].list_lock); list_for_each_entry_safe(pkey, tmp_pkey, &device->port_pkey_list[i].pkey_list, @@ -568,7 +571,6 @@ void ib_security_destroy_port_pkey_list(struct ib_device *device) list_del(&pkey->pkey_index_list); kfree(pkey); } - spin_unlock(&device->port_pkey_list[i].list_lock); } } @@ -676,19 +678,18 @@ static int ib_security_pkey_access(struct ib_device *dev, return security_ib_pkey_access(sec, subnet_prefix, pkey); } -static int ib_mad_agent_security_change(struct notifier_block *nb, - unsigned long event, - void *data) +void ib_mad_agent_security_change(void) { - struct ib_mad_agent *ag = container_of(nb, struct ib_mad_agent, lsm_nb); + struct ib_mad_agent *ag; - if (event != LSM_POLICY_CHANGE) - return NOTIFY_DONE; - - ag->smp_allowed = !security_ib_endport_manage_subnet( - ag->security, dev_name(&ag->device->dev), ag->port_num); - - return NOTIFY_OK; + spin_lock(&mad_agent_list_lock); + list_for_each_entry(ag, + &mad_agent_list, + mad_agent_sec_list) + WRITE_ONCE(ag->smp_allowed, + !security_ib_endport_manage_subnet(ag->security, + dev_name(&ag->device->dev), ag->port_num)); + spin_unlock(&mad_agent_list_lock); } int ib_mad_agent_security_setup(struct ib_mad_agent *agent, @@ -699,6 +700,8 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, if (!rdma_protocol_ib(agent->device, agent->port_num)) return 0; + INIT_LIST_HEAD(&agent->mad_agent_sec_list); + ret = security_ib_alloc_security(&agent->security); if (ret) return ret; @@ -706,20 +709,22 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, if (qp_type != IB_QPT_SMI) return 0; + spin_lock(&mad_agent_list_lock); ret = security_ib_endport_manage_subnet(agent->security, dev_name(&agent->device->dev), agent->port_num); if (ret) - return ret; + goto free_security; - agent->lsm_nb.notifier_call = ib_mad_agent_security_change; - ret = register_lsm_notifier(&agent->lsm_nb); - if (ret) - return ret; - - agent->smp_allowed = true; - agent->lsm_nb_reg = true; + WRITE_ONCE(agent->smp_allowed, true); + list_add(&agent->mad_agent_sec_list, &mad_agent_list); + spin_unlock(&mad_agent_list_lock); return 0; + +free_security: + spin_unlock(&mad_agent_list_lock); + security_ib_free_security(agent->security); + return ret; } void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) @@ -727,9 +732,13 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) if (!rdma_protocol_ib(agent->device, agent->port_num)) return; + if (agent->qp->qp_type == IB_QPT_SMI) { + spin_lock(&mad_agent_list_lock); + list_del(&agent->mad_agent_sec_list); + spin_unlock(&mad_agent_list_lock); + } + security_ib_free_security(agent->security); - if (agent->lsm_nb_reg) - unregister_lsm_notifier(&agent->lsm_nb); } int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) @@ -738,7 +747,7 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) return 0; if (map->agent.qp->qp_type == IB_QPT_SMI) { - if (!map->agent.smp_allowed) + if (!READ_ONCE(map->agent.smp_allowed)) return -EACCES; return 0; } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 01d68ed46c1b..7468b26b8a01 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1236,6 +1236,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, } ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); break; + case RDMA_OPTION_ID_ACK_TIMEOUT: + if (optlen != sizeof(u8)) { + ret = -EINVAL; + break; + } + ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval)); + break; default: ret = -ENOSYS; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index aa260cafbd85..5ac143f22df0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -407,9 +407,9 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); + pd = rdma_zalloc_drv_obj(ib_dev, ib_pd); + if (!pd) { + ret = -ENOMEM; goto err; } @@ -417,11 +417,15 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) pd->uobject = uobj; pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); + pd->res.type = RDMA_RESTRACK_PD; + + ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata); + if (ret) + goto err_alloc; uobj->object = pd; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; - pd->res.type = RDMA_RESTRACK_PD; rdma_restrack_uadd(&pd->res); ret = uverbs_response(attrs, &resp, sizeof(resp)); @@ -432,7 +436,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) err_copy: ib_dealloc_pd(pd); - +err_alloc: + kfree(pd); err: uobj_alloc_abort(uobj); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index cbc72312eb41..f224cb727224 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -188,7 +188,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject, if (ret) return ret; - ib_dealloc_pd((struct ib_pd *)uobject->object); + ib_dealloc_pd(pd); return 0; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3220fb42ecce..de5d895a5054 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -254,10 +254,11 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, { struct ib_pd *pd; int mr_access_flags = 0; + int ret; - pd = device->ops.alloc_pd(device, NULL, NULL); - if (IS_ERR(pd)) - return pd; + pd = rdma_zalloc_drv_obj(device, ib_pd); + if (!pd) + return ERR_PTR(-ENOMEM); pd->device = device; pd->uobject = NULL; @@ -265,6 +266,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, atomic_set(&pd->usecnt, 0); pd->flags = flags; + pd->res.type = RDMA_RESTRACK_PD; + rdma_restrack_set_task(&pd->res, caller); + + ret = device->ops.alloc_pd(pd, NULL, NULL); + if (ret) { + kfree(pd); + return ERR_PTR(ret); + } + rdma_restrack_kadd(&pd->res); + if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else @@ -275,10 +286,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; } - pd->res.type = RDMA_RESTRACK_PD; - rdma_restrack_set_task(&pd->res, caller); - rdma_restrack_kadd(&pd->res); - if (mr_access_flags) { struct ib_mr *mr; @@ -329,10 +336,8 @@ void ib_dealloc_pd(struct ib_pd *pd) WARN_ON(atomic_read(&pd->usecnt)); rdma_restrack_del(&pd->res); - /* Making delalloc_pd a void return is a WIP, no driver should return - an error here. */ - ret = pd->device->ops.dealloc_pd(pd); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); + pd->device->ops.dealloc_pd(pd); + kfree(pd); } EXPORT_SYMBOL(ib_dealloc_pd); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1d7469e23cde..2ed778683c6b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -563,41 +563,29 @@ fail: } /* Protection Domains */ -int bnxt_re_dealloc_pd(struct ib_pd *ib_pd) +void bnxt_re_dealloc_pd(struct ib_pd *ib_pd) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; - int rc; bnxt_re_destroy_fence_mr(pd); - if (pd->qplib_pd.id) { - rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res, - &rdev->qplib_res.pd_tbl, - &pd->qplib_pd); - if (rc) - dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD"); - } - - kfree(pd); - return 0; + if (pd->qplib_pd.id) + bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, + &pd->qplib_pd); } -struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - struct ib_udata *udata) +int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *ucontext, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_re_ucontext *ucntx = container_of(ucontext, struct bnxt_re_ucontext, ib_uctx); - struct bnxt_re_pd *pd; + struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd); int rc; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - pd->rdev = rdev; if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD"); @@ -637,13 +625,12 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, if (bnxt_re_create_fence_mr(pd)) dev_warn(rdev_to_dev(rdev), "Failed to create Fence-MR\n"); - return &pd->ib_pd; + return 0; dbfail: - (void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, - &pd->qplib_pd); + bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, + &pd->qplib_pd); fail: - kfree(pd); - return ERR_PTR(rc); + return rc; } /* Address Handles */ @@ -3566,19 +3553,14 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, u64 *pbl_tbl = pbl_tbl_orig; u64 paddr; u64 page_mask = (1ULL << page_shift) - 1; - int i, pages; - struct scatterlist *sg; - int entry; + struct sg_dma_page_iter sg_iter; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - pages = sg_dma_len(sg) >> PAGE_SHIFT; - for (i = 0; i < pages; i++) { - paddr = sg_dma_address(sg) + (i << PAGE_SHIFT); - if (pbl_tbl == pbl_tbl_orig) - *pbl_tbl++ = paddr & ~page_mask; - else if ((paddr & page_mask) == 0) - *pbl_tbl++ = paddr; - } + for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + paddr = sg_page_iter_dma_address(&sg_iter); + if (pbl_tbl == pbl_tbl_orig) + *pbl_tbl++ = paddr & ~page_mask; + else if ((paddr & page_mask) == 0) + *pbl_tbl++ = paddr; } return pbl_tbl - pbl_tbl_orig; } @@ -3641,7 +3623,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, goto free_umem; } - page_shift = umem->page_shift; + page_shift = PAGE_SHIFT; if (!bnxt_re_page_size_ok(page_shift)) { dev_err(rdev_to_dev(rdev), "umem page size unsupported!"); @@ -3720,7 +3702,7 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev, } spin_lock_init(&uctx->sh_lock); - resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_CCTX; + resp.comp_mask = BNXT_RE_UCNTX_CMASK_HAVE_CCTX; chip_met_rev_num = rdev->chip_ctx.chip_num; chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) << BNXT_RE_CHIP_ID0_CHIP_REV_SFT; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index c4af72604b4f..c7cca803cfa3 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -56,8 +56,8 @@ struct bnxt_re_fence_data { }; struct bnxt_re_pd { + struct ib_pd ib_pd; struct bnxt_re_dev *rdev; - struct ib_pd ib_pd; struct bnxt_qplib_pd qplib_pd; struct bnxt_re_fence_data fence; }; @@ -163,10 +163,9 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); -struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int bnxt_re_dealloc_pd(struct ib_pd *pd); +int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void bnxt_re_dealloc_pd(struct ib_pd *pd); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 0d40a930c192..0a89ef6e5754 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -637,6 +637,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .query_srq = bnxt_re_query_srq, .reg_user_mr = bnxt_re_reg_user_mr, .req_notify_cq = bnxt_re_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), }; static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index c8502c2844a2..d08b9d9948fd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -85,7 +85,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, struct scatterlist *sghead, u32 pages, u32 pg_size) { - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; bool is_umem = false; int i; @@ -116,12 +116,13 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, } else { i = 0; is_umem = true; - for_each_sg(sghead, sg, pages, i) { - pbl->pg_map_arr[i] = sg_dma_address(sg); - pbl->pg_arr[i] = sg_virt(sg); + for_each_sg_dma_page (sghead, &sg_iter, pages, 0) { + pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); + pbl->pg_arr[i] = NULL; if (!pbl->pg_arr[i]) goto fail; + i++; pbl->pg_count++; } } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 804c1fc7bfc1..80dff6804e48 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -370,7 +370,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static int iwch_deallocate_pd(struct ib_pd *pd) +static void iwch_deallocate_pd(struct ib_pd *pd) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -379,15 +379,13 @@ static int iwch_deallocate_pd(struct ib_pd *pd) rhp = php->rhp; pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); - kfree(php); - return 0; } -static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct iwch_pd *php; + struct iwch_pd *php = to_iwch_pd(pd); + struct ib_device *ibdev = pd->device; u32 pdid; struct iwch_dev *rhp; @@ -395,12 +393,8 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, rhp = (struct iwch_dev *) ibdev; pdid = cxio_hal_get_pdid(rhp->rdev.rscp); if (!pdid) - return ERR_PTR(-EINVAL); - php = kzalloc(sizeof(*php), GFP_KERNEL); - if (!php) { - cxio_hal_put_pdid(rhp->rdev.rscp, pdid); - return ERR_PTR(-ENOMEM); - } + return -EINVAL; + php->pdid = pdid; php->rhp = rhp; if (context) { @@ -408,11 +402,11 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { iwch_deallocate_pd(&php->ibpd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); - return &php->ibpd; + return 0; } static int iwch_dereg_mr(struct ib_mr *ib_mr) @@ -522,14 +516,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { __be64 *pages; - int shift, n, len; - int i, k, entry; + int shift, n, i; int err = 0; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; pr_debug("%s ib_pd %p\n", __func__, pd); php = to_iwch_pd(pd); @@ -547,7 +540,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(err); } - shift = mhp->umem->page_shift; + shift = PAGE_SHIFT; n = mhp->umem->nmap; @@ -563,19 +556,15 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { - len = sg_dma_len(sg) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address(sg) + - (k << shift)); - if (i == PAGE_SIZE / sizeof *pages) { - err = iwch_write_pbl(mhp, pages, i, n); - if (err) - goto pbl_done; - n += i; - i = 0; - } - } + for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { + pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); + if (i == PAGE_SIZE / sizeof *pages) { + err = iwch_write_pbl(mhp, pages, i, n); + if (err) + goto pbl_done; + n += i; + i = 0; + } } if (i) @@ -1350,6 +1339,7 @@ static const struct ib_device_ops iwch_dev_ops = { .reg_user_mr = iwch_reg_user_mr, .req_notify_cq = iwch_arm_cq, .resize_cq = iwch_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), }; int iwch_register_device(struct iwch_dev *dev) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 59917eb124da..ae90b2932bd2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -655,7 +655,33 @@ static int send_halfclose(struct c4iw_ep *ep) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -static int send_abort(struct c4iw_ep *ep) +void read_tcb(struct c4iw_ep *ep) +{ + struct sk_buff *skb; + struct cpl_get_tcb *req; + int wrlen = roundup(sizeof(*req), 16); + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (WARN_ON(!skb)) + return; + + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + req = (struct cpl_get_tcb *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid)); + req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid)); + + /* + * keep a ref on the ep so the tcb is not unlocked before this + * cpl completes. The ref is released in read_tcb_rpl(). + */ + c4iw_get_ep(&ep->com); + if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb))) + c4iw_put_ep(&ep->com); +} + +static int send_abort_req(struct c4iw_ep *ep) { u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); @@ -670,6 +696,17 @@ static int send_abort(struct c4iw_ep *ep) return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } +static int send_abort(struct c4iw_ep *ep) +{ + if (!ep->com.qp || !ep->com.qp->srq) { + send_abort_req(ep); + return 0; + } + set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags); + read_tcb(ep); + return 0; +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req = NULL; @@ -1851,14 +1888,11 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -static void complete_cached_srq_buffers(struct c4iw_ep *ep, - __be32 srqidx_status) +static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx) { enum chip_type adapter_type; - u32 srqidx; adapter_type = ep->com.dev->rdev.lldi.adapter_type; - srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status)); /* * If this TCB had a srq buffer cached, then we must complete @@ -1876,6 +1910,7 @@ static void complete_cached_srq_buffers(struct c4iw_ep *ep, static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { + u32 srqidx; struct c4iw_ep *ep; struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); int release = 0; @@ -1887,7 +1922,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } - complete_cached_srq_buffers(ep, rpl->srqidx_status); + if (ep->com.qp && ep->com.qp->srq) { + srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status)); + complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx); + } pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); @@ -1903,8 +1941,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) } mutex_unlock(&ep->com.mutex); - if (release) + if (release) { + close_complete_upcall(ep, -ECONNRESET); release_ep_resources(ep); + } c4iw_put_ep(&ep->com); return 0; } @@ -2072,7 +2112,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } else { pdev = get_real_dev(n->dev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, pdev, 0); + n, pdev, rt_tos2priority(tos)); if (!ep->l2t) goto out; ep->mtu = dst_mtu(dst); @@ -2161,7 +2201,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) laddr6->sin6_addr.s6_addr, raddr6->sin6_addr.s6_addr, laddr6->sin6_port, - raddr6->sin6_port, 0, + raddr6->sin6_port, + ep->com.cm_id->tos, raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -2476,7 +2517,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) u16 peer_mss = ntohs(req->tcpopt.mss); int iptype; unsigned short hdrs; - u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + u8 tos; parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); if (!parent_ep) { @@ -2490,6 +2531,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } + if (parent_ep->com.cm_id->tos_set) + tos = parent_ep->com.cm_id->tos; + else + tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, local_ip, peer_ip, &local_port, &peer_port); @@ -2509,7 +2555,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) ntohs(peer_port), peer_mss); dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, local_ip, peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + tos, ((struct sockaddr_in6 *) &parent_ep->com.local_addr)->sin6_scope_id); } @@ -2740,6 +2786,21 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep) +{ + complete_cached_srq_buffers(ep, ep->srqe_idx); + if (ep->com.cm_id && ep->com.qp) { + struct c4iw_qp_attributes attrs; + + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + peer_abort_upcall(ep); + release_ep_resources(ep); + c4iw_put_ep(&ep->com); +} + static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss6 *req = cplhdr(skb); @@ -2750,6 +2811,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) int release = 0; unsigned int tid = GET_TID(req); u8 status; + u32 srqidx; u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); @@ -2769,8 +2831,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) goto deref_ep; } - complete_cached_srq_buffers(ep, req->srqidx_status); - pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); set_bit(PEER_ABORT, &ep->com.history); @@ -2819,6 +2879,23 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) stop_ep_timer(ep); /*FALLTHROUGH*/ case FPDU_MODE: + if (ep->com.qp && ep->com.qp->srq) { + srqidx = ABORT_RSS_SRQIDX_G( + be32_to_cpu(req->srqidx_status)); + if (srqidx) { + complete_cached_srq_buffers(ep, + req->srqidx_status); + } else { + /* Hold ep ref until finish_peer_abort() */ + c4iw_get_ep(&ep->com); + __state_set(&ep->com, ABORTING); + set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags); + read_tcb(ep); + break; + + } + } + if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_ERROR; ret = c4iw_modify_qp(ep->com.qp->rhp, @@ -3321,7 +3398,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) laddr6->sin6_addr.s6_addr, raddr6->sin6_addr.s6_addr, laddr6->sin6_port, - raddr6->sin6_port, 0, + raddr6->sin6_port, cm_id->tos, raddr6->sin6_scope_id); } if (!ep->dst) { @@ -3609,7 +3686,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); - close_complete_upcall(ep, -ECONNRESET); ret = send_abort(ep); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); @@ -3720,6 +3796,80 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, return; } +static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word) +{ + u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]); + u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]); + u64 t; + u32 shift = 32; + + t = (thi << shift) | (tlo >> shift); + + return t; +} + +static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift) +{ + u32 v; + u64 t = be64_to_cpu(tcb[(31 - word) / 2]); + + if (word & 0x1) + shift += 32; + v = (t >> shift) & mask; + return v; +} + +static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_get_tcb_rpl *rpl = cplhdr(skb); + __be64 *tcb = (__be64 *)(rpl + 1); + unsigned int tid = GET_TID(rpl); + struct c4iw_ep *ep; + u64 t_flags_64; + u32 rx_pdu_out; + + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to + * determine if there's a rx PDU feedback event pending. + * + * If that bit is set, it means we'll need to re-read the TCB's + * rq_start value. The final value is the one present in a TCB + * with the TF_RX_PDU_OUT bit cleared. + */ + + t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W); + rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S; + + c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */ + c4iw_put_ep(&ep->com); /* from read_tcb() */ + + /* If TF_RX_PDU_OUT bit is set, re-read the TCB */ + if (rx_pdu_out) { + if (++ep->rx_pdu_out_cnt >= 2) { + WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n"); + goto cleanup; + } + read_tcb(ep); + return 0; + } + + ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W, + TCB_RQ_START_S); +cleanup: + pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx); + + if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) + finish_peer_abort(dev, ep); + else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) + send_abort_req(ep); + else + WARN_ONCE(1, "unexpected state!"); + + return 0; +} + static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_fw6_msg *rpl = cplhdr(skb); @@ -4040,6 +4190,7 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = { [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, [CPL_FW4_ACK] = fw4_ack, + [CPL_GET_TCB_RPL] = read_tcb_rpl, [CPL_FW6_MSG] = deferred_fw6_msg, [CPL_RX_PKT] = rx_pkt, [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe, @@ -4271,6 +4422,7 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { [CPL_RDMA_TERMINATE] = sched, [CPL_FW4_ACK] = sched, [CPL_SET_TCB_RPL] = set_tcb_rpl, + [CPL_GET_TCB_RPL] = sched, [CPL_FW6_MSG] = fw6_msg, [CPL_RX_PKT] = sched }; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f0fceadd0d12..3a0923f7c60e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -982,6 +982,9 @@ struct c4iw_ep { int rcv_win; u32 snd_wscale; struct c4iw_ep_stats stats; + u32 srqe_idx; + u32 rx_pdu_out_cnt; + struct sk_buff *peer_abort_skb; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 96760a36b9fc..8d1ab8273c2b 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -502,10 +502,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { __be64 *pages; - int shift, n, len; - int i, k, entry; + int shift, n, i; int err = -ENOMEM; - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -541,7 +540,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(mhp->umem)) goto err_free_skb; - shift = mhp->umem->page_shift; + shift = PAGE_SHIFT; n = mhp->umem->nmap; err = alloc_pbl(mhp, n); @@ -556,21 +555,16 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { - len = sg_dma_len(sg) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address(sg) + - (k << shift)); - if (i == PAGE_SIZE / sizeof *pages) { - err = write_pbl(&mhp->rhp->rdev, - pages, - mhp->attr.pbl_addr + (n << 3), i, - mhp->wr_waitp); - if (err) - goto pbl_done; - n += i; - i = 0; - } + for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { + pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); + if (i == PAGE_SIZE / sizeof(*pages)) { + err = write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (n << 3), i, + mhp->wr_waitp); + if (err) + goto pbl_done; + n += i; + i = 0; } } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index f59bf7e5a589..680b5e98491d 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static int c4iw_deallocate_pd(struct ib_pd *pd) +static void c4iw_deallocate_pd(struct ib_pd *pd) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -221,15 +221,13 @@ static int c4iw_deallocate_pd(struct ib_pd *pd) mutex_lock(&rhp->rdev.stats.lock); rhp->rdev.stats.pd.cur--; mutex_unlock(&rhp->rdev.stats.lock); - kfree(php); - return 0; } -static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct c4iw_pd *php; + struct c4iw_pd *php = to_c4iw_pd(pd); + struct ib_device *ibdev = pd->device; u32 pdid; struct c4iw_dev *rhp; @@ -237,12 +235,8 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, rhp = (struct c4iw_dev *) ibdev; pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); if (!pdid) - return ERR_PTR(-EINVAL); - php = kzalloc(sizeof(*php), GFP_KERNEL); - if (!php) { - c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid); - return ERR_PTR(-ENOMEM); - } + return -EINVAL; + php->pdid = pdid; php->rhp = rhp; if (context) { @@ -250,7 +244,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { c4iw_deallocate_pd(&php->ibpd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } mutex_lock(&rhp->rdev.stats.lock); @@ -259,7 +253,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; mutex_unlock(&rhp->rdev.stats.lock); pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php); - return &php->ibpd; + return 0; } static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, @@ -570,6 +564,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), }; void c4iw_register_device(struct work_struct *work) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index fff6d48d262f..b170817b2741 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -35,6 +35,7 @@ #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" +#include "t4_tcb.h" #include "t4fw_ri_api.h" #define T4_MAX_NUM_PD 65536 diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 8ca8d74dfb6a..9ee86daf1700 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1114,10 +1114,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, - struct ib_ucontext *context, - struct ib_udata *udata); -int hns_roce_dealloc_pd(struct ib_pd *pd); +int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void hns_roce_dealloc_pd(struct ib_pd *pd); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index fa08c22aad66..c8c90072badd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -711,13 +711,14 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) struct ib_qp_attr attr = { 0 }; struct hns_roce_v1_priv *priv; struct hns_roce_qp *hr_qp; + struct ib_device *ibdev; struct ib_cq *cq; struct ib_pd *pd; union ib_gid dgid; u64 subnet_prefix; int attr_mask = 0; + int ret = -ENOMEM; int i, j; - int ret; u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 }; u8 phy_port; u8 port = 0; @@ -742,12 +743,16 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_cq->ib_cq.cq_context = NULL; atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); - pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL); - if (IS_ERR(pd)) { - dev_err(dev, "Create pd for reserved loop qp failed!"); - ret = -ENOMEM; + ibdev = &hr_dev->ib_dev; + pd = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (pd) + goto alloc_mem_failed; + + pd->device = ibdev; + ret = hns_roce_alloc_pd(pd, NULL, NULL); + if (ret) goto alloc_pd_failed; - } + free_mr->mr_free_pd = to_hr_pd(pd); free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; free_mr->mr_free_pd->ibpd.uobject = NULL; @@ -854,10 +859,12 @@ create_lp_qp_failed: dev_err(dev, "Destroy qp %d for mr free failed!\n", i); } - if (hns_roce_dealloc_pd(pd)) - dev_err(dev, "Destroy pd for create_lp_qp failed!\n"); + hns_roce_dealloc_pd(pd); alloc_pd_failed: + kfree(pd); + +alloc_mem_failed: if (hns_roce_ib_destroy_cq(cq)) dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); @@ -891,9 +898,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) if (ret) dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); - ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); - if (ret) - dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret); + hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); } static int hns_roce_db_init(struct hns_roce_dev *hr_dev) @@ -1866,9 +1871,8 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx) { struct hns_roce_v1_mpt_entry *mpt_entry; - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; u64 *pages; - int entry; int i; /* MPT filled into mailbox buf */ @@ -1923,8 +1927,8 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, return -ENOMEM; i = 0; - for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { - pages[i] = ((u64)sg_dma_address(sg)) >> 12; + for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { + pages[i] = ((u64)sg_page_iter_dma_address(&sg_iter)) >> 12; /* Directly record to MTPT table firstly 7 entry */ if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 19fefff4f699..c648ee825852 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2084,12 +2084,10 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, struct hns_roce_mr *mr) { - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; u64 page_addr; u64 *pages; - int i, j; - int len; - int entry; + int i; mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); @@ -2102,17 +2100,14 @@ static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, return -ENOMEM; i = 0; - for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - for (j = 0; j < len; ++j) { - page_addr = sg_dma_address(sg) + - (j << mr->umem->page_shift); - pages[i] = page_addr >> 6; - /* Record the first 2 entry directly to MTPT table */ - if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) - goto found; - i++; - } + for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { + page_addr = sg_page_iter_dma_address(&sg_iter); + pages[i] = page_addr >> 6; + + /* Record the first 2 entry directly to MTPT table */ + if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) + goto found; + i++; } found: mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 67a8c4333f4f..ccf10622586c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -472,6 +472,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .query_pkey = hns_roce_query_pkey, .query_port = hns_roce_query_port, .reg_user_mr = hns_roce_reg_user_mr, + INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd), }; static const struct ib_device_ops hns_roce_dev_mr_ops = { diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index da4fffedb879..b09f1cde2ff5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -976,12 +976,11 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem) { struct device *dev = hr_dev->dev; - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; unsigned int order; - int i, k, entry; int npage = 0; int ret = 0; - int len; + int i; u64 page_addr; u64 *pages; u32 bt_page_size; @@ -1014,29 +1013,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, i = n = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - for (k = 0; k < len; ++k) { - page_addr = - sg_dma_address(sg) + (k << umem->page_shift); - if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { - if (page_addr & ((1 << mtt->page_shift) - 1)) { - dev_err(dev, "page_addr 0x%llx is not page_shift %d alignment!\n", - page_addr, mtt->page_shift); - ret = -EINVAL; - goto out; - } - pages[i++] = page_addr; - } - npage++; - if (i == bt_page_size / sizeof(u64)) { - ret = hns_roce_write_mtt(hr_dev, mtt, n, i, - pages); - if (ret) - goto out; - n += i; - i = 0; + for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + page_addr = sg_page_iter_dma_address(&sg_iter); + if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { + if (page_addr & ((1 << mtt->page_shift) - 1)) { + dev_err(dev, + "page_addr 0x%llx is not page_shift %d alignment!\n", + page_addr, mtt->page_shift); + ret = -EINVAL; + goto out; } + pages[i++] = page_addr; + } + npage++; + if (i == bt_page_size / sizeof(u64)) { + ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); + if (ret) + goto out; + n += i; + i = 0; } } @@ -1052,10 +1047,8 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_umem *umem) { - struct scatterlist *sg; - int i = 0, j = 0, k; - int entry; - int len; + struct sg_dma_page_iter sg_iter; + int i = 0, j = 0; u64 page_addr; u32 pbl_bt_sz; @@ -1063,27 +1056,22 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, return 0; pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - for (k = 0; k < len; ++k) { - page_addr = sg_dma_address(sg) + - (k << umem->page_shift); + for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + page_addr = sg_page_iter_dma_address(&sg_iter); + if (!hr_dev->caps.pbl_hop_num) { + mr->pbl_buf[i++] = page_addr >> 12; + } else if (hr_dev->caps.pbl_hop_num == 1) { + mr->pbl_buf[i++] = page_addr; + } else { + if (hr_dev->caps.pbl_hop_num == 2) + mr->pbl_bt_l1[i][j] = page_addr; + else if (hr_dev->caps.pbl_hop_num == 3) + mr->pbl_bt_l2[i][j] = page_addr; - if (!hr_dev->caps.pbl_hop_num) { - mr->pbl_buf[i++] = page_addr >> 12; - } else if (hr_dev->caps.pbl_hop_num == 1) { - mr->pbl_buf[i++] = page_addr; - } else { - if (hr_dev->caps.pbl_hop_num == 2) - mr->pbl_bt_l1[i][j] = page_addr; - else if (hr_dev->caps.pbl_hop_num == 3) - mr->pbl_bt_l2[i][j] = page_addr; - - j++; - if (j >= (pbl_bt_sz / 8)) { - i++; - j = 0; - } + j++; + if (j >= (pbl_bt_sz / 8)) { + i++; + j = 0; } } } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 4a29b2cb9bab..b9b97c5e97e6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -57,24 +57,19 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev) hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap); } -struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, - struct ib_ucontext *context, - struct ib_udata *udata) +int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ib_dev = ibpd->device; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); struct device *dev = hr_dev->dev; - struct hns_roce_pd *pd; + struct hns_roce_pd *pd = to_hr_pd(ibpd); int ret; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - kfree(pd); dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n"); - return ERR_PTR(ret); + return ret; } if (context) { @@ -83,21 +78,17 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n"); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } EXPORT_SYMBOL_GPL(hns_roce_alloc_pd); -int hns_roce_dealloc_pd(struct ib_pd *pd) +void hns_roce_dealloc_pd(struct ib_pd *pd) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); - kfree(to_hr_pd(pd)); - - return 0; } EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 73066bf38e47..51ca22b9f960 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -640,19 +640,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->mtt.mtt_type = MTT_TYPE_WQE; + page_shift = PAGE_SHIFT; if (hr_dev->caps.mtt_buf_pg_sz) { npages = (ib_umem_page_count(hr_qp->umem) + (1 << hr_dev->caps.mtt_buf_pg_sz) - 1) / - (1 << hr_dev->caps.mtt_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + (1 << hr_dev->caps.mtt_buf_pg_sz); + page_shift += hr_dev->caps.mtt_buf_pg_sz; ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &hr_qp->mtt); } else { ret = hns_roce_mtt_init(hr_dev, - ib_umem_page_count(hr_qp->umem), - hr_qp->umem->page_shift, - &hr_qp->mtt); + ib_umem_page_count(hr_qp->umem), + page_shift, &hr_qp->mtt); } if (ret) { dev_err(dev, "hns_roce_mtt_init error for create qp\n"); diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 59e978141ad4..c5a881172524 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -601,7 +601,6 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev) if (!atomic_dec_and_test(&iwpd->usecount)) return; i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id); - kfree(iwpd); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index d4ab46dd9e6c..d5fb2b927587 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -312,16 +312,15 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ /** * i40iw_alloc_pd - allocate protection domain - * @ibdev: device pointer from stack + * @pd: PD pointer * @context: user context created during alloc * @udata: user data */ -static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct i40iw_pd *iwpd; - struct i40iw_device *iwdev = to_iwdev(ibdev); + struct i40iw_pd *iwpd = to_iwpd(pd); + struct i40iw_device *iwdev = to_iwdev(pd->device); struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_alloc_pd_resp uresp; struct i40iw_sc_pd *sc_pd; @@ -330,19 +329,13 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, int err; if (iwdev->closing) - return ERR_PTR(-ENODEV); + return -ENODEV; err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds, iwdev->max_pd, &pd_id, &iwdev->next_pd); if (err) { i40iw_pr_err("alloc resource failed\n"); - return ERR_PTR(err); - } - - iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL); - if (!iwpd) { - err = -ENOMEM; - goto free_res; + return err; } sc_pd = &iwpd->sc_pd; @@ -361,25 +354,23 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, } i40iw_add_pdusecount(iwpd); - return &iwpd->ibpd; + return 0; + error: - kfree(iwpd); -free_res: i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id); - return ERR_PTR(err); + return err; } /** * i40iw_dealloc_pd - deallocate pd * @ibpd: ptr of pd to be deallocated */ -static int i40iw_dealloc_pd(struct ib_pd *ibpd) +static void i40iw_dealloc_pd(struct ib_pd *ibpd) { struct i40iw_pd *iwpd = to_iwpd(ibpd); struct i40iw_device *iwdev = to_iwdev(ibpd->device); i40iw_rem_pdusecount(iwpd, iwdev); - return 0; } /** @@ -1369,32 +1360,29 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, { struct ib_umem *region = iwmr->region; struct i40iw_pbl *iwpbl = &iwmr->iwpbl; - int chunk_pages, entry, i; struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc; struct i40iw_pble_info *pinfo; - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; u64 pg_addr = 0; u32 idx = 0; + bool first_pg = true; pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf; - for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { - chunk_pages = sg_dma_len(sg) >> region->page_shift; - if ((iwmr->type == IW_MEMREG_TYPE_QP) && - !iwpbl->qp_mr.sq_page) - iwpbl->qp_mr.sq_page = sg_page(sg); - for (i = 0; i < chunk_pages; i++) { - pg_addr = sg_dma_address(sg) + - (i << region->page_shift); + if (iwmr->type == IW_MEMREG_TYPE_QP) + iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl); - if ((entry + i) == 0) - *pbl = cpu_to_le64(pg_addr & iwmr->page_msk); - else if (!(pg_addr & ~iwmr->page_msk)) - *pbl = cpu_to_le64(pg_addr); - else - continue; - pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); - } + for_each_sg_dma_page (region->sg_head.sgl, &sg_iter, region->nmap, 0) { + pg_addr = sg_page_iter_dma_address(&sg_iter); + if (first_pg) + *pbl = cpu_to_le64(pg_addr & iwmr->page_msk); + else if (!(pg_addr & ~iwmr->page_msk)) + *pbl = cpu_to_le64(pg_addr); + else + continue; + + first_pg = false; + pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); } } @@ -2750,6 +2738,7 @@ static const struct ib_device_ops i40iw_dev_ops = { .query_qp = i40iw_query_qp, .reg_user_mr = i40iw_reg_user_mr, .req_notify_cq = i40iw_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd), }; /** diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d66002a31000..c0f6aea7ed7c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1186,38 +1186,27 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } } -static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct mlx4_ib_pd *pd; + struct mlx4_ib_pd *pd = to_mpd(ibpd); + struct ib_device *ibdev = ibpd->device; int err; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + if (err) + return err; - if (context) - if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { - mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); - kfree(pd); - return ERR_PTR(-EFAULT); - } - return &pd->ibpd; + if (context && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); + return -EFAULT; + } + return 0; } -static int mlx4_ib_dealloc_pd(struct ib_pd *pd) +static void mlx4_ib_dealloc_pd(struct ib_pd *pd) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); - kfree(pd); - - return 0; } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, @@ -2580,6 +2569,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .req_notify_cq = mlx4_ib_arm_cq, .rereg_user_mr = mlx4_ib_rereg_user_mr, .resize_cq = mlx4_ib_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd), }; static const struct ib_device_ops mlx4_ib_dev_wq_ops = { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index cd43e39ced87..8e6d23d6859f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1204,14 +1204,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto obj_destroy; + goto err_copy; obj->obj_id = get_enc_obj_id(opcode, obj_id); return 0; -obj_destroy: +err_copy: if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) devx_cleanup_mkey(obj); +obj_destroy: mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 6d7b8bad4b61..95ac97af6166 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -78,8 +78,10 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, &rep_profile)) + if (!__mlx5_ib_add(ibdev, &rep_profile)) { + ib_dealloc_device(&ibdev->ib_dev); return -EINVAL; + } rep->rep_if[REP_IB].priv = ibdev; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 76d6c2557d0c..f9cddc6f2ab6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2280,30 +2280,24 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) return 0; } -static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct mlx5_ib_pd *pd = to_mpd(ibpd); + struct ib_device *ibdev = ibpd->device; struct mlx5_ib_alloc_pd_resp resp; - struct mlx5_ib_pd *pd; int err; u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - uid = context ? to_mucontext(context)->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), out, sizeof(out)); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + if (err) + return err; pd->pdn = MLX5_GET(alloc_pd_out, out, pd); pd->uid = uid; @@ -2311,23 +2305,19 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } -static int mlx5_ib_dealloc_pd(struct ib_pd *pd) +static void mlx5_ib_dealloc_pd(struct ib_pd *pd) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); - kfree(mpd); - - return 0; } enum { @@ -4680,23 +4670,28 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; + struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); + ibdev = &dev->ib_dev; mutex_init(&devr->mutex); - devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); - if (IS_ERR(devr->p0)) { - ret = PTR_ERR(devr->p0); - goto error0; - } - devr->p0->device = &dev->ib_dev; + devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (!devr->p0) + return -ENOMEM; + + devr->p0->device = ibdev; devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); + ret = mlx5_ib_alloc_pd(devr->p0, NULL, NULL); + if (ret) + goto error0; + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); @@ -4794,6 +4789,7 @@ error2: error1: mlx5_ib_dealloc_pd(devr->p0); error0: + kfree(devr->p0); return ret; } @@ -4809,6 +4805,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_xrcd(devr->x1); mlx5_ib_destroy_cq(devr->c0); mlx5_ib_dealloc_pd(devr->p0); + kfree(devr->p0); /* Make sure no change P_Key work items are still executing */ for (port = 0; port < dev->num_ports; ++port) @@ -5938,6 +5935,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), }; static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 1bb67562c8c8..516c8cf9c0fd 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -374,40 +374,30 @@ static int mthca_mmap_uar(struct ib_ucontext *context, return 0; } -static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct mthca_pd *pd; + struct ib_device *ibdev = ibpd->device; + struct mthca_pd *pd = to_mpd(ibpd); int err; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + if (err) + return err; if (context) { if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { mthca_pd_free(to_mdev(ibdev), pd); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } -static int mthca_dealloc_pd(struct ib_pd *pd) +static void mthca_dealloc_pd(struct ib_pd *pd) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); - kfree(pd); - - return 0; } static struct ib_ah *mthca_ah_create(struct ib_pd *pd, @@ -907,12 +897,11 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; struct mthca_mr *mr; struct mthca_reg_mr ucmd; u64 *pages; - int shift, n, len; - int i, k, entry; + int n, i; int err = 0; int write_mtt_size; @@ -939,7 +928,6 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err; } - shift = mr->umem->page_shift; n = mr->umem->nmap; mr->mtt = mthca_alloc_mtt(dev, n); @@ -958,21 +946,19 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { - len = sg_dma_len(sg) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(sg) + (k << shift); - /* - * Be friendly to write_mtt and pass it chunks - * of appropriate size. - */ - if (i == write_mtt_size) { - err = mthca_write_mtt(dev, mr->mtt, n, pages, i); - if (err) - goto mtt_done; - n += i; - i = 0; - } + for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { + pages[i++] = sg_page_iter_dma_address(&sg_iter); + + /* + * Be friendly to write_mtt and pass it chunks + * of appropriate size. + */ + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; } } @@ -983,7 +969,7 @@ mtt_done: if (err) goto err_mtt; - err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length, + err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, PAGE_SHIFT, virt, length, convert_access(acc), mr); if (err) @@ -1228,6 +1214,7 @@ static const struct ib_device_ops mthca_dev_ops = { .query_qp = mthca_query_qp, .reg_user_mr = mthca_reg_user_mr, .resize_cq = mthca_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd), }; static const struct ib_device_ops mthca_dev_arbel_srq_ops = { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 6eb991d40035..f18b28ae4bd9 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -658,10 +658,11 @@ static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /** * nes_alloc_pd */ -static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, struct ib_udata *udata) +static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct nes_pd *nespd; + struct ib_device *ibdev = pd->device; + struct nes_pd *nespd = to_nespd(pd); struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; @@ -676,15 +677,8 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD); - if (err) { - return ERR_PTR(err); - } - - nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL); - if (!nespd) { - nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - return ERR_PTR(-ENOMEM); - } + if (err) + return err; nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", nespd, dev_name(&nesvnic->nesibdev->ibdev.dev)); @@ -700,16 +694,14 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) { nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - kfree(nespd); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } uresp.pd_id = nespd->pd_id; uresp.mmap_db_index = nespd->mmap_db_index; if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) { nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - kfree(nespd); - return ERR_PTR(-EFAULT); + return -EFAULT; } set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); @@ -718,14 +710,14 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, } nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd); - return &nespd->ibpd; + return 0; } /** * nes_dealloc_pd */ -static int nes_dealloc_pd(struct ib_pd *ibpd) +static void nes_dealloc_pd(struct ib_pd *ibpd) { struct nes_ucontext *nesucontext; struct nes_pd *nespd = to_nespd(ibpd); @@ -748,9 +740,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) nespd->pd_id, nespd); nes_free_resource(nesadapter, nesadapter->allocated_pds, (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12)); - kfree(nespd); - - return 0; } @@ -3658,6 +3647,7 @@ static const struct ib_device_ops nes_dev_ops = { .query_qp = nes_query_qp, .reg_user_mr = nes_reg_user_mr, .req_notify_cq = nes_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, nes_pd, ibpd), }; /** diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 88970a6bb555..0de83c92691f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -179,6 +179,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .reg_user_mr = ocrdma_reg_user_mr, .req_notify_cq = ocrdma_arm_cq, .resize_cq = ocrdma_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd), }; static const struct ib_device_ops ocrdma_dev_srq_ops = { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2a62936bef4d..ed5da67b693d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -367,17 +367,12 @@ static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd) return status; } -static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, - struct ocrdma_ucontext *uctx, - struct ib_udata *udata) +static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) { - struct ocrdma_pd *pd = NULL; int status; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - if (udata && uctx && dev->attr.max_dpp_pds) { pd->dpp_enabled = ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; @@ -386,15 +381,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, dev->attr.wqe_size) : 0; } - if (dev->pd_mgr->pd_prealloc_valid) { - status = ocrdma_get_pd_num(dev, pd); - if (status == 0) { - return pd; - } else { - kfree(pd); - return ERR_PTR(status); - } - } + if (dev->pd_mgr->pd_prealloc_valid) + return ocrdma_get_pd_num(dev, pd); retry: status = ocrdma_mbx_alloc_pd(dev, pd); @@ -403,13 +391,11 @@ retry: pd->dpp_enabled = false; pd->num_dpp_qp = 0; goto retry; - } else { - kfree(pd); - return ERR_PTR(status); } + return status; } - return pd; + return 0; } static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, @@ -418,30 +404,33 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, return (uctx->cntxt_pd == pd); } -static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, +static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { - int status; - if (dev->pd_mgr->pd_prealloc_valid) - status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); + ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); else - status = ocrdma_mbx_dealloc_pd(dev, pd); - - kfree(pd); - return status; + ocrdma_mbx_dealloc_pd(dev, pd); } static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev, struct ocrdma_ucontext *uctx, struct ib_udata *udata) { - int status = 0; + struct ib_device *ibdev = &dev->ibdev; + struct ib_pd *pd; + int status; - uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata); - if (IS_ERR(uctx->cntxt_pd)) { - status = PTR_ERR(uctx->cntxt_pd); - uctx->cntxt_pd = NULL; + pd = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (!pd) + return -ENOMEM; + + pd->device = ibdev; + uctx->cntxt_pd = get_ocrdma_pd(pd); + + status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata); + if (status) { + kfree(uctx->cntxt_pd); goto err; } @@ -460,6 +449,7 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } + kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; (void)_ocrdma_dealloc_pd(dev, pd); return 0; @@ -537,6 +527,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, return &ctx->ibucontext; cpy_err: + ocrdma_dealloc_ucontext_pd(ctx); pd_err: ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); map_err: @@ -658,10 +649,10 @@ dpp_map_err: return status; } -struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_pd *pd; struct ocrdma_ucontext *uctx = NULL; @@ -677,11 +668,10 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, } } - pd = _ocrdma_alloc_pd(dev, uctx, udata); - if (IS_ERR(pd)) { - status = PTR_ERR(pd); + pd = get_ocrdma_pd(ibpd); + status = _ocrdma_alloc_pd(dev, pd, uctx, udata); + if (status) goto exit; - } pd_mapping: if (udata && context) { @@ -689,25 +679,22 @@ pd_mapping: if (status) goto err; } - return &pd->ibpd; + return 0; err: - if (is_uctx_pd) { + if (is_uctx_pd) ocrdma_release_ucontext_pd(uctx); - } else { - if (_ocrdma_dealloc_pd(dev, pd)) - pr_err("%s: _ocrdma_dealloc_pd() failed\n", __func__); - } + else + _ocrdma_dealloc_pd(dev, pd); exit: - return ERR_PTR(status); + return status; } -int ocrdma_dealloc_pd(struct ib_pd *ibpd) +void ocrdma_dealloc_pd(struct ib_pd *ibpd) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_ucontext *uctx = NULL; - int status = 0; u64 usr_db; uctx = pd->uctx; @@ -721,11 +708,10 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) if (is_ucontext_pd(uctx, pd)) { ocrdma_release_ucontext_pd(uctx); - return status; + return; } } - status = _ocrdma_dealloc_pd(dev, pd); - return status; + _ocrdma_dealloc_pd(dev, pd); } static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, @@ -854,10 +840,11 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, u32 num_pbes) { struct ocrdma_pbe *pbe; - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; struct ib_umem *umem = mr->umem; - int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0; + int pbe_cnt, total_num_pbes = 0; + u64 pg_addr; if (!mr->hwmr.num_pbes) return; @@ -865,36 +852,26 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, pbe = (struct ocrdma_pbe *)pbl_tbl->va; pbe_cnt = 0; - shift = umem->page_shift; + for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + /* store the page address in pbe */ + pg_addr = sg_page_iter_dma_address(&sg_iter); + pbe->pa_lo = cpu_to_le32(pg_addr); + pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr)); + pbe_cnt += 1; + total_num_pbes += 1; + pbe++; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - pages = sg_dma_len(sg) >> shift; - for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { - /* store the page address in pbe */ - pbe->pa_lo = - cpu_to_le32(sg_dma_address(sg) + - (pg_cnt << shift)); - pbe->pa_hi = - cpu_to_le32(upper_32_bits(sg_dma_address(sg) + - (pg_cnt << shift))); - pbe_cnt += 1; - total_num_pbes += 1; - pbe++; - - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; - - /* if the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == - (mr->hwmr.pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - pbe_cnt = 0; - } + /* if done building pbes, issue the mbx cmd. */ + if (total_num_pbes == num_pbes) + return; + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == (mr->hwmr.pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + pbe_cnt = 0; } } } @@ -926,7 +903,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, if (status) goto umem_err; - mr->hwmr.pbe_size = BIT(mr->umem->page_shift); + mr->hwmr.pbe_size = PAGE_SIZE; mr->hwmr.fbo = ib_umem_offset(mr->umem); mr->hwmr.va = usr_addr; mr->hwmr.len = len; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b69cfdce7970..1fd66721c930 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -70,9 +70,9 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -struct ib_pd *ocrdma_alloc_pd(struct ib_device *, - struct ib_ucontext *, struct ib_udata *); -int ocrdma_dealloc_pd(struct ib_pd *pd); +int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, + struct ib_udata *udata); +void ocrdma_dealloc_pd(struct ib_pd *pd); struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 878e9e23652b..44ce4989dcef 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -239,6 +239,7 @@ static const struct ib_device_ops qedr_dev_ops = { .reg_user_mr = qedr_reg_user_mr, .req_notify_cq = qedr_arm_cq, .resize_cq = qedr_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), }; static int qedr_register_device(struct qedr_dev *dev) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 989f08633fbe..a613ebde322f 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -450,11 +450,12 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) vma->vm_page_prot); } -struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, struct ib_udata *udata) +int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct qedr_dev *dev = get_qedr_dev(ibdev); - struct qedr_pd *pd; + struct qedr_pd *pd = get_qedr_pd(ibpd); u16 pd_id; int rc; @@ -463,16 +464,12 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, if (!dev->rdma_ctx) { DP_ERR(dev, "invalid RDMA context\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id); if (rc) - goto err; + return rc; pd->pd_id = pd_id; @@ -485,36 +482,23 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, if (rc) { DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id); - goto err; + return rc; } pd->uctx = get_qedr_ucontext(context); pd->uctx->pd = pd; } - return &pd->ibpd; - -err: - kfree(pd); - return ERR_PTR(rc); + return 0; } -int qedr_dealloc_pd(struct ib_pd *ibpd) +void qedr_dealloc_pd(struct ib_pd *ibpd) { struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); - if (!pd) { - pr_err("Invalid PD received in dealloc_pd\n"); - return -EINVAL; - } - DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); - - kfree(pd); - - return 0; } static void qedr_free_pbl(struct qedr_dev *dev, @@ -636,13 +620,12 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, struct qedr_pbl *pbl, struct qedr_pbl_info *pbl_info, u32 pg_shift) { - int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + int pbe_cnt, total_num_pbes = 0; u32 fw_pg_cnt, fw_pg_per_umem_pg; struct qedr_pbl *pbl_tbl; - struct scatterlist *sg; + struct sg_dma_page_iter sg_iter; struct regpair *pbe; u64 pg_addr; - int entry; if (!pbl_info->num_pbes) return; @@ -663,38 +646,32 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, pbe_cnt = 0; - shift = umem->page_shift; + fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift); - fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift); + for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + pg_addr = sg_page_iter_dma_address(&sg_iter); + for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) { + pbe->lo = cpu_to_le32(pg_addr); + pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - pages = sg_dma_len(sg) >> shift; - pg_addr = sg_dma_address(sg); - for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { - for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) { - pbe->lo = cpu_to_le32(pg_addr); - pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); + pg_addr += BIT(pg_shift); + pbe_cnt++; + total_num_pbes++; + pbe++; - pg_addr += BIT(pg_shift); - pbe_cnt++; - total_num_pbes++; - pbe++; + if (total_num_pbes == pbl_info->num_pbes) + return; - if (total_num_pbes == pbl_info->num_pbes) - return; - - /* If the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == - (pbl_info->pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct regpair *)pbl_tbl->va; - pbe_cnt = 0; - } - - fw_pg_cnt++; + /* If the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; } + + fw_pg_cnt++; } } } @@ -755,7 +732,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, } fw_pages = ib_umem_page_count(q->umem) << - (q->umem->page_shift - FW_PAGE_SHIFT); + (PAGE_SHIFT - FW_PAGE_SHIFT); rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0); if (rc) @@ -1471,7 +1448,7 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, page_cnt = srq->usrq.pbl_info.num_pbes; pbl_base_addr = srq->usrq.pbl_tbl->pa; phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; - page_size = BIT(srq->usrq.umem->page_shift); + page_size = PAGE_SIZE; } else { struct qed_chain *pbl; @@ -2723,7 +2700,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, goto err1; qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table, - &mr->info.pbl_info, mr->umem->page_shift); + &mr->info.pbl_info, PAGE_SHIFT); rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); if (rc) { @@ -2744,7 +2721,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); - mr->hw_mr.page_size_log = mr->umem->page_shift; + mr->hw_mr.page_size_log = PAGE_SHIFT; mr->hw_mr.fbo = ib_umem_offset(mr->umem); mr->hw_mr.length = len; mr->hw_mr.vaddr = usr_addr; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 1852b7012bf4..97a6ff3f9afb 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -47,9 +47,9 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *); int qedr_dealloc_ucontext(struct ib_ucontext *); int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -struct ib_pd *qedr_alloc_pd(struct ib_device *, - struct ib_ucontext *, struct ib_udata *); -int qedr_dealloc_pd(struct ib_pd *pd); +int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, + struct ib_udata *udata); +void qedr_dealloc_pd(struct ib_pd *pd); struct ib_cq *qedr_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 577d9301251a..256ad2f236c8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -352,6 +352,7 @@ static const struct ib_device_ops usnic_dev_ops = { .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, + INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd), }; /* Start of PF discovery section */ @@ -470,15 +471,17 @@ static void usnic_ib_undiscover_pf(struct kref *kref) &usnic_ib_ibdev_list, ib_dev_link) { if (us_ibdev->pdev == dev) { list_del(&us_ibdev->ib_dev_link); - usnic_ib_device_remove(us_ibdev); found = true; break; } } - WARN(!found, "Failed to remove PF %s\n", pci_name(dev)); mutex_unlock(&usnic_ib_ibdev_list_lock); + if (found) + usnic_ib_device_remove(us_ibdev); + else + WARN(1, "Failed to remove PF %s\n", pci_name(dev)); } static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9dea18106247..0ced89b51448 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -456,37 +456,23 @@ int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, return 0; } -struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct usnic_ib_pd *pd; + struct usnic_ib_pd *pd = to_upd(ibpd); void *umem_pd; - usnic_dbg("\n"); - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - umem_pd = pd->umem_pd = usnic_uiom_alloc_pd(); if (IS_ERR_OR_NULL(umem_pd)) { - kfree(pd); - return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM); + return umem_pd ? PTR_ERR(umem_pd) : -ENOMEM; } - usnic_info("domain 0x%p allocated for context 0x%p and device %s\n", - pd, context, dev_name(&ibdev->dev)); - return &pd->ibpd; + return 0; } -int usnic_ib_dealloc_pd(struct ib_pd *pd) +void usnic_ib_dealloc_pd(struct ib_pd *pd) { - usnic_info("freeing domain 0x%p\n", pd); - usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); - kfree(pd); - return 0; } struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 99a6d81c2bcd..44a9d2f82bf5 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -51,10 +51,9 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num); int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); -struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int usnic_ib_dealloc_pd(struct ib_pd *pd); +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata); +void usnic_ib_dealloc_pd(struct ib_pd *pd); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e582beaf9430..47e653d2495c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -195,6 +195,7 @@ static const struct ib_device_ops pvrdma_dev_ops = { .query_qp = pvrdma_query_qp, .reg_user_mr = pvrdma_reg_user_mr, .req_notify_cq = pvrdma_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), }; static const struct ib_device_ops pvrdma_dev_srq_ops = { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c index fb0c5c0976b3..7944c58ded0e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -183,25 +183,20 @@ int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, struct ib_umem *umem, u64 offset) { u64 i = offset; - int j, entry; - int ret = 0, len = 0; - struct scatterlist *sg; + int ret = 0; + struct sg_dma_page_iter sg_iter; if (offset >= pdir->npages) return -EINVAL; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - for (j = 0; j < len; j++) { - dma_addr_t addr = sg_dma_address(sg) + - (j << umem->page_shift); + for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + dma_addr_t addr = sg_page_iter_dma_address(&sg_iter); - ret = pvrdma_page_dir_insert_dma(pdir, i, addr); - if (ret) - goto exit; + ret = pvrdma_page_dir_insert_dma(pdir, i, addr); + if (ret) + goto exit; - i++; - } + i++; } exit: diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index fafb2add3b44..f44220f72e05 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -438,37 +438,29 @@ int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) /** * pvrdma_alloc_pd - allocate protection domain - * @ibdev: the IB device + * @ibpd: PD pointer * @context: user context * @udata: user data * * @return: the ib_pd protection domain pointer on success, otherwise errno. */ -struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct pvrdma_pd *pd; + struct ib_device *ibdev = ibpd->device; + struct pvrdma_pd *pd = to_vpd(ibpd); struct pvrdma_dev *dev = to_vdev(ibdev); - union pvrdma_cmd_req req; - union pvrdma_cmd_resp rsp; + union pvrdma_cmd_req req = {}; + union pvrdma_cmd_resp rsp = {}; struct pvrdma_cmd_create_pd *cmd = &req.create_pd; struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; struct pvrdma_alloc_pd_resp pd_resp = {0}; int ret; - void *ptr; /* Check allowed max pds */ if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd)) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ptr = ERR_PTR(-ENOMEM); - goto err; - } - - memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD; cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP); @@ -476,8 +468,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, dev_warn(&dev->pdev->dev, "failed to allocate protection domain, error: %d\n", ret); - ptr = ERR_PTR(ret); - goto freepd; + goto err; } pd->privileged = !context; @@ -490,18 +481,16 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, dev_warn(&dev->pdev->dev, "failed to copy back protection domain\n"); pvrdma_dealloc_pd(&pd->ibpd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } /* u32 pd handle */ - return &pd->ibpd; + return 0; -freepd: - kfree(pd); err: atomic_dec(&dev->num_pds); - return ptr; + return ret; } /** @@ -510,14 +499,13 @@ err: * * @return: 0 on success, otherwise errno. */ -int pvrdma_dealloc_pd(struct ib_pd *pd) +void pvrdma_dealloc_pd(struct ib_pd *pd) { struct pvrdma_dev *dev = to_vdev(pd->device); - union pvrdma_cmd_req req; + union pvrdma_cmd_req req = {}; struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd; int ret; - memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD; cmd->pd_handle = to_vpd(pd)->pd_handle; @@ -527,10 +515,7 @@ int pvrdma_dealloc_pd(struct ib_pd *pd) "could not dealloc protection domain, error: %d\n", ret); - kfree(to_vpd(pd)); atomic_dec(&dev->num_pds); - - return 0; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index f7f758d60110..ed91baad1ffa 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -399,10 +399,9 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata); int pvrdma_dealloc_ucontext(struct ib_ucontext *context); -struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int pvrdma_dealloc_pd(struct ib_pd *ibpd); +int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void pvrdma_dealloc_pd(struct ib_pd *ibpd); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index dcc1870b8d23..6033054b22fa 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -50,7 +50,7 @@ /** * rvt_alloc_pd - allocate a protection domain - * @ibdev: ib device + * @ibpd: PD * @context: optional user context * @udata: optional user data * @@ -58,19 +58,14 @@ * * Return: 0 on success */ -struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct rvt_dev_info *dev = ib_to_rvt(ibdev); - struct rvt_pd *pd; - struct ib_pd *ret; + struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); + int ret = 0; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } /* * While we could continue allocating protecetion domains, being * constrained only by system resources. The IBTA spec defines that @@ -81,8 +76,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, spin_lock(&dev->n_pds_lock); if (dev->n_pds_allocated == dev->dparms.props.max_pd) { spin_unlock(&dev->n_pds_lock); - kfree(pd); - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail; } @@ -92,8 +86,6 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, /* ib_alloc_pd() will initialize pd->ibpd. */ pd->user = !!udata; - ret = &pd->ibpd; - bail: return ret; } @@ -104,16 +96,11 @@ bail: * * Return: always 0 */ -int rvt_dealloc_pd(struct ib_pd *ibpd) +void rvt_dealloc_pd(struct ib_pd *ibpd) { - struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); spin_lock(&dev->n_pds_lock); dev->n_pds_allocated--; spin_unlock(&dev->n_pds_lock); - - kfree(pd); - - return 0; } diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 1892ca4a9746..7a887e4a45e7 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -50,9 +50,8 @@ #include -struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int rvt_dealloc_pd(struct ib_pd *ibpd); +int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void rvt_dealloc_pd(struct ib_pd *ibpd); #endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index b3f0c5578925..a19832c73d5a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -436,6 +436,7 @@ static const struct ib_device_ops rvt_dev_ops = { .req_notify_cq = rvt_req_notify_cq, .resize_cq = rvt_resize_cq, .unmap_fmr = rvt_unmap_fmr, + INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), }; static noinline int check_support(struct rvt_dev_info *rdi, int verb) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 2438093776a0..42f0f25e396c 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -162,11 +162,10 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int access, struct ib_udata *udata, struct rxe_mem *mem) { - int entry; struct rxe_map **map; struct rxe_phys_buf *buf = NULL; struct ib_umem *umem; - struct scatterlist *sg; + struct sg_page_iter sg_iter; int num_buf; void *vaddr; int err; @@ -191,16 +190,16 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, goto err1; } - mem->page_shift = umem->page_shift; - mem->page_mask = BIT(umem->page_shift) - 1; + mem->page_shift = PAGE_SHIFT; + mem->page_mask = PAGE_SIZE - 1; num_buf = 0; map = mem->map; if (length > 0) { buf = map[0]->buf; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - vaddr = page_address(sg_page(sg)); + for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { pr_warn("null vaddr\n"); err = -ENOMEM; @@ -208,7 +207,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, } buf->addr = (uintptr_t)vaddr; - buf->size = BIT(umem->page_shift); + buf->size = PAGE_SIZE; num_buf++; buf++; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b5c91df22047..cd3f14629ba8 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -46,6 +46,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_PD] = { .name = "rxe-pd", .size = sizeof(struct rxe_pd), + .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_AH] = { .name = "rxe-ah", @@ -119,8 +120,10 @@ static void rxe_cache_clean(size_t cnt) for (i = 0; i < cnt; i++) { type = &rxe_type_info[i]; - kmem_cache_destroy(type->cache); - type->cache = NULL; + if (!(type->flags & RXE_POOL_NO_ALLOC)) { + kmem_cache_destroy(type->cache); + type->cache = NULL; + } } } @@ -134,14 +137,17 @@ int rxe_cache_init(void) for (i = 0; i < RXE_NUM_TYPES; i++) { type = &rxe_type_info[i]; size = ALIGN(type->size, RXE_POOL_ALIGN); - type->cache = kmem_cache_create(type->name, size, - RXE_POOL_ALIGN, - RXE_POOL_CACHE_FLAGS, NULL); - if (!type->cache) { - pr_err("Unable to init kmem cache for %s\n", - type->name); - err = -ENOMEM; - goto err1; + if (!(type->flags & RXE_POOL_NO_ALLOC)) { + type->cache = + kmem_cache_create(type->name, size, + RXE_POOL_ALIGN, + RXE_POOL_CACHE_FLAGS, NULL); + if (!type->cache) { + pr_err("Unable to init kmem cache for %s\n", + type->name); + err = -ENOMEM; + goto err1; + } } } @@ -415,6 +421,37 @@ out_put_pool: return NULL; } +int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +{ + unsigned long flags; + + might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + + read_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != RXE_POOL_STATE_VALID) { + read_unlock_irqrestore(&pool->pool_lock, flags); + return -EINVAL; + } + kref_get(&pool->ref_cnt); + read_unlock_irqrestore(&pool->pool_lock, flags); + + kref_get(&pool->rxe->ref_cnt); + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_put_pool; + + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return 0; + +out_put_pool: + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); + return -EINVAL; +} + void rxe_elem_release(struct kref *kref) { struct rxe_pool_entry *elem = @@ -424,7 +461,8 @@ void rxe_elem_release(struct kref *kref) if (pool->cleanup) pool->cleanup(elem); - kmem_cache_free(pool_cache(pool), elem); + if (!(pool->flags & RXE_POOL_NO_ALLOC)) + kmem_cache_free(pool_cache(pool), elem); atomic_dec(&pool->num_elem); rxe_dev_put(pool->rxe); rxe_pool_put(pool); diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 72968c29e01f..2f2cff1cbe43 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -41,6 +41,7 @@ enum rxe_pool_flags { RXE_POOL_ATOMIC = BIT(0), RXE_POOL_INDEX = BIT(1), RXE_POOL_KEY = BIT(2), + RXE_POOL_NO_ALLOC = BIT(4), }; enum rxe_elem_type { @@ -131,6 +132,9 @@ void rxe_pool_cleanup(struct rxe_pool *pool); /* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); +/* connect already allocated object to pool */ +int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); + /* assign an index to an indexed object and insert object into * pool's rb tree */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index cc5a05124ece..051c3930e808 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -191,23 +191,20 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num, return 0; } -static struct ib_pd *rxe_alloc_pd(struct ib_device *dev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(dev); - struct rxe_pd *pd; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); - pd = rxe_alloc(&rxe->pd_pool); - return pd ? &pd->ibpd : ERR_PTR(-ENOMEM); + return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); } -static int rxe_dealloc_pd(struct ib_pd *ibpd) +static void rxe_dealloc_pd(struct ib_pd *ibpd) { struct rxe_pd *pd = to_rpd(ibpd); rxe_drop_ref(pd); - return 0; } static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, @@ -1183,6 +1180,7 @@ static const struct ib_device_ops rxe_dev_ops = { .reg_user_mr = rxe_reg_user_mr, .req_notify_cq = rxe_req_notify_cq, .resize_cq = rxe_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), }; int rxe_register_device(struct rxe_dev *rxe) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 74e04801d34d..70839d3f55d9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -66,8 +66,8 @@ struct rxe_ucontext { }; struct rxe_pd { + struct ib_pd ibpd; struct rxe_pool_entry pelem; - struct ib_pd ibpd; }; struct rxe_ah { diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.c b/drivers/media/pci/intel/ipu3/ipu3-cio2.c index cdb79ae2d8dc..9fbfbda74171 100644 --- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c +++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c @@ -846,7 +846,7 @@ static int cio2_vb2_buf_init(struct vb2_buffer *vb) unsigned int pages = DIV_ROUND_UP(vb->planes[0].length, CIO2_PAGE_SIZE); unsigned int lops = DIV_ROUND_UP(pages + 1, entries_per_page); struct sg_table *sg; - struct sg_page_iter sg_iter; + struct sg_dma_page_iter sg_iter; int i, j; if (lops <= 0 || lops > CIO2_MAX_LOPS) { @@ -873,7 +873,7 @@ static int cio2_vb2_buf_init(struct vb2_buffer *vb) b->offset = sg->sgl->offset; i = j = 0; - for_each_sg_page(sg->sgl, &sg_iter, sg->nents, 0) { + for_each_sg_dma_page (sg->sgl, &sg_iter, sg->nents, 0) { if (!pages--) break; b->lop[i][j] = sg_page_iter_dma_address(&sg_iter) >> PAGE_SHIFT; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index c62a0c830705..38dd41eb959e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -56,6 +56,7 @@ enum { CPL_TX_DATA_ISO = 0x1F, CPL_CLOSE_LISTSRV_RPL = 0x20, + CPL_GET_TCB_RPL = 0x22, CPL_L2T_WRITE_RPL = 0x23, CPL_PASS_OPEN_RPL = 0x24, CPL_ACT_OPEN_RPL = 0x25, @@ -688,6 +689,13 @@ struct cpl_get_tcb { #define NO_REPLY_V(x) ((x) << NO_REPLY_S) #define NO_REPLY_F NO_REPLY_V(1U) +struct cpl_get_tcb_rpl { + union opcode_tid ot; + __u8 cookie; + __u8 status; + __be16 len; +}; + struct cpl_set_tcb_field { WR_HDR; union opcode_tid ot; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 3297ce025e8b..1b9afb192f7f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -41,6 +41,14 @@ #define TCB_SMAC_SEL_V(x) ((x) << TCB_SMAC_SEL_S) #define TCB_T_FLAGS_W 1 +#define TCB_T_FLAGS_S 0 +#define TCB_T_FLAGS_M 0xffffffffffffffffULL +#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) + +#define TCB_RQ_START_W 30 +#define TCB_RQ_START_S 0 +#define TCB_RQ_START_M 0x3ffffffULL +#define TCB_RQ_START_V(x) ((x) << TCB_RQ_START_S) #define TF_CCTRL_ECE_S 60 #define TF_CCTRL_CWR_S 61 @@ -66,4 +74,8 @@ #define TCB_RX_FRAG3_LEN_RAW_W 29 #define TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W 30 #define TCB_PDU_HDR_LEN_W 31 + +#define TF_RX_PDU_OUT_S 49 +#define TF_RX_PDU_OUT_V(x) ((__u64)(x) << TF_RX_PDU_OUT_S) + #endif /* __T4_TCB_H */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b96f0d0b5b8f..b4be960c7e5d 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -339,12 +339,12 @@ int sg_alloc_table_chained(struct sg_table *table, int nents, /* * sg page iterator * - * Iterates over sg entries page-by-page. On each successful iteration, - * you can call sg_page_iter_page(@piter) and sg_page_iter_dma_address(@piter) - * to get the current page and its dma address. @piter->sg will point to the - * sg holding this page and @piter->sg_pgoffset to the page's page offset - * within the sg. The iteration will stop either when a maximum number of sg - * entries was reached or a terminating sg (sg_last(sg) == true) was reached. + * Iterates over sg entries page-by-page. On each successful iteration, you + * can call sg_page_iter_page(@piter) to get the current page and its dma + * address. @piter->sg will point to the sg holding this page and + * @piter->sg_pgoffset to the page's page offset within the sg. The iteration + * will stop either when a maximum number of sg entries was reached or a + * terminating sg (sg_last(sg) == true) was reached. */ struct sg_page_iter { struct scatterlist *sg; /* sg holding the page */ @@ -356,7 +356,19 @@ struct sg_page_iter { * next step */ }; +/* + * sg page iterator for DMA addresses + * + * This is the same as sg_page_iter however you can call + * sg_page_iter_dma_address(@dma_iter) to get the page's DMA + * address. sg_page_iter_page() cannot be called on this iterator. + */ +struct sg_dma_page_iter { + struct sg_page_iter base; +}; + bool __sg_page_iter_next(struct sg_page_iter *piter); +bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter); void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset); @@ -372,11 +384,13 @@ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter) /** * sg_page_iter_dma_address - get the dma address of the current page held by * the page iterator. - * @piter: page iterator holding the page + * @dma_iter: page iterator holding the page */ -static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter) +static inline dma_addr_t +sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) { - return sg_dma_address(piter->sg) + (piter->sg_pgoffset << PAGE_SHIFT); + return sg_dma_address(dma_iter->base.sg) + + (dma_iter->base.sg_pgoffset << PAGE_SHIFT); } /** @@ -385,11 +399,28 @@ static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter) * @piter: page iterator to hold current page, sg, sg_pgoffset * @nents: maximum number of sg entries to iterate over * @pgoffset: starting page offset + * + * Callers may use sg_page_iter_page() to get each page pointer. */ #define for_each_sg_page(sglist, piter, nents, pgoffset) \ for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \ __sg_page_iter_next(piter);) +/** + * for_each_sg_dma_page - iterate over the pages of the given sg list + * @sglist: sglist to iterate over + * @dma_iter: page iterator to hold current page + * @dma_nents: maximum number of sg entries to iterate over, this is the value + * returned from dma_map_sg + * @pgoffset: starting page offset + * + * Callers may use sg_page_iter_dma_address() to get each page's DMA address. + */ +#define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset) \ + for (__sg_page_iter_start(&(dma_iter)->base, sglist, dma_nents, \ + pgoffset); \ + __sg_page_iter_dma_next(dma_iter);) + /* * Mapping sg iterator * diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index fdef558e3a2d..79ba8219e7dc 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -616,12 +616,11 @@ struct ib_mad_agent { void *context; u32 hi_tid; u32 flags; + void *security; + struct list_head mad_agent_sec_list; u8 port_num; u8 rmpp_version; - void *security; bool smp_allowed; - bool lsm_nb_reg; - struct notifier_block lsm_nb; }; /** diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2e1f1e885ee5..135fab2c016c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2264,6 +2264,19 @@ struct ib_counters_read_attr { struct uverbs_attr_bundle; +#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ + .size_##ib_struct = \ + (sizeof(struct drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \ + BUILD_BUG_ON_ZERO( \ + !__same_type(((struct drv_struct *)NULL)->member, \ + struct ib_struct))) + +#define rdma_zalloc_drv_obj(ib_dev, ib_type) \ + ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL)) + +#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct + /** * struct ib_device_ops - InfiniBand device operations * This structure defines all the InfiniBand device operations, providers will @@ -2372,10 +2385,9 @@ struct ib_device_ops { int (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); - struct ib_pd *(*alloc_pd)(struct ib_device *device, - struct ib_ucontext *context, - struct ib_udata *udata); - int (*dealloc_pd)(struct ib_pd *pd); + int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); + void (*dealloc_pd)(struct ib_pd *pd); struct ib_ah *(*create_ah)(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); @@ -2517,6 +2529,8 @@ struct ib_device_ops { */ int (*fill_res_entry)(struct sk_buff *msg, struct rdma_restrack_entry *entry); + + DECLARE_RDMA_OBJ_SIZE(ib_pd); }; struct ib_device { @@ -2528,12 +2542,8 @@ struct ib_device { struct list_head event_handler_list; spinlock_t event_handler_lock; - rwlock_t client_data_lock; - struct list_head core_list; - /* Access to the client_data_list is protected by the client_data_lock - * rwlock and the lists_rwsem read-write semaphore - */ - struct list_head client_data_list; + struct rw_semaphore client_data_rwsem; + struct xarray client_data; struct ib_cache cache; /** @@ -2558,12 +2568,6 @@ struct ib_device { struct kobject *ports_kobj; struct list_head port_list; - enum { - IB_DEV_UNINITIALIZED, - IB_DEV_REGISTERED, - IB_DEV_UNREGISTERED - } reg_state; - int uverbs_abi_ver; u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; @@ -2602,7 +2606,7 @@ struct ib_device { }; struct ib_client { - char *name; + const char *name; void (*add) (struct ib_device *); void (*remove)(struct ib_device *, void *client_data); @@ -2629,6 +2633,7 @@ struct ib_client { const struct sockaddr *addr, void *client_data); struct list_head list; + u32 client_id; /* kverbs are not required by the client */ u8 no_kverbs_req:1; @@ -2651,7 +2656,21 @@ void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); -void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +/** + * ib_get_client_data - Get IB client context + * @device:Device to get context for + * @client:Client to get context for + * + * ib_get_client_data() returns the client context data set with + * ib_set_client_data(). This can only be called while the client is + * registered to the device, once the ib_client remove() callback returns this + * cannot be called. + */ +static inline void *ib_get_client_data(struct ib_device *device, + struct ib_client *client) +{ + return xa_load(&device->client_data, client->client_id); +} void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); void ib_set_device_ops(struct ib_device *device, diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 48512abd3162..0e1f02815643 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -94,7 +94,8 @@ struct iw_cm_id { void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); u8 tos; - bool mapped; + bool tos_set:1; + bool mapped:1; }; struct iw_cm_conn_param { diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 60987a5903b7..71f48cfdc24c 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -374,6 +374,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); */ int rdma_set_afonly(struct rdma_cm_id *id, int afonly); +int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout); /** * rdma_get_service_id - Return the IB service ID for a specified address. * @id: Communication identifier associated with the address. diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 0d1e78ebad05..e42940a215a3 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -300,6 +300,10 @@ enum { RDMA_OPTION_ID_TOS = 0, RDMA_OPTION_ID_REUSEADDR = 1, RDMA_OPTION_ID_AFONLY = 2, + RDMA_OPTION_ID_ACK_TIMEOUT = 3 +}; + +enum { RDMA_OPTION_IB_PATH = 1 }; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9ba349e775ef..739dc9fe2c55 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -625,6 +625,32 @@ bool __sg_page_iter_next(struct sg_page_iter *piter) } EXPORT_SYMBOL(__sg_page_iter_next); +static int sg_dma_page_count(struct scatterlist *sg) +{ + return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT; +} + +bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter) +{ + struct sg_page_iter *piter = &dma_iter->base; + + if (!piter->__nents || !piter->sg) + return false; + + piter->sg_pgoffset += piter->__pg_advance; + piter->__pg_advance = 1; + + while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) { + piter->sg_pgoffset -= sg_dma_page_count(piter->sg); + piter->sg = sg_next(piter->sg); + if (!--piter->__nents || !piter->sg) + return false; + } + + return true; +} +EXPORT_SYMBOL(__sg_page_iter_dma_next); + /** * sg_miter_start - start mapping iteration over a sg list * @miter: sg mapping iter to be started