2014-12-11 17:04:17 +02:00
/*
* Copyright ( c ) 2014 Mellanox Technologies . All rights reserved .
*
* This software is available to you under a choice of one of two
* licenses . You may choose to be licensed under the terms of the GNU
* General Public License ( GPL ) Version 2 , available from the file
* COPYING in the main directory of this source tree , or the
* OpenIB . org BSD license below :
*
* Redistribution and use in source and binary forms , with or
* without modification , are permitted provided that the following
* conditions are met :
*
* - Redistributions of source code must retain the above
* copyright notice , this list of conditions and the following
* disclaimer .
*
* - Redistributions in binary form must reproduce the above
* copyright notice , this list of conditions and the following
* disclaimer in the documentation and / or other materials
* provided with the distribution .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND ,
* EXPRESS OR IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY , FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT . IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER LIABILITY , WHETHER IN AN
* ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING FROM , OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE .
*/
# include <linux/types.h>
# include <linux/sched.h>
2017-02-08 18:51:29 +01:00
# include <linux/sched/mm.h>
2017-02-05 15:30:50 +01:00
# include <linux/sched/task.h>
2014-12-11 17:04:17 +02:00
# include <linux/pid.h>
# include <linux/slab.h>
# include <linux/export.h>
# include <linux/vmalloc.h>
2017-04-05 09:23:57 +03:00
# include <linux/hugetlb.h>
2017-10-25 18:56:49 +03:00
# include <linux/interval_tree_generic.h>
2014-12-11 17:04:17 +02:00
# include <rdma/ib_verbs.h>
# include <rdma/ib_umem.h>
# include <rdma/ib_umem_odp.h>
2017-10-25 18:56:49 +03:00
/*
 * The ib_umem list keeps track of memory regions for which the HW
 * device requests to receive notification when the related memory
 * mapping is changed.
 *
 * ib_umem_lock protects the list.
 */
static u64 node_start ( struct umem_odp_node * n )
{
struct ib_umem_odp * umem_odp =
container_of ( n , struct ib_umem_odp , interval_tree ) ;
return ib_umem_start ( umem_odp - > umem ) ;
}
/* Note that the representation of the intervals in the interval tree
* considers the ending point as contained in the interval , while the
* function ib_umem_end returns the first address which is not contained
* in the umem .
*/
static u64 node_last ( struct umem_odp_node * n )
{
struct ib_umem_odp * umem_odp =
container_of ( n , struct ib_umem_odp , interval_tree ) ;
return ib_umem_end ( umem_odp - > umem ) - 1 ;
}
/*
 * Instantiate the generic interval tree (linux/interval_tree_generic.h) for
 * struct umem_odp_node: nodes are linked through their 'rb' member, keys are
 * u64 values taken from node_start()/node_last(), and '__subtree_last' is the
 * per-node field handed to the macro for its subtree bookkeeping.  The
 * generated helpers are static and use the rbt_ib_umem prefix (for example
 * rbt_ib_umem_insert()).
 */
INTERVAL_TREE_DEFINE ( struct umem_odp_node , rb , u64 , __subtree_last ,
node_start , node_last , static , rbt_ib_umem )
2014-12-11 17:04:18 +02:00
static void ib_umem_notifier_start_account ( struct ib_umem * item )
{
mutex_lock ( & item - > odp_data - > umem_mutex ) ;
/* Only update private counters for this umem if it has them.
* Otherwise skip it . All page faults will be delayed for this umem . */
if ( item - > odp_data - > mn_counters_active ) {
int notifiers_count = item - > odp_data - > notifiers_count + + ;
if ( notifiers_count = = 0 )
/* Initialize the completion object for waiting on
* notifiers . Since notifier_count is zero , no one
* should be waiting right now . */
reinit_completion ( & item - > odp_data - > notifier_completion ) ;
}
mutex_unlock ( & item - > odp_data - > umem_mutex ) ;
}
static void ib_umem_notifier_end_account ( struct ib_umem * item )
{
mutex_lock ( & item - > odp_data - > umem_mutex ) ;
/* Only update private counters for this umem if it has them.
* Otherwise skip it . All page faults will be delayed for this umem . */
if ( item - > odp_data - > mn_counters_active ) {
/*
* This sequence increase will notify the QP page fault that
* the page that is going to be mapped in the spte could have
* been freed .
*/
+ + item - > odp_data - > notifiers_seq ;
if ( - - item - > odp_data - > notifiers_count = = 0 )
complete_all ( & item - > odp_data - > notifier_completion ) ;
}
mutex_unlock ( & item - > odp_data - > umem_mutex ) ;
}
/* Account for a new mmu notifier in an ib_ucontext. */
static void ib_ucontext_notifier_start_account ( struct ib_ucontext * context )
{
atomic_inc ( & context - > notifier_count ) ;
}
/* Account for a terminating mmu notifier in an ib_ucontext.
*
* Must be called with the ib_ucontext - > umem_rwsem semaphore unlocked , since
* the function takes the semaphore itself . */
static void ib_ucontext_notifier_end_account ( struct ib_ucontext * context )
{
int zero_notifiers = atomic_dec_and_test ( & context - > notifier_count ) ;
if ( zero_notifiers & &
! list_empty ( & context - > no_private_counters ) ) {
/* No currently running mmu notifiers. Now is the chance to
* add private accounting to all previously added umems . */
struct ib_umem_odp * odp_data , * next ;
/* Prevent concurrent mmu notifiers from working on the
* no_private_counters list . */
down_write ( & context - > umem_rwsem ) ;
/* Read the notifier_count again, with the umem_rwsem
* semaphore taken for write . */
if ( ! atomic_read ( & context - > notifier_count ) ) {
list_for_each_entry_safe ( odp_data , next ,
& context - > no_private_counters ,
no_private_counters ) {
mutex_lock ( & odp_data - > umem_mutex ) ;
odp_data - > mn_counters_active = true ;
list_del ( & odp_data - > no_private_counters ) ;
complete_all ( & odp_data - > notifier_completion ) ;
mutex_unlock ( & odp_data - > umem_mutex ) ;
}
}
up_write ( & context - > umem_rwsem ) ;
}
}
/*
 * Per-umem callback used by ib_umem_notifier_release().
 *
 * Marks @item as dying, wakes everything waiting on its notifier completion
 * and invalidates the umem's whole range.  @start, @end and @cookie are not
 * used.  Always returns 0.
 */
static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
					       u64 end, void *cookie)
{
	struct ib_umem_odp *odp = item->odp_data;
	struct ib_ucontext *ctx = item->context;

	/*
	 * Raise the number of running notifiers and never lower it again
	 * here, so that no further fault handling happens on this MR.
	 */
	ib_umem_notifier_start_account(item);
	odp->dying = 1;

	/*
	 * The dying flag has to be visible before the pending page faults
	 * are released below.
	 */
	smp_wmb();
	complete_all(&odp->notifier_completion);

	ctx->invalidate_range(item, ib_umem_start(item), ib_umem_end(item));
	return 0;
}
static void ib_umem_notifier_release ( struct mmu_notifier * mn ,
struct mm_struct * mm )
{
struct ib_ucontext * context = container_of ( mn , struct ib_ucontext , mn ) ;
if ( ! context - > invalidate_range )
return ;
ib_ucontext_notifier_start_account ( context ) ;
down_read ( & context - > umem_rwsem ) ;
rbt_ib_umem_for_each_in_range ( & context - > umem_tree , 0 ,
ULLONG_MAX ,
ib_umem_notifier_release_trampoline ,
NULL ) ;
up_read ( & context - > umem_rwsem ) ;
}
/*
 * Per-umem callback that invalidates the single page starting at @start.
 *
 * The invalidation is bracketed by the umem start/end notifier accounting.
 * @end and @cookie are not used.  Always returns 0.
 */
static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
				      u64 end, void *cookie)
{
	const u64 page_end = start + PAGE_SIZE;

	ib_umem_notifier_start_account(item);
	item->context->invalidate_range(item, start, page_end);
	ib_umem_notifier_end_account(item);

	return 0;
}
/*
 * Per-umem callback for the start of a range invalidation.
 *
 * Opens the notifier accounting on @item and invalidates [@start, @end)
 * through the ucontext callback.  The accounting is left open; the matching
 * close is done by invalidate_range_end_trampoline().  @cookie is not used.
 * Always returns 0.
 */
static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
					     u64 end, void *cookie)
{
	struct ib_ucontext *ctx = item->context;

	ib_umem_notifier_start_account(item);
	ctx->invalidate_range(item, start, end);

	return 0;
}
static void ib_umem_notifier_invalidate_range_start ( struct mmu_notifier * mn ,
struct mm_struct * mm ,
unsigned long start ,
unsigned long end )
{
struct ib_ucontext * context = container_of ( mn , struct ib_ucontext , mn ) ;
if ( ! context - > invalidate_range )
return ;
ib_ucontext_notifier_start_account ( context ) ;
down_read ( & context - > umem_rwsem ) ;
rbt_ib_umem_for_each_in_range ( & context - > umem_tree , start ,
end ,
invalidate_range_start_trampoline , NULL ) ;
up_read ( & context - > umem_rwsem ) ;
}
/*
 * Per-umem callback for the end of a range invalidation: closes the notifier
 * accounting on @item.  @start, @end and @cookie are not used.
 * Always returns 0.
 */
static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
					   u64 end, void *cookie)
{
	ib_umem_notifier_end_account(item);

	return 0;
}
static void ib_umem_notifier_invalidate_range_end ( struct mmu_notifier * mn ,
struct mm_struct * mm ,
unsigned long start ,
unsigned long end )
{
struct ib_ucontext * context = container_of ( mn , struct ib_ucontext , mn ) ;
if ( ! context - > invalidate_range )
return ;
down_read ( & context - > umem_rwsem ) ;
rbt_ib_umem_for_each_in_range ( & context - > umem_tree , start ,
end ,
invalidate_range_end_trampoline , NULL ) ;
up_read ( & context - > umem_rwsem ) ;
ib_ucontext_notifier_end_account ( context ) ;
}
2015-11-29 23:02:51 +01:00
static const struct mmu_notifier_ops ib_umem_notifiers = {
2014-12-11 17:04:18 +02:00
. release = ib_umem_notifier_release ,
. invalidate_range_start = ib_umem_notifier_invalidate_range_start ,
. invalidate_range_end = ib_umem_notifier_invalidate_range_end ,
} ;
2017-01-18 16:58:07 +02:00
/**
 * ib_alloc_odp_umem - allocate an ODP umem and add it to the ucontext's tree
 * @context: ucontext the new umem belongs to
 * @addr: start address recorded in the umem
 * @size: length of the region in bytes
 *
 * Allocates an ib_umem marked writable with a page shift of PAGE_SHIFT,
 * together with its ib_umem_odp companion and zeroed page_list/dma_list
 * arrays of (@size >> PAGE_SHIFT) entries each.  The umem is then inserted
 * into @context->umem_tree under umem_rwsem held for writing.  If no mmu
 * notifier is running at that moment its private counters are enabled right
 * away; otherwise it is queued on @context->no_private_counters.
 *
 * Return: the new umem on success, ERR_PTR(-ENOMEM) if any allocation fails.
 */
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
				  unsigned long addr,
				  size_t size)
{
	struct ib_umem *umem;
	struct ib_umem_odp *odp_data;
	/* size_t, not int: the page count must not be truncated. */
	size_t pages = size >> PAGE_SHIFT;
	int ret;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->context = context;
	umem->length = size;
	umem->address = addr;
	umem->page_shift = PAGE_SHIFT;
	umem->writable = 1;

	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
	if (!odp_data) {
		ret = -ENOMEM;
		goto out_umem;
	}
	odp_data->umem = umem;

	mutex_init(&odp_data->umem_mutex);
	init_completion(&odp_data->notifier_completion);

	/*
	 * array_size() guards the count * element-size multiplication against
	 * overflow, so an oversized request fails instead of under-allocating.
	 */
	odp_data->page_list =
		vzalloc(array_size(pages, sizeof(*odp_data->page_list)));
	if (!odp_data->page_list) {
		ret = -ENOMEM;
		goto out_odp_data;
	}

	odp_data->dma_list =
		vzalloc(array_size(pages, sizeof(*odp_data->dma_list)));
	if (!odp_data->dma_list) {
		ret = -ENOMEM;
		goto out_page_list;
	}

	/*
	 * Set the umem -> odp_data back-pointer BEFORE the node becomes
	 * reachable through the interval tree.  The notifier callbacks in
	 * this file dereference item->odp_data for every umem they find in
	 * the tree, so publishing the node first would let them see a NULL
	 * pointer once umem_rwsem is released.
	 */
	umem->odp_data = odp_data;

	down_write(&context->umem_rwsem);
	context->odp_mrs_count++;
	rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
	if (likely(!atomic_read(&context->notifier_count)))
		odp_data->mn_counters_active = true;
	else
		/* Notifiers are running; private counters get enabled later. */
		list_add(&odp_data->no_private_counters,
			 &context->no_private_counters);
	up_write(&context->umem_rwsem);

	return umem;

out_page_list:
	vfree(odp_data->page_list);
out_odp_data:
	kfree(odp_data);
out_umem:
	kfree(umem);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_alloc_odp_umem);
2017-04-05 09:23:57 +03:00
int ib_umem_odp_get ( struct ib_ucontext * context , struct ib_umem * umem ,
int access )
2014-12-11 17:04:17 +02:00
{
int ret_val ;
struct pid * our_pid ;
2014-12-11 17:04:18 +02:00
struct mm_struct * mm = get_task_mm ( current ) ;
if ( ! mm )
return - EINVAL ;
2014-12-11 17:04:17 +02:00
2017-04-05 09:23:57 +03:00
if ( access & IB_ACCESS_HUGETLB ) {
struct vm_area_struct * vma ;
struct hstate * h ;
2017-05-21 19:08:09 +03:00
down_read ( & mm - > mmap_sem ) ;
2017-04-05 09:23:57 +03:00
vma = find_vma ( mm , ib_umem_start ( umem ) ) ;
2017-05-21 19:08:09 +03:00
if ( ! vma | | ! is_vm_hugetlb_page ( vma ) ) {
up_read ( & mm - > mmap_sem ) ;
2017-04-05 09:23:57 +03:00
return - EINVAL ;
2017-05-21 19:08:09 +03:00
}
2017-04-05 09:23:57 +03:00
h = hstate_vma ( vma ) ;
umem - > page_shift = huge_page_shift ( h ) ;
2017-05-21 19:08:09 +03:00
up_read ( & mm - > mmap_sem ) ;
2017-04-05 09:23:57 +03:00
umem - > hugetlb = 1 ;
} else {
umem - > hugetlb = 0 ;
}
2014-12-11 17:04:17 +02:00
/* Prevent creating ODP MRs in child processes */
rcu_read_lock ( ) ;
our_pid = get_task_pid ( current - > group_leader , PIDTYPE_PID ) ;
rcu_read_unlock ( ) ;
put_pid ( our_pid ) ;
2014-12-11 17:04:18 +02:00
if ( context - > tgid ! = our_pid ) {
ret_val = - EINVAL ;
goto out_mm ;
}
2014-12-11 17:04:17 +02:00
umem - > odp_data = kzalloc ( sizeof ( * umem - > odp_data ) , GFP_KERNEL ) ;
2014-12-11 17:04:18 +02:00
if ( ! umem - > odp_data ) {
ret_val = - ENOMEM ;
goto out_mm ;
}
umem - > odp_data - > umem = umem ;
2014-12-11 17:04:17 +02:00
mutex_init ( & umem - > odp_data - > umem_mutex ) ;
2014-12-11 17:04:18 +02:00
init_completion ( & umem - > odp_data - > notifier_completion ) ;
2017-01-18 16:58:07 +02:00
if ( ib_umem_num_pages ( umem ) ) {
treewide: Use array_size() in vzalloc()
The vzalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:
vzalloc(a * b)
with:
vzalloc(array_size(a, b))
as well as handling cases of:
vzalloc(a * b * c)
with:
vzalloc(array3_size(a, b, c))
This does, however, attempt to ignore constant size factors like:
vzalloc(4 * 1024)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
vzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
vzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
vzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
vzalloc(
- sizeof(TYPE) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT_ID
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT_ID
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
vzalloc(
- SIZE * COUNT
+ array_size(COUNT, SIZE)
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
vzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
vzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
vzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
vzalloc(C1 * C2 * C3, ...)
|
vzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@
(
vzalloc(C1 * C2, ...)
|
vzalloc(
- E1 * E2
+ array_size(E1, E2)
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 14:27:37 -07:00
umem - > odp_data - > page_list =
vzalloc ( array_size ( sizeof ( * umem - > odp_data - > page_list ) ,
ib_umem_num_pages ( umem ) ) ) ;
2017-01-18 16:58:07 +02:00
if ( ! umem - > odp_data - > page_list ) {
ret_val = - ENOMEM ;
goto out_odp_data ;
}
2014-12-11 17:04:17 +02:00
treewide: Use array_size() in vzalloc()
The vzalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:
vzalloc(a * b)
with:
vzalloc(array_size(a, b))
as well as handling cases of:
vzalloc(a * b * c)
with:
vzalloc(array3_size(a, b, c))
This does, however, attempt to ignore constant size factors like:
vzalloc(4 * 1024)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
vzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
vzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
vzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
vzalloc(
- sizeof(TYPE) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT_ID
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT_ID
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
vzalloc(
- SIZE * COUNT
+ array_size(COUNT, SIZE)
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
vzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
vzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
vzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
vzalloc(C1 * C2 * C3, ...)
|
vzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@
(
vzalloc(C1 * C2, ...)
|
vzalloc(
- E1 * E2
+ array_size(E1, E2)
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 14:27:37 -07:00
umem - > odp_data - > dma_list =
vzalloc ( array_size ( sizeof ( * umem - > odp_data - > dma_list ) ,
ib_umem_num_pages ( umem ) ) ) ;
2017-01-18 16:58:07 +02:00
if ( ! umem - > odp_data - > dma_list ) {
ret_val = - ENOMEM ;
goto out_page_list ;
}
2014-12-11 17:04:17 +02:00
}
2014-12-11 17:04:18 +02:00
/*
* When using MMU notifiers , we will get a
* notification before the " current " task ( and MM ) is
* destroyed . We use the umem_rwsem semaphore to synchronize .
*/
down_write ( & context - > umem_rwsem ) ;
context - > odp_mrs_count + + ;
if ( likely ( ib_umem_start ( umem ) ! = ib_umem_end ( umem ) ) )
rbt_ib_umem_insert ( & umem - > odp_data - > interval_tree ,
& context - > umem_tree ) ;
2015-01-06 13:56:02 +02:00
if ( likely ( ! atomic_read ( & context - > notifier_count ) ) | |
context - > odp_mrs_count = = 1 )
2014-12-11 17:04:18 +02:00
umem - > odp_data - > mn_counters_active = true ;
else
list_add ( & umem - > odp_data - > no_private_counters ,
& context - > no_private_counters ) ;
downgrade_write ( & context - > umem_rwsem ) ;
if ( context - > odp_mrs_count = = 1 ) {
/*
* Note that at this point , no MMU notifier is running
* for this context !
*/
atomic_set ( & context - > notifier_count , 0 ) ;
INIT_HLIST_NODE ( & context - > mn . hlist ) ;
context - > mn . ops = & ib_umem_notifiers ;
/*
* Lock - dep detects a false positive for mmap_sem vs .
* umem_rwsem , due to not grasping downgrade_write correctly .
*/
lockdep_off ( ) ;
ret_val = mmu_notifier_register ( & context - > mn , mm ) ;
lockdep_on ( ) ;
if ( ret_val ) {
pr_err ( " Failed to register mmu_notifier %d \n " , ret_val ) ;
ret_val = - EBUSY ;
goto out_mutex ;
}
}
up_read ( & context - > umem_rwsem ) ;
/*
* Note that doing an mmput can cause a notifier for the relevant mm .
* If the notifier is called while we hold the umem_rwsem , this will
* cause a deadlock . Therefore , we release the reference only after we
* released the semaphore .
*/
mmput ( mm ) ;
2014-12-11 17:04:17 +02:00
return 0 ;
2014-12-11 17:04:18 +02:00
out_mutex :
up_read ( & context - > umem_rwsem ) ;
vfree ( umem - > odp_data - > dma_list ) ;
2014-12-11 17:04:17 +02:00
out_page_list :
vfree ( umem - > odp_data - > page_list ) ;
out_odp_data :
kfree ( umem - > odp_data ) ;
2014-12-11 17:04:18 +02:00
out_mm :
mmput ( mm ) ;
2014-12-11 17:04:17 +02:00
return ret_val ;
}
void ib_umem_odp_release ( struct ib_umem * umem )
{
2014-12-11 17:04:18 +02:00
struct ib_ucontext * context = umem - > context ;
2014-12-11 17:04:17 +02:00
/*
* Ensure that no more pages are mapped in the umem .
*
* It is the driver ' s responsibility to ensure , before calling us ,
* that the hardware will not attempt to access the MR any more .
*/
ib_umem_odp_unmap_dma_pages ( umem , ib_umem_start ( umem ) ,
ib_umem_end ( umem ) ) ;
2014-12-11 17:04:18 +02:00
down_write ( & context - > umem_rwsem ) ;
if ( likely ( ib_umem_start ( umem ) ! = ib_umem_end ( umem ) ) )
rbt_ib_umem_remove ( & umem - > odp_data - > interval_tree ,
& context - > umem_tree ) ;
context - > odp_mrs_count - - ;
if ( ! umem - > odp_data - > mn_counters_active ) {
list_del ( & umem - > odp_data - > no_private_counters ) ;
complete_all ( & umem - > odp_data - > notifier_completion ) ;
}
/*
* Downgrade the lock to a read lock . This ensures that the notifiers
* ( who lock the mutex for reading ) will be able to finish , and we
* will be able to enventually obtain the mmu notifiers SRCU . Note
* that since we are doing it atomically , no other user could register
* and unregister while we do the check .
*/
downgrade_write ( & context - > umem_rwsem ) ;
if ( ! context - > odp_mrs_count ) {
struct task_struct * owning_process = NULL ;
struct mm_struct * owning_mm = NULL ;
owning_process = get_pid_task ( context - > tgid ,
PIDTYPE_PID ) ;
if ( owning_process = = NULL )
/*
* The process is already dead , notifier were removed
* already .
*/
goto out ;
owning_mm = get_task_mm ( owning_process ) ;
if ( owning_mm = = NULL )
/*
* The process ' mm is already dead , notifier were
* removed already .
*/
goto out_put_task ;
mmu_notifier_unregister ( & context - > mn , owning_mm ) ;
mmput ( owning_mm ) ;
out_put_task :
put_task_struct ( owning_process ) ;
}
out :
up_read ( & context - > umem_rwsem ) ;
2014-12-11 17:04:17 +02:00
vfree ( umem - > odp_data - > dma_list ) ;
vfree ( umem - > odp_data - > page_list ) ;
kfree ( umem - > odp_data ) ;
kfree ( umem ) ;
}
/*
 * Map for DMA and insert a single page into the on-demand paging page tables.
 *
 * @umem: the umem to insert the page to.
 * @page_index: index in the umem to add the page to, counted in units of
 *              BIT(umem->page_shift) bytes.
 * @page: the page struct to map and add.
 * @access_mask: access permissions needed for this page.
 * @current_seq: sequence number for synchronization with invalidations.
 *               the sequence number is taken from
 *               umem->odp_data->notifiers_seq.
 *
 * The function returns -EFAULT if the DMA mapping operation fails. It returns
 * -EAGAIN if a concurrent invalidation prevents us from updating the page, or
 * if a different page was already stored at @page_index and the stale mapping
 * had to be invalidated.
 *
 * The page is released via put_page even if the operation failed. For
 * on-demand pinning, the page is released whenever it isn't stored in the
 * umem.
 */
static int ib_umem_odp_map_dma_single_page(
		struct ib_umem *umem,
		int page_index,
		struct page *page,
		u64 access_mask,
		unsigned long current_seq)
{
	struct ib_device *dev = umem->context->device;
	dma_addr_t dma_addr;
	int stored_page = 0;
	int remove_existing_mapping = 0;
	int ret = 0;

	/*
	 * Note: we avoid writing if seq is different from the initial seq, to
	 * handle case of a racing notifier. This check also allows us to bail
	 * early if we have a notifier running in parallel with us.
	 */
	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
		ret = -EAGAIN;
		goto out;
	}

	if (!(umem->odp_data->dma_list[page_index])) {
		/* Slot is empty: create a fresh DMA mapping for the page. */
		dma_addr = ib_dma_map_page(dev,
					   page,
					   0, BIT(umem->page_shift),
					   DMA_BIDIRECTIONAL);
		if (ib_dma_mapping_error(dev, dma_addr)) {
			ret = -EFAULT;
			goto out;
		}
		umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
		umem->odp_data->page_list[page_index] = page;
		umem->npages++;
		stored_page = 1;
	} else if (umem->odp_data->page_list[page_index] == page) {
		/* Same page already mapped: only widen the permissions. */
		umem->odp_data->dma_list[page_index] |= access_mask;
	} else {
		pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
		       umem->odp_data->page_list[page_index], page);
		/* Better remove the mapping now, to prevent any further
		 * damage. */
		remove_existing_mapping = 1;
	}

out:
	/* On Demand Paging - avoid pinning the page */
	if (umem->context->invalidate_range || !stored_page)
		put_page(page);

	if (remove_existing_mapping && umem->context->invalidate_range) {
		/*
		 * page_index is in units of BIT(page_shift) bytes, so the
		 * byte offset of the page is the index shifted LEFT by
		 * page_shift. The shift is done in u64 so that large indexes
		 * cannot overflow int.
		 */
		invalidate_page_trampoline(
			umem,
			ib_umem_start(umem) +
				((u64)page_index << umem->page_shift),
			ib_umem_start(umem) +
				(((u64)page_index + 1) << umem->page_shift),
			NULL);
		ret = -EAGAIN;
	}

	return ret;
}
/**
 * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
 *
 * Pins the range of pages passed in the argument, and maps them to
 * DMA addresses. The DMA addresses of the mapped pages are updated in
 * umem->odp_data->dma_list.
 *
 * Returns the number of pages mapped in success, negative error code
 * for failure.
 * An -EINVAL error code is returned when @access_mask is zero or the owning
 * process can no longer be found.
 * An -EFAULT error code is returned when the requested range is not inside
 * the umem, when a page could not be DMA mapped, or when the pages backing
 * one umem page are not physically contiguous.
 * An -ENOMEM error code is returned when the temporary page list cannot be
 * allocated.
 * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
 * the function from completing its task.
 * An -ENOENT error code indicates that userspace process is being terminated
 * and mm was already destroyed.
 * @umem: the umem to map and pin
 * @user_virt: the address from which we need to map.
 * @bcnt: the minimal number of bytes to pin and map. The mapping might be
 *        bigger due to alignment, and may also be smaller in case of an error
 *        pinning or mapping a page. The actual pages mapped is returned in
 *        the return value.
 * @access_mask: bit mask of the requested access permissions for the given
 *               range.
 * @current_seq: the MMU notifiers sequence value for synchronization with
 *               invalidations. the sequence number is read from
 *               umem->odp_data->notifiers_seq before calling this function
 */
int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
			      u64 access_mask, unsigned long current_seq)
{
	struct task_struct *owning_process = NULL;
	struct mm_struct *owning_mm = NULL;
	struct page **local_page_list = NULL;
	u64 page_mask, off;
	int j, k, ret = 0, start_idx, npages = 0, page_shift;
	unsigned int flags = 0;
	phys_addr_t p = 0;

	if (access_mask == 0)
		return -EINVAL;

	if (user_virt < ib_umem_start(umem) ||
	    user_virt + bcnt > ib_umem_end(umem))
		return -EFAULT;

	/* One page worth of struct page pointers, reused for every batch. */
	local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
	if (!local_page_list)
		return -ENOMEM;

	page_shift = umem->page_shift;
	page_mask = ~(BIT(page_shift) - 1);
	off = user_virt & (~page_mask);
	user_virt = user_virt & page_mask;
	bcnt += off; /* Charge for the first page offset as well. */

	owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
	if (owning_process == NULL) {
		ret = -EINVAL;
		goto out_no_task;
	}

	owning_mm = get_task_mm(owning_process);
	if (owning_mm == NULL) {
		ret = -ENOENT;
		goto out_put_task;
	}

	if (access_mask & ODP_WRITE_ALLOWED_BIT)
		flags |= FOLL_WRITE;

	start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
	k = start_idx;

	while (bcnt > 0) {
		const size_t gup_num_pages = min_t(size_t,
				(bcnt + BIT(page_shift) - 1) >> page_shift,
				PAGE_SIZE / sizeof(struct page *));

		down_read(&owning_mm->mmap_sem);
		/*
		 * Note: this might result in redundant page getting. We can
		 * avoid this by checking dma_list to be 0 before calling
		 * get_user_pages. However, this make the code much more
		 * complex (and doesn't gain us much performance in most use
		 * cases).
		 */
		npages = get_user_pages_remote(owning_process, owning_mm,
				user_virt, gup_num_pages,
				flags, local_page_list, NULL, NULL);
		up_read(&owning_mm->mmap_sem);

		if (npages < 0)
			break;

		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
		mutex_lock(&umem->odp_data->umem_mutex);
		for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
			if (user_virt & ~page_mask) {
				/*
				 * Not the first PAGE_SIZE page of a umem
				 * page: only verify that it physically
				 * follows the previous one, then drop the
				 * reference taken by get_user_pages.
				 */
				p += PAGE_SIZE;
				if (page_to_phys(local_page_list[j]) != p) {
					/*
					 * Drop this page's reference here:
					 * the error cleanup below starts at
					 * j + 1 and would otherwise leak it.
					 */
					put_page(local_page_list[j]);
					ret = -EFAULT;
					break;
				}
				put_page(local_page_list[j]);
				continue;
			}

			ret = ib_umem_odp_map_dma_single_page(
					umem, k, local_page_list[j],
					access_mask, current_seq);
			if (ret < 0)
				break;

			p = page_to_phys(local_page_list[j]);
			k++;
		}
		mutex_unlock(&umem->odp_data->umem_mutex);

		if (ret < 0) {
			/*
			 * Release left over pages when handling errors. The
			 * page at index j that hit the error has already
			 * been released.
			 */
			for (++j; j < npages; ++j)
				put_page(local_page_list[j]);
			break;
		}
	}

	if (ret >= 0) {
		/* Report a gup failure only if nothing at all was mapped. */
		if (npages < 0 && k == start_idx)
			ret = npages;
		else
			ret = k - start_idx;
	}

	mmput(owning_mm);
out_put_task:
	put_task_struct(owning_process);
out_no_task:
	free_page((unsigned long)local_page_list);
	return ret;
}
EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
/*
 * Unmap and release the pages of an ODP umem that fall in [virt, bound).
 *
 * @umem: the umem whose pages are unmapped.
 * @virt: start of the address range; clamped to the start of the umem.
 * @bound: end of the address range (exclusive); clamped to the end of the
 *         umem.
 *
 * Each page found in the range is DMA unmapped, marked dirty when it was
 * mapped with write permission, and removed from the umem's page and DMA
 * lists. The page reference is dropped only when the context has no
 * invalidate_range callback.
 */
void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
				 u64 bound)
{
	int idx;
	u64 addr;
	struct ib_device *dev = umem->context->device;

	virt = max_t(u64, virt, ib_umem_start(umem));
	bound = min_t(u64, bound, ib_umem_end(umem));
	/* Note that during the run of this function, the
	 * notifiers_count of the MR is > 0, preventing any racing
	 * faults from completion. We might be racing with other
	 * invalidations, so we must make sure we free each page only
	 * once. */
	mutex_lock(&umem->odp_data->umem_mutex);
	for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
		idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
		if (umem->odp_data->page_list[idx]) {
			struct page *page = umem->odp_data->page_list[idx];
			dma_addr_t dma = umem->odp_data->dma_list[idx];
			dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;

			WARN_ON(!dma_addr);

			/*
			 * The unmap size must match the size used when the
			 * page was mapped, which is BIT(page_shift) rather
			 * than PAGE_SIZE.
			 */
			ib_dma_unmap_page(dev, dma_addr,
					  BIT(umem->page_shift),
					  DMA_BIDIRECTIONAL);
			if (dma & ODP_WRITE_ALLOWED_BIT) {
				struct page *head_page = compound_head(page);
				/*
				 * set_page_dirty prefers being called with
				 * the page lock. However, MMU notifiers are
				 * called sometimes with and sometimes without
				 * the lock. We rely on the umem_mutex instead
				 * to prevent other mmu notifiers from
				 * continuing and allowing the page mapping to
				 * be removed.
				 */
				set_page_dirty(head_page);
			}
			/* on demand pinning support */
			if (!umem->context->invalidate_range)
				put_page(page);
			umem->odp_data->page_list[idx] = NULL;
			umem->odp_data->dma_list[idx] = 0;
			umem->npages--;
		}
	}
	mutex_unlock(&umem->odp_data->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
2017-10-25 18:56:49 +03:00
/*
 * Invoke @cb on every umem in @root that overlaps [start, last).
 *
 * @last is not a part of the interval. See comment for function
 * node_last.
 *
 * Returns 0 when the range is empty or every callback returned 0, and 1 if
 * at least one callback returned non-zero. The callback is run for every
 * overlapping node regardless of earlier results.
 */
int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
				  u64 start, u64 last,
				  umem_call_back cb,
				  void *cookie)
{
	struct umem_odp_node *cur, *following;
	struct ib_umem_odp *odp;
	int any_nonzero = 0;

	if (unlikely(start == last))
		return any_nonzero;

	cur = rbt_ib_umem_iter_first(root, start, last - 1);
	while (cur) {
		/* Look up the successor before the callback runs. */
		following = rbt_ib_umem_iter_next(cur, start, last - 1);
		odp = container_of(cur, struct ib_umem_odp, interval_tree);
		if (cb(odp->umem, start, last, cookie))
			any_nonzero = 1;
		cur = following;
	}

	return any_nonzero;
}
EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
/*
 * Return the first umem in @root that the interval tree iterator reports for
 * the inclusive range [addr, addr + length - 1], or NULL when there is none.
 */
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
				       u64 addr, u64 length)
{
	struct umem_odp_node *hit;

	hit = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
	if (!hit)
		return NULL;

	return container_of(hit, struct ib_umem_odp, interval_tree);
}
EXPORT_SYMBOL(rbt_ib_umem_lookup);