/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>

#include "uverbs.h"
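
/*
 * Unmap the DMA mapping, unpin (and, if requested, dirty) every page in a
 * umem's chunk list, and free the chunks themselves.
 */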
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
	struct ib_umem_chunk *chunk, *tmp;
	int i;

	list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
		ib_dma_unmap_sg(dev, chunk->page_list,
				chunk->nents, DMA_BIDIRECTIONAL);
		for (i = 0; i < chunk->nents; ++i) {
			if (umem->writable && dirty)
				set_page_dirty_lock(chunk->page_list[i].page);
			put_page(chunk->page_list[i].page);
		}

		kfree(chunk);
	}
}

/**
 * ib_umem_get - Pin and DMA map userspace memory.
 * @context: userspace context to pin memory for
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 */
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
			    size_t size, int access)
{
	struct ib_umem *umem;
	struct page **page_list;
	struct ib_umem_chunk *chunk;
	unsigned long locked;
	unsigned long lock_limit;
	unsigned long cur_base;
	unsigned long npages;
	int ret;
	int off;
	int i;

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	umem = kmalloc(sizeof *umem, GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->context   = context;
	umem->length    = size;
	umem->offset    = addr & ~PAGE_MASK;
	umem->page_size = PAGE_SIZE;

	/*
	 * We ask for writable memory if any access flags other than
	 * "remote read" are set.  "Local write" and "remote write"
	 * obviously require write access.  "Remote atomic" can do
	 * things like fetch and add, which will modify memory, and
	 * "MW bind" can change permissions by binding a window.
	 */
	umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);

	INIT_LIST_HEAD(&umem->chunk_list);

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list) {
		kfree(umem);
		return ERR_PTR(-ENOMEM);
	}

	npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;

	down_write(&current->mm->mmap_sem);

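	/*
	 * The pinned pages count against the process's RLIMIT_MEMLOCK:
	 * refuse if the new locked_vm total would exceed the limit and
	 * the caller does not have CAP_IPC_LOCK.
	 */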
	locked     = npages + current->mm->locked_vm;
	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;

	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
		ret = -ENOMEM;
		goto out;
	}

	cur_base = addr & PAGE_MASK;

	ret = 0;	/* so "ret" is defined at "out" even if npages == 0 */
	while (npages) {
		/* pin up to one page_list page's worth of user pages per pass */
		ret = get_user_pages(current, current->mm, cur_base,
				     min_t(int, npages,
					   PAGE_SIZE / sizeof (struct page *)),
				     1, !umem->writable, page_list, NULL);

		if (ret < 0)
			goto out;

		cur_base += ret * PAGE_SIZE;
		npages   -= ret;

		off = 0;

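		/*
		 * Split the pages just pinned into scatterlist chunks of at
		 * most IB_UMEM_MAX_PAGE_CHUNK entries and DMA map each chunk.
		 */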
		while (ret) {
			chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
					min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
					GFP_KERNEL);
			if (!chunk) {
				ret = -ENOMEM;
				goto out;
			}

			chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
			for (i = 0; i < chunk->nents; ++i) {
				chunk->page_list[i].page   = page_list[i + off];
				chunk->page_list[i].offset = 0;
				chunk->page_list[i].length = PAGE_SIZE;
			}

			chunk->nmap = ib_dma_map_sg(context->device,
						    &chunk->page_list[0],
						    chunk->nents,
						    DMA_BIDIRECTIONAL);
			if (chunk->nmap <= 0) {
				for (i = 0; i < chunk->nents; ++i)
					put_page(chunk->page_list[i].page);
				kfree(chunk);

				ret = -ENOMEM;
				goto out;
			}

			ret -= chunk->nents;
			off += chunk->nents;
			list_add_tail(&chunk->list, &umem->chunk_list);
		}

		ret = 0;
	}

out:
	if (ret < 0) {
		__ib_umem_release(context->device, umem, 0);
		kfree(umem);
	} else
		current->mm->locked_vm = locked;

	up_write(&current->mm->mmap_sem);
	free_page((unsigned long) page_list);

	return ret < 0 ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
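
/*
 * A minimal usage sketch (not part of this file): a driver's reg_user_mr
 * method typically pins the user buffer roughly like this, where "mr" is
 * a hypothetical driver-private structure with a "umem" member:
 *
 *	mr->umem = ib_umem_get(pd->uobject->context, start, length, access);
 *	if (IS_ERR(mr->umem))
 *		return ERR_PTR(PTR_ERR(mr->umem));
 *
 *	npages = ib_umem_page_count(mr->umem);
 *	(then program npages translation entries into the HCA)
 *
 * and pairs it with ib_umem_release(mr->umem) when the MR is destroyed.
 */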

/*
 * Deferred locked_vm accounting, run from the system workqueue when
 * ib_umem_release() cannot take mmap_sem itself (see below).
 */
static void ib_umem_account(struct work_struct *work)
{
	struct ib_umem *umem = container_of(work, struct ib_umem, work);

	down_write(&umem->mm->mmap_sem);
	umem->mm->locked_vm -= umem->diff;
	up_write(&umem->mm->mmap_sem);
	mmput(umem->mm);
	kfree(umem);
}

/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
	struct ib_ucontext *context = umem->context;
	struct mm_struct *mm;
	unsigned long diff;

	__ib_umem_release(umem->context->device, umem, 1);

	mm = get_task_mm(current);
	if (!mm) {
		kfree(umem);
		return;
	}

	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;

	/*
	 * We may be called with the mm's mmap_sem already held.  This
	 * can happen when a userspace munmap() is the call that drops
	 * the last reference to our file and calls our release
	 * method.  If there are memory regions to destroy, we'll end
	 * up here and not be able to take the mmap_sem.  In that case
	 * we defer the vm_locked accounting to the system workqueue.
	 */
	if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
		INIT_WORK(&umem->work, ib_umem_account);
		umem->mm   = mm;
		umem->diff = diff;

		schedule_work(&umem->work);
		return;
	} else
		down_write(&mm->mmap_sem);

	current->mm->locked_vm -= diff;
	up_write(&mm->mmap_sem);
	mmput(mm);
	kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
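
/*
 * ib_umem_page_count - return how many umem->page_size pages the
 * DMA-mapped chunks of @umem cover; drivers typically use this to size
 * the HCA translation table for the region.
 */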
int ib_umem_page_count(struct ib_umem *umem)
{
	struct ib_umem_chunk *chunk;
	int shift;
	int i;
	int n;

	shift = ilog2(umem->page_size);

	n = 0;
	list_for_each_entry(chunk, &umem->chunk_list, list)
		for (i = 0; i < chunk->nmap; ++i)
			n += sg_dma_len(&chunk->page_list[i]) >> shift;

	return n;
}
EXPORT_SYMBOL(ib_umem_page_count);