2008-01-29 14:51:59 +01:00
/*
* Functions related to sysfs handling
*/
# include <linux/kernel.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities to include those
headers directly instead of assuming availability. As this conversion
needs to touch a large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surroundings. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
# include <linux/slab.h>
2008-01-29 14:51:59 +01:00
# include <linux/module.h>
# include <linux/bio.h>
# include <linux/blkdev.h>
# include <linux/blktrace_api.h>
# include "blk.h"
/*
 * One request-queue attribute: the generic attribute descriptor plus the
 * handlers used to read and write it.  Either handler may be left unset;
 * queue_attr_show()/queue_attr_store() return -EIO for a missing handler.
 */
struct queue_sysfs_entry {
struct attribute attr ;
/* Read handler: formats the value for queue @q into the supplied buffer. */
ssize_t ( * show ) ( struct request_queue * , char * ) ;
/* Write handler: receives the written buffer and its length. */
ssize_t ( * store ) ( struct request_queue * , const char * , size_t ) ;
} ;
/*
 * Format an unsigned long attribute value for a read.
 *
 * @var:  value to print
 * @page: caller-supplied output buffer
 *
 * Emits the value in decimal followed by exactly one newline, with no
 * padding, and returns the character count reported by sprintf().
 */
static ssize_t
queue_var_show(unsigned long var, char *page)
{
	return sprintf(page, "%lu\n", var);
}
/*
 * Parse a base-10 unsigned long from a written attribute buffer.
 *
 * @var:   receives the parsed value
 * @page:  text written by the caller
 * @count: number of bytes written
 *
 * The parse result is stored unconditionally and the end-of-number
 * position is not examined, so no input is rejected here.  Always returns
 * @count.
 */
static ssize_t
queue_var_store(unsigned long *var, const char *page, size_t count)
{
	*var = simple_strtoul(page, NULL, 10);

	return count;
}
static ssize_t queue_requests_show ( struct request_queue * q , char * page )
{
return queue_var_show ( q - > nr_requests , ( page ) ) ;
}
static ssize_t
queue_requests_store ( struct request_queue * q , const char * page , size_t count )
{
struct request_list * rl = & q - > rq ;
unsigned long nr ;
2009-09-11 22:44:29 +02:00
int ret ;
if ( ! q - > request_fn )
return - EINVAL ;
ret = queue_var_store ( & nr , page , count ) ;
2008-01-29 14:51:59 +01:00
if ( nr < BLKDEV_MIN_RQ )
nr = BLKDEV_MIN_RQ ;
spin_lock_irq ( q - > queue_lock ) ;
q - > nr_requests = nr ;
blk_queue_congestion_threshold ( q ) ;
2009-04-06 14:48:01 +02:00
if ( rl - > count [ BLK_RW_SYNC ] > = queue_congestion_on_threshold ( q ) )
blk_set_queue_congested ( q , BLK_RW_SYNC ) ;
else if ( rl - > count [ BLK_RW_SYNC ] < queue_congestion_off_threshold ( q ) )
blk_clear_queue_congested ( q , BLK_RW_SYNC ) ;
if ( rl - > count [ BLK_RW_ASYNC ] > = queue_congestion_on_threshold ( q ) )
blk_set_queue_congested ( q , BLK_RW_ASYNC ) ;
else if ( rl - > count [ BLK_RW_ASYNC ] < queue_congestion_off_threshold ( q ) )
blk_clear_queue_congested ( q , BLK_RW_ASYNC ) ;
if ( rl - > count [ BLK_RW_SYNC ] > = q - > nr_requests ) {
blk_set_queue_full ( q , BLK_RW_SYNC ) ;
2011-04-19 13:50:40 +02:00
} else {
2009-04-06 14:48:01 +02:00
blk_clear_queue_full ( q , BLK_RW_SYNC ) ;
wake_up ( & rl - > wait [ BLK_RW_SYNC ] ) ;
2008-01-29 14:51:59 +01:00
}
2009-04-06 14:48:01 +02:00
if ( rl - > count [ BLK_RW_ASYNC ] > = q - > nr_requests ) {
blk_set_queue_full ( q , BLK_RW_ASYNC ) ;
2011-04-19 13:50:40 +02:00
} else {
2009-04-06 14:48:01 +02:00
blk_clear_queue_full ( q , BLK_RW_ASYNC ) ;
wake_up ( & rl - > wait [ BLK_RW_ASYNC ] ) ;
2008-01-29 14:51:59 +01:00
}
spin_unlock_irq ( q - > queue_lock ) ;
return ret ;
}
static ssize_t queue_ra_show ( struct request_queue * q , char * page )
{
2009-07-17 15:26:26 +08:00
unsigned long ra_kb = q - > backing_dev_info . ra_pages < <
( PAGE_CACHE_SHIFT - 10 ) ;
2008-01-29 14:51:59 +01:00
return queue_var_show ( ra_kb , ( page ) ) ;
}
static ssize_t
queue_ra_store ( struct request_queue * q , const char * page , size_t count )
{
unsigned long ra_kb ;
ssize_t ret = queue_var_store ( & ra_kb , page , count ) ;
q - > backing_dev_info . ra_pages = ra_kb > > ( PAGE_CACHE_SHIFT - 10 ) ;
return ret ;
}
/*
 * Read handler for "max_sectors_kb": prints queue_max_sectors(q) halved.
 * (Presumably a sector is 512 bytes, making sectors / 2 a KiB count.)
 */
static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
{
	int kb = queue_max_sectors(q) >> 1;

	return queue_var_show(kb, page);
}
2010-03-10 00:48:33 -05:00
/* Read handler for "max_segments": prints queue_max_segments(q). */
static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
{
	unsigned long segments = queue_max_segments(q);

	return queue_var_show(segments, page);
}
2010-09-10 20:50:10 +02:00
static ssize_t queue_max_integrity_segments_show ( struct request_queue * q , char * page )
{
return queue_var_show ( q - > limits . max_integrity_segments , ( page ) ) ;
}
2010-03-10 00:48:33 -05:00
/*
 * Read handler for "max_segment_size".
 *
 * Prints queue_max_segment_size(q) when blk_queue_cluster(q) is true;
 * otherwise prints PAGE_CACHE_SIZE.
 */
static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
{
	if (!blk_queue_cluster(q))
		return queue_var_show(PAGE_CACHE_SIZE, page);

	return queue_var_show(queue_max_segment_size(q), page);
}
2009-05-22 17:17:49 -04:00
/*
 * Read handler shared by "logical_block_size" and "hw_sector_size":
 * prints queue_logical_block_size(q).
 */
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
{
	unsigned long size = queue_logical_block_size(q);

	return queue_var_show(size, page);
}
2009-05-22 17:17:53 -04:00
/* Read handler for "physical_block_size": prints queue_physical_block_size(q). */
static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
{
	unsigned long size = queue_physical_block_size(q);

	return queue_var_show(size, page);
}
/* Read handler for "minimum_io_size": prints queue_io_min(q). */
static ssize_t queue_io_min_show(struct request_queue *q, char *page)
{
	unsigned long io_min = queue_io_min(q);

	return queue_var_show(io_min, page);
}
/* Read handler for "optimal_io_size": prints queue_io_opt(q). */
static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
{
	unsigned long io_opt = queue_io_opt(q);

	return queue_var_show(io_opt, page);
}
2009-11-10 11:50:21 +01:00
static ssize_t queue_discard_granularity_show ( struct request_queue * q , char * page )
{
return queue_var_show ( q - > limits . discard_granularity , page ) ;
}
static ssize_t queue_discard_max_show ( struct request_queue * q , char * page )
{
2011-05-18 10:37:35 +02:00
return sprintf ( page , " %llu \n " ,
( unsigned long long ) q - > limits . max_discard_sectors < < 9 ) ;
2009-11-10 11:50:21 +01:00
}
2009-12-03 09:24:48 +01:00
/* Read handler for "discard_zeroes_data": prints queue_discard_zeroes_data(q). */
static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
{
	unsigned long zeroes = queue_discard_zeroes_data(q);

	return queue_var_show(zeroes, page);
}
2008-01-29 14:51:59 +01:00
static ssize_t
queue_max_sectors_store ( struct request_queue * q , const char * page , size_t count )
{
unsigned long max_sectors_kb ,
2009-05-22 17:17:50 -04:00
max_hw_sectors_kb = queue_max_hw_sectors ( q ) > > 1 ,
2008-01-29 14:51:59 +01:00
page_kb = 1 < < ( PAGE_CACHE_SHIFT - 10 ) ;
ssize_t ret = queue_var_store ( & max_sectors_kb , page , count ) ;
if ( max_sectors_kb > max_hw_sectors_kb | | max_sectors_kb < page_kb )
return - EINVAL ;
2008-11-25 09:08:39 +01:00
2008-01-29 14:51:59 +01:00
spin_lock_irq ( q - > queue_lock ) ;
2009-09-01 22:40:15 +02:00
q - > limits . max_sectors = max_sectors_kb < < 1 ;
2008-01-29 14:51:59 +01:00
spin_unlock_irq ( q - > queue_lock ) ;
return ret ;
}
/* Read handler for "max_hw_sectors_kb": prints queue_max_hw_sectors(q) halved. */
static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
{
	int kb = queue_max_hw_sectors(q) >> 1;

	return queue_var_show(kb, page);
}
2010-08-07 18:13:50 +02:00
# define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
static ssize_t \
queue_show_ # # name ( struct request_queue * q , char * page ) \
{ \
int bit ; \
bit = test_bit ( QUEUE_FLAG_ # # flag , & q - > queue_flags ) ; \
return queue_var_show ( neg ? ! bit : bit , page ) ; \
} \
static ssize_t \
queue_store_ # # name ( struct request_queue * q , const char * page , size_t count ) \
{ \
unsigned long val ; \
ssize_t ret ; \
ret = queue_var_store ( & val , page , count ) ; \
if ( neg ) \
val = ! val ; \
\
spin_lock_irq ( q - > queue_lock ) ; \
if ( val ) \
queue_flag_set ( QUEUE_FLAG_ # # flag , q ) ; \
else \
queue_flag_clear ( QUEUE_FLAG_ # # flag , q ) ; \
spin_unlock_irq ( q - > queue_lock ) ; \
return ret ; \
2009-01-07 12:22:39 +01:00
}
2010-08-07 18:13:50 +02:00
QUEUE_SYSFS_BIT_FNS ( nonrot , NONROT , 1 ) ;
QUEUE_SYSFS_BIT_FNS ( random , ADD_RANDOM , 0 ) ;
QUEUE_SYSFS_BIT_FNS ( iostats , IO_STAT , 0 ) ;
# undef QUEUE_SYSFS_BIT_FNS
2009-01-07 12:22:39 +01:00
2008-04-29 14:44:19 +02:00
/*
 * Read handler for "nomerges": prints blk_queue_nomerges(q) shifted left
 * by one, OR-ed with blk_queue_noxmerges(q).
 */
static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
	unsigned long mode;

	mode = (blk_queue_nomerges(q) << 1) | blk_queue_noxmerges(q);

	return queue_var_show(mode, page);
}
static ssize_t queue_nomerges_store ( struct request_queue * q , const char * page ,
size_t count )
{
unsigned long nm ;
ssize_t ret = queue_var_store ( & nm , page , count ) ;
2008-05-07 09:09:39 +02:00
spin_lock_irq ( q - > queue_lock ) ;
2010-01-29 09:04:08 +01:00
queue_flag_clear ( QUEUE_FLAG_NOMERGES , q ) ;
queue_flag_clear ( QUEUE_FLAG_NOXMERGES , q ) ;
if ( nm = = 2 )
2008-05-07 09:09:39 +02:00
queue_flag_set ( QUEUE_FLAG_NOMERGES , q ) ;
2010-01-29 09:04:08 +01:00
else if ( nm )
queue_flag_set ( QUEUE_FLAG_NOXMERGES , q ) ;
2008-05-07 09:09:39 +02:00
spin_unlock_irq ( q - > queue_lock ) ;
2009-01-07 12:22:39 +01:00
2008-04-29 14:44:19 +02:00
return ret ;
}
2008-09-13 20:26:01 +02:00
static ssize_t queue_rq_affinity_show ( struct request_queue * q , char * page )
{
2009-07-17 15:26:26 +08:00
bool set = test_bit ( QUEUE_FLAG_SAME_COMP , & q - > queue_flags ) ;
2011-07-23 20:44:25 +02:00
bool force = test_bit ( QUEUE_FLAG_SAME_FORCE , & q - > queue_flags ) ;
2008-09-13 20:26:01 +02:00
2011-07-23 20:44:25 +02:00
return queue_var_show ( set < < force , page ) ;
2008-09-13 20:26:01 +02:00
}
/*
 * Write handler for "rq_affinity".
 *
 * Without CONFIG_USE_GENERIC_SMP_HELPERS the write always fails with
 * -EINVAL.  Otherwise, under q->queue_lock:
 *   2 -> SAME_COMP and SAME_FORCE are both set
 *   1 -> SAME_COMP is set, SAME_FORCE is cleared
 *   0 -> both are cleared
 * Any other value leaves the flags untouched; the result of
 * queue_var_store() is still returned.
 */
static ssize_t
queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
{
	ssize_t ret = -EINVAL;
#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
	unsigned long val;

	ret = queue_var_store(&val, page, count);

	spin_lock_irq(q->queue_lock);
	switch (val) {
	case 2:
		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
		queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
		break;
	case 1:
		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
		break;
	case 0:
		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
		break;
	default:
		break;
	}
	spin_unlock_irq(q->queue_lock);
#endif
	return ret;
}
2008-01-29 14:51:59 +01:00
static struct queue_sysfs_entry queue_requests_entry = {
. attr = { . name = " nr_requests " , . mode = S_IRUGO | S_IWUSR } ,
. show = queue_requests_show ,
. store = queue_requests_store ,
} ;
static struct queue_sysfs_entry queue_ra_entry = {
. attr = { . name = " read_ahead_kb " , . mode = S_IRUGO | S_IWUSR } ,
. show = queue_ra_show ,
. store = queue_ra_store ,
} ;
static struct queue_sysfs_entry queue_max_sectors_entry = {
. attr = { . name = " max_sectors_kb " , . mode = S_IRUGO | S_IWUSR } ,
. show = queue_max_sectors_show ,
. store = queue_max_sectors_store ,
} ;
static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
. attr = { . name = " max_hw_sectors_kb " , . mode = S_IRUGO } ,
. show = queue_max_hw_sectors_show ,
} ;
2010-03-10 00:48:33 -05:00
static struct queue_sysfs_entry queue_max_segments_entry = {
. attr = { . name = " max_segments " , . mode = S_IRUGO } ,
. show = queue_max_segments_show ,
} ;
2010-09-10 20:50:10 +02:00
static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
. attr = { . name = " max_integrity_segments " , . mode = S_IRUGO } ,
. show = queue_max_integrity_segments_show ,
} ;
2010-03-10 00:48:33 -05:00
static struct queue_sysfs_entry queue_max_segment_size_entry = {
. attr = { . name = " max_segment_size " , . mode = S_IRUGO } ,
. show = queue_max_segment_size_show ,
} ;
2008-01-29 14:51:59 +01:00
static struct queue_sysfs_entry queue_iosched_entry = {
. attr = { . name = " scheduler " , . mode = S_IRUGO | S_IWUSR } ,
. show = elv_iosched_show ,
. store = elv_iosched_store ,
} ;
2008-01-29 19:14:08 +01:00
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
. attr = { . name = " hw_sector_size " , . mode = S_IRUGO } ,
2009-05-22 17:17:49 -04:00
. show = queue_logical_block_size_show ,
} ;
static struct queue_sysfs_entry queue_logical_block_size_entry = {
. attr = { . name = " logical_block_size " , . mode = S_IRUGO } ,
. show = queue_logical_block_size_show ,
2008-01-29 19:14:08 +01:00
} ;
2009-05-22 17:17:53 -04:00
static struct queue_sysfs_entry queue_physical_block_size_entry = {
. attr = { . name = " physical_block_size " , . mode = S_IRUGO } ,
. show = queue_physical_block_size_show ,
} ;
static struct queue_sysfs_entry queue_io_min_entry = {
. attr = { . name = " minimum_io_size " , . mode = S_IRUGO } ,
. show = queue_io_min_show ,
} ;
static struct queue_sysfs_entry queue_io_opt_entry = {
. attr = { . name = " optimal_io_size " , . mode = S_IRUGO } ,
. show = queue_io_opt_show ,
2008-01-29 19:14:08 +01:00
} ;
2009-11-10 11:50:21 +01:00
static struct queue_sysfs_entry queue_discard_granularity_entry = {
. attr = { . name = " discard_granularity " , . mode = S_IRUGO } ,
. show = queue_discard_granularity_show ,
} ;
static struct queue_sysfs_entry queue_discard_max_entry = {
. attr = { . name = " discard_max_bytes " , . mode = S_IRUGO } ,
. show = queue_discard_max_show ,
} ;
2009-12-03 09:24:48 +01:00
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
. attr = { . name = " discard_zeroes_data " , . mode = S_IRUGO } ,
. show = queue_discard_zeroes_data_show ,
} ;
2009-01-07 12:22:39 +01:00
static struct queue_sysfs_entry queue_nonrot_entry = {
. attr = { . name = " rotational " , . mode = S_IRUGO | S_IWUSR } ,
2010-08-07 18:13:50 +02:00
. show = queue_show_nonrot ,
. store = queue_store_nonrot ,
2009-01-07 12:22:39 +01:00
} ;
2008-04-29 14:44:19 +02:00
static struct queue_sysfs_entry queue_nomerges_entry = {
. attr = { . name = " nomerges " , . mode = S_IRUGO | S_IWUSR } ,
. show = queue_nomerges_show ,
. store = queue_nomerges_store ,
} ;
2008-09-13 20:26:01 +02:00
static struct queue_sysfs_entry queue_rq_affinity_entry = {
. attr = { . name = " rq_affinity " , . mode = S_IRUGO | S_IWUSR } ,
. show = queue_rq_affinity_show ,
. store = queue_rq_affinity_store ,
} ;
2009-01-23 10:54:44 +01:00
static struct queue_sysfs_entry queue_iostats_entry = {
. attr = { . name = " iostats " , . mode = S_IRUGO | S_IWUSR } ,
2010-08-07 18:13:50 +02:00
. show = queue_show_iostats ,
. store = queue_store_iostats ,
2009-01-23 10:54:44 +01:00
} ;
2010-06-09 10:42:09 +02:00
static struct queue_sysfs_entry queue_random_entry = {
. attr = { . name = " add_random " , . mode = S_IRUGO | S_IWUSR } ,
2010-08-07 18:13:50 +02:00
. show = queue_show_random ,
. store = queue_store_random ,
2010-06-09 10:42:09 +02:00
} ;
2008-01-29 14:51:59 +01:00
static struct attribute * default_attrs [ ] = {
& queue_requests_entry . attr ,
& queue_ra_entry . attr ,
& queue_max_hw_sectors_entry . attr ,
& queue_max_sectors_entry . attr ,
2010-03-10 00:48:33 -05:00
& queue_max_segments_entry . attr ,
2010-09-10 20:50:10 +02:00
& queue_max_integrity_segments_entry . attr ,
2010-03-10 00:48:33 -05:00
& queue_max_segment_size_entry . attr ,
2008-01-29 14:51:59 +01:00
& queue_iosched_entry . attr ,
2008-01-29 19:14:08 +01:00
& queue_hw_sector_size_entry . attr ,
2009-05-22 17:17:49 -04:00
& queue_logical_block_size_entry . attr ,
2009-05-22 17:17:53 -04:00
& queue_physical_block_size_entry . attr ,
& queue_io_min_entry . attr ,
& queue_io_opt_entry . attr ,
2009-11-10 11:50:21 +01:00
& queue_discard_granularity_entry . attr ,
& queue_discard_max_entry . attr ,
2009-12-03 09:24:48 +01:00
& queue_discard_zeroes_data_entry . attr ,
2009-01-07 12:22:39 +01:00
& queue_nonrot_entry . attr ,
2008-04-29 14:44:19 +02:00
& queue_nomerges_entry . attr ,
2008-09-13 20:26:01 +02:00
& queue_rq_affinity_entry . attr ,
2009-01-23 10:54:44 +01:00
& queue_iostats_entry . attr ,
2010-06-09 10:42:09 +02:00
& queue_random_entry . attr ,
2008-01-29 14:51:59 +01:00
NULL ,
} ;
/* Map a pointer to an embedded struct attribute back to its enclosing queue_sysfs_entry. */
# define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
static ssize_t
queue_attr_show ( struct kobject * kobj , struct attribute * attr , char * page )
{
struct queue_sysfs_entry * entry = to_queue ( attr ) ;
struct request_queue * q =
container_of ( kobj , struct request_queue , kobj ) ;
ssize_t res ;
if ( ! entry - > show )
return - EIO ;
mutex_lock ( & q - > sysfs_lock ) ;
if ( test_bit ( QUEUE_FLAG_DEAD , & q - > queue_flags ) ) {
mutex_unlock ( & q - > sysfs_lock ) ;
return - ENOENT ;
}
res = entry - > show ( q , page ) ;
mutex_unlock ( & q - > sysfs_lock ) ;
return res ;
}
static ssize_t
queue_attr_store ( struct kobject * kobj , struct attribute * attr ,
const char * page , size_t length )
{
struct queue_sysfs_entry * entry = to_queue ( attr ) ;
2008-01-31 13:03:55 +01:00
struct request_queue * q ;
2008-01-29 14:51:59 +01:00
ssize_t res ;
if ( ! entry - > store )
return - EIO ;
2008-01-31 13:03:55 +01:00
q = container_of ( kobj , struct request_queue , kobj ) ;
2008-01-29 14:51:59 +01:00
mutex_lock ( & q - > sysfs_lock ) ;
if ( test_bit ( QUEUE_FLAG_DEAD , & q - > queue_flags ) ) {
mutex_unlock ( & q - > sysfs_lock ) ;
return - ENOENT ;
}
res = entry - > store ( q , page , length ) ;
mutex_unlock ( & q - > sysfs_lock ) ;
return res ;
}
/**
2011-09-21 10:01:22 +02:00
* blk_release_queue : - release a & struct request_queue when it is no longer needed
* @ kobj : the kobj belonging to the request queue to be released
2008-01-29 14:51:59 +01:00
*
* Description :
2011-09-21 10:01:22 +02:00
* blk_release_queue is the pair to blk_init_queue ( ) or
2008-01-29 14:51:59 +01:00
* blk_queue_make_request ( ) . It should be called when a request queue is
* being released ; typically when a block device is being de - registered .
* Currently , its primary task it to free all the & struct request
* structures that were allocated to the queue and the queue itself .
*
* Caveat :
* Hopefully the low level driver will have finished any
* outstanding requests first . . .
* */
static void blk_release_queue ( struct kobject * kobj )
{
struct request_queue * q =
container_of ( kobj , struct request_queue , kobj ) ;
struct request_list * rl = & q - > rq ;
blk_sync_queue ( q ) ;
2011-09-28 08:07:01 -06:00
if ( q - > elevator )
elevator_exit ( q - > elevator ) ;
blk_throtl_exit ( q ) ;
2008-01-29 14:51:59 +01:00
if ( rl - > rq_pool )
mempool_destroy ( rl - > rq_pool ) ;
if ( q - > queue_tags )
__blk_queue_free_tags ( q ) ;
block: fix request_queue lifetime handling by making blk_queue_cleanup() properly shutdown
request_queue is refcounted but actually depdends on lifetime
management from the queue owner - on blk_cleanup_queue(), block layer
expects that there's no request passing through request_queue and no
new one will.
This is fundamentally broken. The queue owner (e.g. SCSI layer)
doesn't have a way to know whether there are other active users before
calling blk_cleanup_queue() and other users (e.g. bsg) don't have any
guarantee that the queue is and would stay valid while it's holding a
reference.
With delay added in blk_queue_bio() before queue_lock is grabbed, the
following oops can be easily triggered when a device is removed with
in-flight IOs.
sd 0:0:1:0: [sdb] Stopping disk
ata1.01: disabled
general protection fault: 0000 [#1] PREEMPT SMP
CPU 2
Modules linked in:
Pid: 648, comm: test_rawio Not tainted 3.1.0-rc3-work+ #56 Bochs Bochs
RIP: 0010:[<ffffffff8137d651>] [<ffffffff8137d651>] elv_rqhash_find+0x61/0x100
...
Process test_rawio (pid: 648, threadinfo ffff880019efa000, task ffff880019ef8a80)
...
Call Trace:
[<ffffffff8137d774>] elv_merge+0x84/0xe0
[<ffffffff81385b54>] blk_queue_bio+0xf4/0x400
[<ffffffff813838ea>] generic_make_request+0xca/0x100
[<ffffffff81383994>] submit_bio+0x74/0x100
[<ffffffff811c53ec>] dio_bio_submit+0xbc/0xc0
[<ffffffff811c610e>] __blockdev_direct_IO+0x92e/0xb40
[<ffffffff811c39f7>] blkdev_direct_IO+0x57/0x60
[<ffffffff8113b1c5>] generic_file_aio_read+0x6d5/0x760
[<ffffffff8118c1ca>] do_sync_read+0xda/0x120
[<ffffffff8118ce55>] vfs_read+0xc5/0x180
[<ffffffff8118cfaa>] sys_pread64+0x9a/0xb0
[<ffffffff81afaf6b>] system_call_fastpath+0x16/0x1b
This happens because blk_queue_cleanup() destroys the queue and
elevator whether IOs are in progress or not and DEAD tests are
sprinkled in the request processing path without proper
synchronization.
Similar problem exists for blk-throtl. On queue cleanup, blk-throtl
is shutdown whether it has requests in it or not. Depending on
timing, it either oopses or throttled bios are lost putting tasks
which are waiting for bio completion into eternal D state.
The way it should work is having the usual clear distinction between
shutdown and release. Shutdown drains all currently pending requests,
marks the queue dead, and performs partial teardown of the now
unnecessary part of the queue. Even after shutdown is complete,
reference holders are still allowed to issue requests to the queue
although they will be immmediately failed. The rest of teardown
happens on release.
This patch makes the following changes to make blk_queue_cleanup()
behave as proper shutdown.
* QUEUE_FLAG_DEAD is now set while holding both q->exit_mutex and
queue_lock.
* Unsynchronized DEAD check in generic_make_request_checks() removed.
This couldn't make any meaningful difference as the queue could die
after the check.
* blk_drain_queue() updated such that it can drain all requests and is
now called during cleanup.
* blk_throtl updated such that it checks DEAD on grabbing queue_lock,
drains all throttled bios during cleanup and free td when queue is
released.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2011-10-19 14:42:16 +02:00
blk_throtl_release ( q ) ;
2008-01-29 14:51:59 +01:00
blk_trace_shutdown ( q ) ;
bdi_destroy ( & q - > backing_dev_info ) ;
kmem_cache_free ( blk_requestq_cachep , q ) ;
}
2010-01-19 02:58:23 +01:00
static const struct sysfs_ops queue_sysfs_ops = {
2008-01-29 14:51:59 +01:00
. show = queue_attr_show ,
. store = queue_attr_store ,
} ;
/*
 * kobject type for request queues: ties together the sysfs operations,
 * the default attribute list and the release callback.
 */
struct kobj_type blk_queue_ktype = {
	.sysfs_ops	= &queue_sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= blk_release_queue,
};
int blk_register_queue ( struct gendisk * disk )
{
int ret ;
2009-04-14 14:00:05 +08:00
struct device * dev = disk_to_dev ( disk ) ;
2008-01-29 14:51:59 +01:00
struct request_queue * q = disk - > queue ;
2008-04-21 09:51:06 +02:00
if ( WARN_ON ( ! q ) )
2008-01-29 14:51:59 +01:00
return - ENXIO ;
2009-04-14 14:00:05 +08:00
ret = blk_trace_init_sysfs ( dev ) ;
if ( ret )
return ret ;
2009-06-11 10:52:27 -07:00
ret = kobject_add ( & q - > kobj , kobject_get ( & dev - > kobj ) , " %s " , " queue " ) ;
2011-04-19 13:47:58 +02:00
if ( ret < 0 ) {
blk_trace_remove_sysfs ( dev ) ;
2008-01-29 14:51:59 +01:00
return ret ;
2011-04-19 13:47:58 +02:00
}
2008-01-29 14:51:59 +01:00
kobject_uevent ( & q - > kobj , KOBJ_ADD ) ;
2009-05-22 17:17:52 -04:00
if ( ! q - > request_fn )
return 0 ;
2008-01-29 14:51:59 +01:00
ret = elv_register_queue ( q ) ;
if ( ret ) {
kobject_uevent ( & q - > kobj , KOBJ_REMOVE ) ;
kobject_del ( & q - > kobj ) ;
2011-04-13 22:14:54 +02:00
blk_trace_remove_sysfs ( dev ) ;
2010-08-23 12:30:29 +02:00
kobject_put ( & dev - > kobj ) ;
2008-01-29 14:51:59 +01:00
return ret ;
}
return 0 ;
}
void blk_unregister_queue ( struct gendisk * disk )
{
struct request_queue * q = disk - > queue ;
2008-04-21 09:51:06 +02:00
if ( WARN_ON ( ! q ) )
return ;
2009-09-25 06:19:26 +02:00
if ( q - > request_fn )
2008-01-29 14:51:59 +01:00
elv_unregister_queue ( q ) ;
2009-09-25 06:19:26 +02:00
kobject_uevent ( & q - > kobj , KOBJ_REMOVE ) ;
kobject_del ( & q - > kobj ) ;
blk_trace_remove_sysfs ( disk_to_dev ( disk ) ) ;
kobject_put ( & disk_to_dev ( disk ) - > kobj ) ;
2008-01-29 14:51:59 +01:00
}