2019-05-31 01:09:24 -07:00
// SPDX-License-Identifier: GPL-2.0-only
2005-04-16 15:20:36 -07:00
/*
 * net/sunrpc/cache.c
 *
 * Generic code for various authentication-related caches
 * used by sunrpc clients and servers.
 *
 * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
 */
# include <linux/types.h>
# include <linux/fs.h>
# include <linux/file.h>
# include <linux/slab.h>
# include <linux/signal.h>
# include <linux/sched.h>
# include <linux/kmod.h>
# include <linux/list.h>
# include <linux/module.h>
# include <linux/ctype.h>
2014-11-28 17:50:28 +02:00
# include <linux/string_helpers.h>
2016-12-24 11:46:01 -08:00
# include <linux/uaccess.h>
2005-04-16 15:20:36 -07:00
# include <linux/poll.h>
# include <linux/seq_file.h>
# include <linux/proc_fs.h>
# include <linux/net.h>
# include <linux/workqueue.h>
2006-03-20 22:33:17 -08:00
# include <linux/mutex.h>
2009-08-09 15:14:28 -04:00
# include <linux/pagemap.h>
2005-04-16 15:20:36 -07:00
# include <asm/ioctls.h>
# include <linux/sunrpc/types.h>
# include <linux/sunrpc/cache.h>
# include <linux/sunrpc/stats.h>
2009-08-09 15:14:30 -04:00
# include <linux/sunrpc/rpc_pipe_fs.h>
2020-03-01 18:21:44 -05:00
# include <trace/events/sunrpc.h>
2010-09-27 14:01:58 +04:00
# include "netns.h"
2005-04-16 15:20:36 -07:00
# define RPCDBG_FACILITY RPCDBG_CACHE
2011-01-02 21:28:34 -05:00
static bool cache_defer_req ( struct cache_req * req , struct cache_head * item ) ;
2005-04-16 15:20:36 -07:00
static void cache_revisit_request ( struct cache_head * item ) ;
2015-10-16 08:59:08 +11:00
static void cache_init ( struct cache_head * h , struct cache_detail * detail )
2005-04-16 15:20:36 -07:00
{
nfs: use time64_t internally
The timestamps for the cache are all in boottime seconds, so they
don't overflow 32-bit values, but the use of time_t is deprecated
because it generally does overflow when used with wall-clock time.
There are multiple possible ways of avoiding it:
- leave time_t, which is safe here, but forces others to
look into this code to determine that it is over and over.
- use a more generic type, like 'int' or 'long', which is known
to be sufficient here but loses the documentation of referring
to timestamps
- use ktime_t everywhere, and convert into seconds in the few
places where we want realtime-seconds. The conversion is
sometimes expensive, but not more so than the conversion we
do today.
- use time64_t to clarify that this code is safe. Nothing would
change for 64-bit architectures, but it is slightly less
efficient on 32-bit architectures.
Without a clear winner of the three approaches above, this picks
the last one, favouring readability over a small performance
loss on 32-bit architectures.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2017-10-20 16:34:42 +02:00
time64_t now = seconds_since_boot ( ) ;
2015-07-27 11:10:15 +08:00
INIT_HLIST_NODE ( & h - > cache_list ) ;
2005-04-16 15:20:36 -07:00
h - > flags = 0 ;
2006-03-27 01:15:09 -08:00
kref_init ( & h - > ref ) ;
2005-04-16 15:20:36 -07:00
h - > expiry_time = now + CACHE_NEW_EXPIRY ;
2015-10-16 08:59:08 +11:00
if ( now < = detail - > flush_time )
/* ensure it isn't already expired */
now = detail - > flush_time + 1 ;
2005-04-16 15:20:36 -07:00
h - > last_refresh = now ;
}
2018-11-28 11:45:57 +03:00
static void cache_fresh_unlocked ( struct cache_head * head ,
struct cache_detail * detail ) ;
2018-10-03 12:01:22 -04:00
static struct cache_head * sunrpc_cache_find_rcu ( struct cache_detail * detail ,
struct cache_head * key ,
int hash )
{
struct hlist_head * head = & detail - > hash_table [ hash ] ;
struct cache_head * tmp ;
rcu_read_lock ( ) ;
hlist_for_each_entry_rcu ( tmp , head , cache_list ) {
2020-03-01 18:21:43 -05:00
if ( ! detail - > match ( tmp , key ) )
continue ;
if ( test_bit ( CACHE_VALID , & tmp - > flags ) & &
cache_is_expired ( detail , tmp ) )
continue ;
tmp = cache_get_rcu ( tmp ) ;
rcu_read_unlock ( ) ;
return tmp ;
2018-10-03 12:01:22 -04:00
}
rcu_read_unlock ( ) ;
return NULL ;
}
2020-01-06 13:40:35 -05:00
static void sunrpc_begin_cache_remove_entry ( struct cache_head * ch ,
struct cache_detail * cd )
{
/* Must be called under cd->hash_lock */
hlist_del_init_rcu ( & ch - > cache_list ) ;
set_bit ( CACHE_CLEANED , & ch - > flags ) ;
cd - > entries - - ;
}
/*
 * Second half of removing an entry: settle any pending upcall state via
 * cache_fresh_unlocked(), then drop a reference on 'ch' with cache_put().
 * Every caller in this file invokes it after releasing cd->hash_lock.
 */
static void sunrpc_end_cache_remove_entry(struct cache_head *ch,
					  struct cache_detail *cd)
{
	cache_fresh_unlocked(ch, cd);
	cache_put(ch, cd);
}
2018-10-01 10:41:45 -04:00
static struct cache_head * sunrpc_cache_add_entry ( struct cache_detail * detail ,
struct cache_head * key ,
int hash )
{
struct cache_head * new , * tmp , * freeme = NULL ;
struct hlist_head * head = & detail - > hash_table [ hash ] ;
2006-03-27 01:15:02 -08:00
new = detail - > alloc ( ) ;
if ( ! new )
return NULL ;
2006-08-05 12:14:29 -07:00
/* must fully initialise 'new', else
* we might get lose if we need to
* cache_put it soon .
*/
2015-10-16 08:59:08 +11:00
cache_init ( new , detail ) ;
2006-08-05 12:14:29 -07:00
detail - > init ( new , key ) ;
2006-03-27 01:15:02 -08:00
2018-10-01 10:41:52 -04:00
spin_lock ( & detail - > hash_lock ) ;
2006-03-27 01:15:02 -08:00
/* check if entry appeared while we slept */
2020-02-19 15:05:05 +05:30
hlist_for_each_entry_rcu ( tmp , head , cache_list ,
lockdep_is_held ( & detail - > hash_lock ) ) {
2020-03-01 18:21:43 -05:00
if ( ! detail - > match ( tmp , key ) )
continue ;
if ( test_bit ( CACHE_VALID , & tmp - > flags ) & &
cache_is_expired ( detail , tmp ) ) {
sunrpc_begin_cache_remove_entry ( tmp , detail ) ;
2020-03-01 18:21:44 -05:00
trace_cache_entry_expired ( detail , tmp ) ;
2020-03-01 18:21:43 -05:00
freeme = tmp ;
break ;
2006-03-27 01:15:02 -08:00
}
2020-03-01 18:21:43 -05:00
cache_get ( tmp ) ;
spin_unlock ( & detail - > hash_lock ) ;
cache_put ( new , detail ) ;
return tmp ;
2006-03-27 01:15:02 -08:00
}
2015-07-27 11:10:15 +08:00
2018-10-03 12:01:22 -04:00
hlist_add_head_rcu ( & new - > cache_list , head ) ;
2006-03-27 01:15:02 -08:00
detail - > entries + + ;
cache_get ( new ) ;
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2006-03-27 01:15:02 -08:00
2020-01-06 13:40:35 -05:00
if ( freeme )
sunrpc_end_cache_remove_entry ( freeme , detail ) ;
2006-03-27 01:15:02 -08:00
return new ;
}
2018-10-03 12:01:22 -04:00
/*
 * Look 'key' up in bucket 'hash' of 'detail'.  If the lookup yields
 * nothing, fall back to sunrpc_cache_add_entry(), which inserts an
 * empty entry for the key.
 */
struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail,
					   struct cache_head *key, int hash)
{
	struct cache_head *hit = sunrpc_cache_find_rcu(detail, key, hash);

	/* Didn't find anything, insert an empty entry */
	return hit ? hit : sunrpc_cache_add_entry(detail, key, hash);
}
EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu);
2009-08-04 15:22:38 +10:00
static void cache_dequeue ( struct cache_detail * detail , struct cache_head * ch ) ;
2006-03-27 01:15:08 -08:00
nfs: use time64_t internally
The timestamps for the cache are all in boottime seconds, so they
don't overflow 32-bit values, but the use of time_t is deprecated
because it generally does overflow when used with wall-clock time.
There are multiple possible ways of avoiding it:
- leave time_t, which is safe here, but forces others to
look into this code to determine that it is over and over.
- use a more generic type, like 'int' or 'long', which is known
to be sufficient here but loses the documentation of referring
to timestamps
- use ktime_t everywhere, and convert into seconds in the few
places where we want realtime-seconds. The conversion is
sometimes expensive, but not more so than the conversion we
do today.
- use time64_t to clarify that this code is safe. Nothing would
change for 64-bit architectures, but it is slightly less
efficient on 32-bit architectures.
Without a clear winner of the three approaches above, this picks
the last one, favouring readability over a small performance
loss on 32-bit architectures.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2017-10-20 16:34:42 +02:00
/*
 * Record a refresh of 'head': store the new expiry and refresh time,
 * then mark the entry CACHE_VALID.  Every caller in this file holds
 * detail->hash_lock.
 *
 * Times come from seconds_since_boot() and are held in time64_t.
 */
static void cache_fresh_locked(struct cache_head *head, time64_t expiry,
			       struct cache_detail *detail)
{
	time64_t refreshed = seconds_since_boot();

	/* keep last_refresh past flush_time so it isn't immediately expired */
	if (refreshed <= detail->flush_time)
		refreshed = detail->flush_time + 1;

	head->expiry_time = expiry;
	head->last_refresh = refreshed;

	/* paired with smp_rmb() in cache_is_valid() */
	smp_wmb();
	set_bit(CACHE_VALID, &head->flags);
}
static void cache_fresh_unlocked ( struct cache_head * head ,
2009-09-09 16:32:54 +10:00
struct cache_detail * detail )
2006-03-27 01:15:08 -08:00
{
if ( test_and_clear_bit ( CACHE_PENDING , & head - > flags ) ) {
cache_revisit_request ( head ) ;
2009-08-04 15:22:38 +10:00
cache_dequeue ( detail , head ) ;
2006-03-27 01:15:08 -08:00
}
}
2020-03-01 18:21:44 -05:00
static void cache_make_negative ( struct cache_detail * detail ,
struct cache_head * h )
{
set_bit ( CACHE_NEGATIVE , & h - > flags ) ;
trace_cache_entry_make_negative ( detail , h ) ;
}
static void cache_entry_update ( struct cache_detail * detail ,
struct cache_head * h ,
struct cache_head * new )
{
if ( ! test_bit ( CACHE_NEGATIVE , & new - > flags ) ) {
detail - > update ( h , new ) ;
trace_cache_entry_update ( detail , h ) ;
} else {
cache_make_negative ( detail , h ) ;
}
}
2006-03-27 01:15:02 -08:00
struct cache_head * sunrpc_cache_update ( struct cache_detail * detail ,
struct cache_head * new , struct cache_head * old , int hash )
{
/* The 'old' entry is to be replaced by 'new'.
* If ' old ' is not VALID , we update it directly ,
* otherwise we need to replace it
*/
struct cache_head * tmp ;
if ( ! test_bit ( CACHE_VALID , & old - > flags ) ) {
2018-10-01 10:41:52 -04:00
spin_lock ( & detail - > hash_lock ) ;
2006-03-27 01:15:02 -08:00
if ( ! test_bit ( CACHE_VALID , & old - > flags ) ) {
2020-03-01 18:21:44 -05:00
cache_entry_update ( detail , old , new ) ;
2015-10-16 08:59:08 +11:00
cache_fresh_locked ( old , new - > expiry_time , detail ) ;
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2009-09-09 16:32:54 +10:00
cache_fresh_unlocked ( old , detail ) ;
2006-03-27 01:15:02 -08:00
return old ;
}
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2006-03-27 01:15:02 -08:00
}
/* We need to insert a new entry */
tmp = detail - > alloc ( ) ;
if ( ! tmp ) {
2006-03-27 01:15:09 -08:00
cache_put ( old , detail ) ;
2006-03-27 01:15:02 -08:00
return NULL ;
}
2015-10-16 08:59:08 +11:00
cache_init ( tmp , detail ) ;
2006-03-27 01:15:02 -08:00
detail - > init ( tmp , old ) ;
2018-10-01 10:41:52 -04:00
spin_lock ( & detail - > hash_lock ) ;
2020-03-01 18:21:44 -05:00
cache_entry_update ( detail , tmp , new ) ;
2015-07-27 11:10:15 +08:00
hlist_add_head ( & tmp - > cache_list , & detail - > hash_table [ hash ] ) ;
2006-05-22 22:35:25 -07:00
detail - > entries + + ;
2006-03-27 01:15:02 -08:00
cache_get ( tmp ) ;
2015-10-16 08:59:08 +11:00
cache_fresh_locked ( tmp , new - > expiry_time , detail ) ;
cache_fresh_locked ( old , 0 , detail ) ;
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2009-09-09 16:32:54 +10:00
cache_fresh_unlocked ( tmp , detail ) ;
cache_fresh_unlocked ( old , detail ) ;
2006-03-27 01:15:09 -08:00
cache_put ( old , detail ) ;
2006-03-27 01:15:02 -08:00
return tmp ;
}
2008-12-23 16:30:12 -05:00
EXPORT_SYMBOL_GPL ( sunrpc_cache_update ) ;
2005-04-16 15:20:36 -07:00
2013-03-28 22:19:45 +08:00
static inline int cache_is_valid ( struct cache_head * h )
2009-08-04 15:22:38 +10:00
{
2010-02-03 17:31:31 +11:00
if ( ! test_bit ( CACHE_VALID , & h - > flags ) )
2009-08-04 15:22:38 +10:00
return - EAGAIN ;
else {
/* entry is valid */
if ( test_bit ( CACHE_NEGATIVE , & h - > flags ) )
return - ENOENT ;
2011-01-04 14:12:47 -05:00
else {
/*
* In combination with write barrier in
* sunrpc_cache_update , ensures that anyone
* using the cache entry after this sees the
* updated contents :
*/
smp_rmb ( ) ;
2009-08-04 15:22:38 +10:00
return 0 ;
2011-01-04 14:12:47 -05:00
}
2009-08-04 15:22:38 +10:00
}
}
2009-08-21 11:27:29 -04:00
2011-01-03 15:10:27 -05:00
static int try_to_negate_entry ( struct cache_detail * detail , struct cache_head * h )
{
int rv ;
2018-10-01 10:41:52 -04:00
spin_lock ( & detail - > hash_lock ) ;
2013-03-28 22:19:45 +08:00
rv = cache_is_valid ( h ) ;
sunrpc/cache: use cache_fresh_unlocked consistently and correctly.
cache_fresh_unlocked() is called when a cache entry
has been updated and ensures that if there were any
pending upcalls, they are cleared.
So every time we update a cache entry, we should call this,
and this should be the only way that we try to clear
pending calls (that sort of uniformity makes code sooo much
easier to read).
try_to_negate_entry() will (possibly) mark an entry as
negative. If it doesn't, it is because the entry already
is VALID.
So the entry will be valid on exit, so it is appropriate to
call cache_fresh_unlocked().
So tidy up try_to_negate_entry() to do that, and remove
partial open-coded cache_fresh_unlocked() from the one
call-site of try_to_negate_entry().
In the other branch of the 'switch(cache_make_upcall())',
we again have a partial open-coded version of cache_fresh_unlocked().
Replace that with a real call.
And again in cache_clean(), use a real call to cache_fresh_unlocked().
These call sites might previously have called
cache_revisit_request() if CACHE_PENDING wasn't set.
This is never necessary because cache_revisit_request() can
only do anything if the item is in the cache_defer_hash,
However any time that an item is added to the cache_defer_hash
(setup_deferral), the code immediately tests CACHE_PENDING,
and removes the entry again if it is clear. So all other
places we only need to 'cache_revisit_request' if we've
just cleared CACHE_PENDING.
Reported-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2013-06-13 12:53:42 +10:00
if ( rv = = - EAGAIN ) {
2020-03-01 18:21:44 -05:00
cache_make_negative ( detail , h ) ;
2015-10-16 08:59:08 +11:00
cache_fresh_locked ( h , seconds_since_boot ( ) + CACHE_NEW_EXPIRY ,
detail ) ;
sunrpc/cache: use cache_fresh_unlocked consistently and correctly.
cache_fresh_unlocked() is called when a cache entry
has been updated and ensures that if there were any
pending upcalls, they are cleared.
So every time we update a cache entry, we should call this,
and this should be the only way that we try to clear
pending calls (that sort of uniformity makes code sooo much
easier to read).
try_to_negate_entry() will (possibly) mark an entry as
negative. If it doesn't, it is because the entry already
is VALID.
So the entry will be valid on exit, so it is appropriate to
call cache_fresh_unlocked().
So tidy up try_to_negate_entry() to do that, and remove
partial open-coded cache_fresh_unlocked() from the one
call-site of try_to_negate_entry().
In the other branch of the 'switch(cache_make_upcall())',
we again have a partial open-coded version of cache_fresh_unlocked().
Replace that with a real call.
And again in cache_clean(), use a real call to cache_fresh_unlocked().
These call sites might previously have called
cache_revisit_request() if CACHE_PENDING wasn't set.
This is never necessary because cache_revisit_request() can
only do anything if the item is in the cache_defer_hash,
However any time that an item is added to the cache_defer_hash
(setup_deferral), the code immediately tests CACHE_PENDING,
and removes the entry again if it is clear. So all other
places we only need to 'cache_revisit_request' if we've
just cleared CACHE_PENDING.
Reported-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2013-06-13 12:53:42 +10:00
rv = - ENOENT ;
2011-01-03 15:10:27 -05:00
}
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2011-01-03 15:10:27 -05:00
cache_fresh_unlocked ( h , detail ) ;
sunrpc/cache: use cache_fresh_unlocked consistently and correctly.
cache_fresh_unlocked() is called when a cache entry
has been updated and ensures that if there were any
pending upcalls, they are cleared.
So every time we update a cache entry, we should call this,
and this should be the only way that we try to clear
pending calls (that sort of uniformity makes code sooo much
easier to read).
try_to_negate_entry() will (possibly) mark an entry as
negative. If it doesn't, it is because the entry already
is VALID.
So the entry will be valid on exit, so it is appropriate to
call cache_fresh_unlocked().
So tidy up try_to_negate_entry() to do that, and remove
partial open-coded cache_fresh_unlocked() from the one
call-site of try_to_negate_entry().
In the other branch of the 'switch(cache_make_upcall())',
we again have a partial open-coded version of cache_fresh_unlocked().
Replace that with a real call.
And again in cache_clean(), use a real call to cache_fresh_unlocked().
These call sites might previously have called
cache_revisit_request() if CACHE_PENDING wasn't set.
This is never necessary because cache_revisit_request() can
only do anything if the item is in the cache_defer_hash,
However any time that an item is added to the cache_defer_hash
(setup_deferral), the code immediately tests CACHE_PENDING,
and removes the entry again if it is clear. So all other
places we only need to 'cache_revisit_request' if we've
just cleared CACHE_PENDING.
Reported-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2013-06-13 12:53:42 +10:00
return rv ;
2011-01-03 15:10:27 -05:00
}
2005-04-16 15:20:36 -07:00
/*
* This is the generic cache management routine for all
* the authentication caches .
* It checks the currency of a cache item and will ( later )
* initiate an upcall to fill it if needed .
*
*
* Returns 0 if the cache_head can be used , or cache_puts it and returns
2009-08-04 15:22:38 +10:00
* - EAGAIN if upcall is pending and request has been queued
* - ETIMEDOUT if upcall failed or request could not be queue or
* upcall completed but item is still invalid ( implying that
* the cache item has been replaced with a newer one ) .
2005-04-16 15:20:36 -07:00
* - ENOENT if cache entry was negative
*/
int cache_check ( struct cache_detail * detail ,
struct cache_head * h , struct cache_req * rqstp )
{
int rv ;
nfs: use time64_t internally
The timestamps for the cache are all in boottime seconds, so they
don't overflow 32-bit values, but the use of time_t is deprecated
because it generally does overflow when used with wall-clock time.
There are multiple possible ways of avoiding it:
- leave time_t, which is safe here, but forces others to
look into this code to determine that it is over and over.
- use a more generic type, like 'int' or 'long', which is known
to be sufficient here but loses the documentation of referring
to timestamps
- use ktime_t everywhere, and convert into seconds in the few
places where we want realtime-seconds. The conversion is
sometimes expensive, but not more so than the conversion we
do today.
- use time64_t to clarify that this code is safe. Nothing would
change for 64-bit architectures, but it is slightly less
efficient on 32-bit architectures.
Without a clear winner of the three approaches above, this picks
the last one, favouring readability over a small performance
loss on 32-bit architectures.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2017-10-20 16:34:42 +02:00
time64_t refresh_age , age ;
2005-04-16 15:20:36 -07:00
/* First decide return status as best we can */
2013-03-28 22:19:45 +08:00
rv = cache_is_valid ( h ) ;
2005-04-16 15:20:36 -07:00
/* now see if we want to start an upcall */
refresh_age = ( h - > expiry_time - h - > last_refresh ) ;
2010-08-12 16:55:22 +10:00
age = seconds_since_boot ( ) - h - > last_refresh ;
2005-04-16 15:20:36 -07:00
if ( rqstp = = NULL ) {
if ( rv = = - EAGAIN )
rv = - ENOENT ;
2013-06-13 12:53:42 +10:00
} else if ( rv = = - EAGAIN | |
( h - > expiry_time ! = 0 & & age > refresh_age / 2 ) ) {
nfs: use time64_t internally
The timestamps for the cache are all in boottime seconds, so they
don't overflow 32-bit values, but the use of time_t is deprecated
because it generally does overflow when used with wall-clock time.
There are multiple possible ways of avoiding it:
- leave time_t, which is safe here, but forces others to
look into this code to determine that it is over and over.
- use a more generic type, like 'int' or 'long', which is known
to be sufficient here but loses the documentation of referring
to timestamps
- use ktime_t everywhere, and convert into seconds in the few
places where we want realtime-seconds. The conversion is
sometimes expensive, but not more so than the conversion we
do today.
- use time64_t to clarify that this code is safe. Nothing would
change for 64-bit architectures, but it is slightly less
efficient on 32-bit architectures.
Without a clear winner of the three approaches above, this picks
the last one, favouring readability over a small performance
loss on 32-bit architectures.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2017-10-20 16:34:42 +02:00
dprintk ( " RPC: Want update, refage=%lld, age=%lld \n " ,
2007-01-31 12:14:08 -05:00
refresh_age , age ) ;
2020-03-01 18:21:42 -05:00
switch ( detail - > cache_upcall ( detail , h ) ) {
case - EINVAL :
sunrpc/cache: handle missing listeners better.
If no handler (such as rpc.mountd) has opened
a cache 'channel', the sunrpc cache responds to
all lookup requests with -ENOENT. This is particularly
important for the auth.unix.gid cache which is
optional.
If the channel was open briefly and an upcall was written to it,
this upcall remains pending even when the handler closes the
channel. When an upcall is pending, the code currently
doesn't check if there are still listeners, it only performs
that check before sending an upcall.
As the cache treads a recently closes channel (closed less than
30 seconds ago) as "potentially still open", there is a
reasonable sized window when a request can become pending
in a closed channel, and thereby block lookups indefinitely.
This can easily be demonstrated by running
cat /proc/net/rpc/auth.unix.gid/channel
and then trying to mount an NFS filesystem from this host. It
will block indefinitely (unless mountd is run with --manage-gids,
or krb5 is used).
When cache_check() finds that an upcall is pending, it should
perform the "cache_listeners_exist()" exist test. If no
listeners do exist, the request should be negated.
With this change in place, there can still be a 30second wait on
mount, until the cache gives up waiting for a handler to come
back, but this is much better than an indefinite wait.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2019-03-22 13:16:56 +11:00
rv = try_to_negate_entry ( detail , h ) ;
2020-03-01 18:21:42 -05:00
break ;
case - EAGAIN :
cache_fresh_unlocked ( h , detail ) ;
break ;
}
2005-04-16 15:20:36 -07:00
}
2009-08-04 15:22:38 +10:00
if ( rv = = - EAGAIN ) {
2011-01-02 21:28:34 -05:00
if ( ! cache_defer_req ( rqstp , h ) ) {
/*
* Request was not deferred ; handle it as best
* we can ourselves :
*/
2013-03-28 22:19:45 +08:00
rv = cache_is_valid ( h ) ;
2009-08-04 15:22:38 +10:00
if ( rv = = - EAGAIN )
rv = - ETIMEDOUT ;
}
}
2006-03-27 01:15:07 -08:00
if ( rv )
2006-03-27 01:15:09 -08:00
cache_put ( h , detail ) ;
2005-04-16 15:20:36 -07:00
return rv ;
}
2008-12-23 16:30:12 -05:00
EXPORT_SYMBOL_GPL ( cache_check ) ;
2005-04-16 15:20:36 -07:00
/*
 * Caches need to be periodically cleaned.
 * For this we maintain a list of cache_detail and
 * a current pointer into that list and into the table
 * for that entry.
 *
 * Each time cache_clean is called it finds the next non-empty entry
 * in the current table and walks the list in that entry
 * looking for entries that can be removed.
 *
 * An entry gets removed if:
 * - The expiry is before current time
 * - The last_refresh time is before the flush_time for that cache
 *
 * Later we might drop old entries with non-NEVER expiry if that table
 * is getting 'full' for some definition of 'full'.
 *
 * The question of "how often to scan a table" is an interesting one
 * and is answered in part by the use of the "nextcheck" field in the
 * cache_detail.
 * When a scan of a table begins, the nextcheck field is set to a time
 * that is well into the future.
 * While scanning, if an expiry time is found that is earlier than the
 * current nextcheck time, nextcheck is set to that expiry time.
 * If the flush_time is ever set to a time earlier than the nextcheck
 * time, the nextcheck time is then set to that flush_time.
 *
 * A table is then only scanned if the current time is at least
 * the nextcheck time.
 */
static LIST_HEAD ( cache_list ) ;
static DEFINE_SPINLOCK ( cache_list_lock ) ;
static struct cache_detail * current_detail ;
static int current_index ;
2006-11-22 14:55:48 +00:00
static void do_cache_clean ( struct work_struct * work ) ;
sunrpc: make the cache cleaner workqueue deferrable
This patch makes the cache_cleaner workqueue deferrable, to prevent
unnecessary system wake-ups, which is very important for embedded
battery-powered devices.
do_cache_clean() is called every 30 seconds at the moment, and often
makes the system wake up from its power-save sleep state. With this
change, when the workqueue uses a deferrable timer, the
do_cache_clean() invocation will be delayed and combined with the
closest "real" wake-up. This improves the power consumption situation.
Note, I tried to create a DECLARE_DELAYED_WORK_DEFERRABLE() helper
macro, similar to DECLARE_DELAYED_WORK(), but failed because of the
way the timer wheel core stores the deferrable flag (it is the
LSBit in the time->base pointer). My attempt to define a static
variable with this bit set ended up with the "initializer element is
not constant" error.
Thus, I have to use run-time initialization, so I created a new
cache_initialize() function which is called once when sunrpc is
being initialized.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2010-07-01 18:05:56 +03:00
static struct delayed_work cache_cleaner ;
2005-04-16 15:20:36 -07:00
2011-11-25 17:12:40 +03:00
void sunrpc_init_cache_detail ( struct cache_detail * cd )
2007-11-12 17:04:29 -05:00
{
2018-10-01 10:41:52 -04:00
spin_lock_init ( & cd - > hash_lock ) ;
2005-04-16 15:20:36 -07:00
INIT_LIST_HEAD ( & cd - > queue ) ;
spin_lock ( & cache_list_lock ) ;
cd - > nextcheck = 0 ;
cd - > entries = 0 ;
2019-07-26 18:33:01 -04:00
atomic_set ( & cd - > writers , 0 ) ;
2005-04-16 15:20:36 -07:00
cd - > last_close = 0 ;
cd - > last_warn = - 1 ;
list_add ( & cd - > others , & cache_list ) ;
spin_unlock ( & cache_list_lock ) ;
/* start the cleaning process */
2016-09-01 15:30:26 +08:00
queue_delayed_work ( system_power_efficient_wq , & cache_cleaner , 0 ) ;
2005-04-16 15:20:36 -07:00
}
2011-11-25 17:12:40 +03:00
EXPORT_SYMBOL_GPL ( sunrpc_init_cache_detail ) ;
2005-04-16 15:20:36 -07:00
2011-11-25 17:12:40 +03:00
void sunrpc_destroy_cache_detail ( struct cache_detail * cd )
2005-04-16 15:20:36 -07:00
{
cache_purge ( cd ) ;
spin_lock ( & cache_list_lock ) ;
2018-10-01 10:41:52 -04:00
spin_lock ( & cd - > hash_lock ) ;
2005-04-16 15:20:36 -07:00
if ( current_detail = = cd )
current_detail = NULL ;
list_del_init ( & cd - > others ) ;
2018-10-01 10:41:52 -04:00
spin_unlock ( & cd - > hash_lock ) ;
2005-04-16 15:20:36 -07:00
spin_unlock ( & cache_list_lock ) ;
if ( list_empty ( & cache_list ) ) {
/* module must be being unloaded so its safe to kill the worker */
2007-08-07 15:33:01 -04:00
cancel_delayed_work_sync ( & cache_cleaner ) ;
2005-04-16 15:20:36 -07:00
}
}
2011-11-25 17:12:40 +03:00
EXPORT_SYMBOL_GPL ( sunrpc_destroy_cache_detail ) ;
2005-04-16 15:20:36 -07:00
/* clean cache tries to find something to clean
* and cleans it .
* It returns 1 if it cleaned something ,
* 0 if it didn ' t find anything this time
* - 1 if it fell off the end of the list .
*/
static int cache_clean ( void )
{
int rv = 0 ;
struct list_head * next ;
spin_lock ( & cache_list_lock ) ;
/* find a suitable table if we don't already have one */
while ( current_detail = = NULL | |
current_index > = current_detail - > hash_size ) {
if ( current_detail )
next = current_detail - > others . next ;
else
next = cache_list . next ;
if ( next = = & cache_list ) {
current_detail = NULL ;
spin_unlock ( & cache_list_lock ) ;
return - 1 ;
}
current_detail = list_entry ( next , struct cache_detail , others ) ;
2010-08-12 16:55:22 +10:00
if ( current_detail - > nextcheck > seconds_since_boot ( ) )
2005-04-16 15:20:36 -07:00
current_index = current_detail - > hash_size ;
else {
current_index = 0 ;
2010-08-12 16:55:22 +10:00
current_detail - > nextcheck = seconds_since_boot ( ) + 30 * 60 ;
2005-04-16 15:20:36 -07:00
}
}
/* find a non-empty bucket in the table */
while ( current_detail & &
current_index < current_detail - > hash_size & &
2015-07-27 11:10:15 +08:00
hlist_empty ( & current_detail - > hash_table [ current_index ] ) )
2005-04-16 15:20:36 -07:00
current_index + + ;
/* find a cleanable entry in the bucket and clean it, or set to next bucket */
2007-02-09 15:38:13 -08:00
2005-04-16 15:20:36 -07:00
if ( current_detail & & current_index < current_detail - > hash_size ) {
2015-07-27 11:10:15 +08:00
struct cache_head * ch = NULL ;
2005-04-16 15:20:36 -07:00
struct cache_detail * d ;
2015-07-27 11:10:15 +08:00
struct hlist_head * head ;
struct hlist_node * tmp ;
2007-02-09 15:38:13 -08:00
2018-10-01 10:41:52 -04:00
spin_lock ( & current_detail - > hash_lock ) ;
2005-04-16 15:20:36 -07:00
/* Ok, now to clean this strand */
2007-02-09 15:38:13 -08:00
2015-07-27 11:10:15 +08:00
head = & current_detail - > hash_table [ current_index ] ;
hlist_for_each_entry_safe ( ch , tmp , head , cache_list ) {
2005-04-16 15:20:36 -07:00
if ( current_detail - > nextcheck > ch - > expiry_time )
current_detail - > nextcheck = ch - > expiry_time + 1 ;
2010-02-03 17:31:31 +11:00
if ( ! cache_is_expired ( current_detail , ch ) )
2005-04-16 15:20:36 -07:00
continue ;
2020-01-06 13:40:35 -05:00
sunrpc_begin_cache_remove_entry ( ch , current_detail ) ;
2020-03-01 18:21:44 -05:00
trace_cache_entry_expired ( current_detail , ch ) ;
2005-04-16 15:20:36 -07:00
rv = 1 ;
2010-02-03 17:31:31 +11:00
break ;
2005-04-16 15:20:36 -07:00
}
2010-02-03 17:31:31 +11:00
2018-10-01 10:41:52 -04:00
spin_unlock ( & current_detail - > hash_lock ) ;
2005-04-16 15:20:36 -07:00
d = current_detail ;
if ( ! ch )
current_index + + ;
spin_unlock ( & cache_list_lock ) ;
2020-01-06 13:40:35 -05:00
if ( ch )
sunrpc_end_cache_remove_entry ( ch , d ) ;
2005-04-16 15:20:36 -07:00
} else
spin_unlock ( & cache_list_lock ) ;
return rv ;
}
/*
* We want to regularly clean the cache , so we need to schedule some work . . .
*/
2006-11-22 14:55:48 +00:00
static void do_cache_clean ( struct work_struct * work )
2005-04-16 15:20:36 -07:00
{
int delay = 5 ;
if ( cache_clean ( ) = = - 1 )
2009-06-10 12:52:21 -07:00
delay = round_jiffies_relative ( 30 * HZ ) ;
2005-04-16 15:20:36 -07:00
if ( list_empty ( & cache_list ) )
delay = 0 ;
if ( delay )
2016-09-01 15:30:26 +08:00
queue_delayed_work ( system_power_efficient_wq ,
& cache_cleaner , delay ) ;
2005-04-16 15:20:36 -07:00
}
2007-02-09 15:38:13 -08:00
/*
 * Clean all caches promptly.
 *
 * cache_clean() is called until it reports -1, and that whole drain is
 * done twice, so that every cache has had a chance to be fully cleaned.
 * cond_resched() is called between steps.
 */
void cache_flush(void)
{
	int pass;

	for (pass = 0; pass < 2; pass++) {
		while (cache_clean() != -1)
			cond_resched();
	}
}
2008-12-23 16:30:12 -05:00
EXPORT_SYMBOL_GPL ( cache_flush ) ;
2005-04-16 15:20:36 -07:00
void cache_purge ( struct cache_detail * detail )
{
2017-02-08 09:54:33 +08:00
struct cache_head * ch = NULL ;
struct hlist_head * head = NULL ;
int i = 0 ;
2018-10-01 10:41:52 -04:00
spin_lock ( & detail - > hash_lock ) ;
2017-02-08 09:54:33 +08:00
if ( ! detail - > entries ) {
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2017-02-08 09:54:33 +08:00
return ;
}
dprintk ( " RPC: %d entries in %s cache \n " , detail - > entries , detail - > name ) ;
for ( i = 0 ; i < detail - > hash_size ; i + + ) {
head = & detail - > hash_table [ i ] ;
2020-04-06 01:57:22 +08:00
while ( ! hlist_empty ( head ) ) {
ch = hlist_entry ( head - > first , struct cache_head ,
cache_list ) ;
2020-01-06 13:40:35 -05:00
sunrpc_begin_cache_remove_entry ( ch , detail ) ;
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2020-01-06 13:40:35 -05:00
sunrpc_end_cache_remove_entry ( ch , detail ) ;
2018-10-01 10:41:52 -04:00
spin_lock ( & detail - > hash_lock ) ;
2017-02-08 09:54:33 +08:00
}
}
2018-10-01 10:41:52 -04:00
spin_unlock ( & detail - > hash_lock ) ;
2005-04-16 15:20:36 -07:00
}
2008-12-23 16:30:12 -05:00
EXPORT_SYMBOL_GPL ( cache_purge ) ;
2005-04-16 15:20:36 -07:00
/*
* Deferral and Revisiting of Requests .
*
* If a cache lookup finds a pending entry , we
* need to defer the request and revisit it later .
* All deferred requests are stored in a hash table ,
* indexed by " struct cache_head * " .
* As it may be wasteful to store a whole request
2007-02-09 15:38:13 -08:00
* structure , we allow the request to provide a
2005-04-16 15:20:36 -07:00
* deferred form , which must contain a
* ' struct cache_deferred_req '
* This cache_deferred_req contains a method to allow
* it to be revisited when cache info is available
*/
/* Number of buckets in cache_defer_hash: as many as struct list_heads fit in one page. */
# define DFR_HASHSIZE (PAGE_SIZE / sizeof(struct list_head))
/*
 * Bucket index for a cache_head pointer: two right-shifted copies of the
 * pointer value (by 4 and by 13) are XORed and reduced modulo DFR_HASHSIZE.
 */
# define DFR_HASH(item) ((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE)
/* Threshold on cache_defer_cnt above which cache_limit_defers() discards one request. */
# define DFR_MAX 300 /* ??? */
/* Taken around every update of the deferral hash table, cache_defer_list and cache_defer_cnt. */
static DEFINE_SPINLOCK ( cache_defer_lock ) ;
/* Counted deferred requests, linked through cache_deferred_req.recent. */
static LIST_HEAD ( cache_defer_list ) ;
2010-08-12 17:04:08 +10:00
static struct hlist_head cache_defer_hash [ DFR_HASHSIZE ] ;
2005-04-16 15:20:36 -07:00
static int cache_defer_cnt ;
2010-08-26 13:19:52 -04:00
static void __unhash_deferred_req ( struct cache_deferred_req * dreq )
{
2010-08-12 17:04:08 +10:00
hlist_del_init ( & dreq - > hash ) ;
2010-10-07 15:29:46 +11:00
if ( ! list_empty ( & dreq - > recent ) ) {
list_del_init ( & dreq - > recent ) ;
cache_defer_cnt - - ;
}
2010-08-26 13:19:52 -04:00
}
static void __hash_deferred_req ( struct cache_deferred_req * dreq , struct cache_head * item )
2005-04-16 15:20:36 -07:00
{
int hash = DFR_HASH ( item ) ;
2010-10-07 15:29:46 +11:00
INIT_LIST_HEAD ( & dreq - > recent ) ;
2010-08-12 17:04:08 +10:00
hlist_add_head ( & dreq - > hash , & cache_defer_hash [ hash ] ) ;
2010-08-26 13:19:52 -04:00
}
2010-10-07 15:29:46 +11:00
static void setup_deferral ( struct cache_deferred_req * dreq ,
struct cache_head * item ,
int count_me )
2005-04-16 15:20:36 -07:00
{
dreq - > item = item ;
spin_lock ( & cache_defer_lock ) ;
2010-08-26 13:19:52 -04:00
__hash_deferred_req ( dreq , item ) ;
2005-04-16 15:20:36 -07:00
2010-10-07 15:29:46 +11:00
if ( count_me ) {
cache_defer_cnt + + ;
list_add ( & dreq - > recent , & cache_defer_list ) ;
2005-04-16 15:20:36 -07:00
}
2010-10-07 15:29:46 +11:00
2005-04-16 15:20:36 -07:00
spin_unlock ( & cache_defer_lock ) ;
2010-08-26 16:56:23 -04:00
}
sunrpc/cache: allow threads to block while waiting for cache update.
The current practice of waiting for cache updates by queueing the
whole request to be retried has (at least) two problems.
1/ With NFSv4, requests can be quite complex and re-trying a whole
request when a later part fails should only be a last-resort, not a
normal practice.
2/ Large requests, and in particular any 'write' request, will not be
queued by the current code and doing so would be undesirable.
In many cases only a very short wait is needed before the cache gets
valid data.
So, providing the underlying transport permits it by setting
->thread_wait,
arrange to wait briefly for an upcall to be completed (as reflected in
the clearing of CACHE_PENDING).
If the short wait was not long enough and CACHE_PENDING is still set,
fall back on the old approach.
The 'thread_wait' value is set to 5 seconds when there are spare
threads, and 1 second when there are no spare threads.
These values are probably much higher than needed, but will ensure
some forward progress.
Note that as we only request an update for a non-valid item, and as
non-valid items are updated in place it is extremely unlikely that
cache_check will return -ETIMEDOUT. Normally cache_defer_req will
sleep for a short while and then find that the item is_valid.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2010-08-12 17:04:06 +10:00
2010-08-26 16:56:23 -04:00
/*
 * Deferred request used when a thread waits in place (see cache_wait_req()).
 * The ->revisit callback installed for it, cache_restart_thread(), maps
 * 'handle' back to this structure and signals 'completion'.
 */
struct thread_deferred_req {
struct cache_deferred_req handle ;
struct completion completion ;
} ;
static void cache_restart_thread ( struct cache_deferred_req * dreq , int too_many )
{
struct thread_deferred_req * dr =
container_of ( dreq , struct thread_deferred_req , handle ) ;
complete ( & dr - > completion ) ;
}
2010-10-07 15:29:46 +11:00
static void cache_wait_req ( struct cache_req * req , struct cache_head * item )
2010-08-26 16:56:23 -04:00
{
struct thread_deferred_req sleeper ;
struct cache_deferred_req * dreq = & sleeper . handle ;
sleeper . completion = COMPLETION_INITIALIZER_ONSTACK ( sleeper . completion ) ;
dreq - > revisit = cache_restart_thread ;
2010-10-07 15:29:46 +11:00
setup_deferral ( dreq , item , 0 ) ;
2010-08-26 16:56:23 -04:00
2010-10-07 15:29:46 +11:00
if ( ! test_bit ( CACHE_PENDING , & item - > flags ) | |
2010-09-22 12:55:06 +10:00
wait_for_completion_interruptible_timeout (
2010-08-26 16:56:23 -04:00
& sleeper . completion , req - > thread_wait ) < = 0 ) {
/* The completion wasn't completed, so we need
* to clean up
*/
spin_lock ( & cache_defer_lock ) ;
2010-08-12 17:04:08 +10:00
if ( ! hlist_unhashed ( & sleeper . handle . hash ) ) {
2010-08-26 16:56:23 -04:00
__unhash_deferred_req ( & sleeper . handle ) ;
spin_unlock ( & cache_defer_lock ) ;
} else {
/* cache_revisit_request already removed
* this from the hash table , but hasn ' t
* called - > revisit yet . It will very soon
* and we need to wait for it .
sunrpc/cache: allow threads to block while waiting for cache update.
The current practice of waiting for cache updates by queueing the
whole request to be retried has (at least) two problems.
1/ With NFSv4, requests can be quite complex and re-trying a whole
request when a later part fails should only be a last-resort, not a
normal practice.
2/ Large requests, and in particular any 'write' request, will not be
queued by the current code and doing so would be undesirable.
In many cases only a very sort wait is needed before the cache gets
valid data.
So, providing the underlying transport permits it by setting
->thread_wait,
arrange to wait briefly for an upcall to be completed (as reflected in
the clearing of CACHE_PENDING).
If the short wait was not long enough and CACHE_PENDING is still set,
fall back on the old approach.
The 'thread_wait' value is set to 5 seconds when there are spare
threads, and 1 second when there are no spare threads.
These values are probably much higher than needed, but will ensure
some forward progress.
Note that as we only request an update for a non-valid item, and as
non-valid items are updated in place it is extremely unlikely that
cache_check will return -ETIMEDOUT. Normally cache_defer_req will
sleep for a short while and then find that the item is_valid.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2010-08-12 17:04:06 +10:00
*/
2010-08-26 16:56:23 -04:00
spin_unlock ( & cache_defer_lock ) ;
wait_for_completion ( & sleeper . completion ) ;
sunrpc/cache: allow threads to block while waiting for cache update.
The current practice of waiting for cache updates by queueing the
whole request to be retried has (at least) two problems.
1/ With NFSv4, requests can be quite complex and re-trying a whole
request when a later part fails should only be a last-resort, not a
normal practice.
2/ Large requests, and in particular any 'write' request, will not be
queued by the current code and doing so would be undesirable.
In many cases only a very sort wait is needed before the cache gets
valid data.
So, providing the underlying transport permits it by setting
->thread_wait,
arrange to wait briefly for an upcall to be completed (as reflected in
the clearing of CACHE_PENDING).
If the short wait was not long enough and CACHE_PENDING is still set,
fall back on the old approach.
The 'thread_wait' value is set to 5 seconds when there are spare
threads, and 1 second when there are no spare threads.
These values are probably much higher than needed, but will ensure
some forward progress.
Note that as we only request an update for a non-valid item, and as
non-valid items are updated in place it is extremely unlikely that
cache_check will return -ETIMEDOUT. Normally cache_defer_req will
sleep for a short while and then find that the item is_valid.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2010-08-12 17:04:06 +10:00
}
2010-08-26 16:56:23 -04:00
}
}
2010-10-07 15:29:46 +11:00
static void cache_limit_defers ( void )
2010-08-26 16:56:23 -04:00
{
2010-10-07 15:29:46 +11:00
/* Make sure we haven't exceed the limit of allowed deferred
* requests .
*/
struct cache_deferred_req * discard = NULL ;
2010-08-26 16:56:23 -04:00
2010-10-07 15:29:46 +11:00
if ( cache_defer_cnt < = DFR_MAX )
return ;
2010-10-07 15:29:46 +11:00
2010-10-07 15:29:46 +11:00
spin_lock ( & cache_defer_lock ) ;
/* Consider removing either the first or the last */
if ( cache_defer_cnt > DFR_MAX ) {
2014-01-11 07:15:59 -05:00
if ( prandom_u32 ( ) & 1 )
2010-10-07 15:29:46 +11:00
discard = list_entry ( cache_defer_list . next ,
struct cache_deferred_req , recent ) ;
else
discard = list_entry ( cache_defer_list . prev ,
struct cache_deferred_req , recent ) ;
__unhash_deferred_req ( discard ) ;
}
spin_unlock ( & cache_defer_lock ) ;
2009-09-09 16:32:54 +10:00
if ( discard )
discard - > revisit ( discard , 1 ) ;
2010-10-07 15:29:46 +11:00
}
2009-09-09 16:32:54 +10:00
2011-01-02 21:28:34 -05:00
/* Return true if and only if a deferred request is queued. */
static bool cache_defer_req ( struct cache_req * req , struct cache_head * item )
2010-10-07 15:29:46 +11:00
{
struct cache_deferred_req * dreq ;
2010-10-07 15:29:46 +11:00
2010-08-26 16:56:23 -04:00
if ( req - > thread_wait ) {
2010-10-07 15:29:46 +11:00
cache_wait_req ( req , item ) ;
if ( ! test_bit ( CACHE_PENDING , & item - > flags ) )
2011-01-02 21:28:34 -05:00
return false ;
2005-04-16 15:20:36 -07:00
}
2010-08-26 16:56:23 -04:00
dreq = req - > defer ( req ) ;
if ( dreq = = NULL )
2011-01-02 21:28:34 -05:00
return false ;
2010-10-07 15:29:46 +11:00
setup_deferral ( dreq , item , 1 ) ;
2010-10-07 15:29:46 +11:00
if ( ! test_bit ( CACHE_PENDING , & item - > flags ) )
/* Bit could have been cleared before we managed to
* set up the deferral , so need to revisit just in case
*/
cache_revisit_request ( item ) ;
2010-10-07 15:29:46 +11:00
cache_limit_defers ( ) ;
2011-01-02 21:28:34 -05:00
return true ;
2005-04-16 15:20:36 -07:00
}
static void cache_revisit_request ( struct cache_head * item )
{
struct cache_deferred_req * dreq ;
struct list_head pending ;
hlist: drop the node parameter from iterators
I'm not sure why, but the hlist for each entry iterators were conceived
list_for_each_entry(pos, head, member)
The hlist ones were greedy and wanted an extra parameter:
hlist_for_each_entry(tpos, pos, head, member)
Why did they need an extra pos parameter? I'm not quite sure. Not only
they don't really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.
Besides the semantic patch, there was some manual work required:
- Fix up the actual hlist iterators in linux/list.h
- Fix up the declaration of other iterators based on the hlist ones.
- A very small amount of places were using the 'node' parameter, this
was modified to use 'obj->member' instead.
- Coccinelle didn't handle the hlist_for_each_entry_safe iterator
properly, so those had to be fixed up manually.
The semantic patch which is mostly the work of Peter Senna Tschudin is here:
@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;
type T;
expression a,c,d,e;
identifier b;
statement S;
@@
-T b;
<+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
...+>
[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foudnation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin <peter.senna@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-27 17:06:00 -08:00
struct hlist_node * tmp ;
2005-04-16 15:20:36 -07:00
int hash = DFR_HASH ( item ) ;
INIT_LIST_HEAD ( & pending ) ;
spin_lock ( & cache_defer_lock ) ;
2007-02-09 15:38:13 -08:00
hlist: drop the node parameter from iterators
I'm not sure why, but the hlist for each entry iterators were conceived
list_for_each_entry(pos, head, member)
The hlist ones were greedy and wanted an extra parameter:
hlist_for_each_entry(tpos, pos, head, member)
Why did they need an extra pos parameter? I'm not quite sure. Not only
they don't really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.
Besides the semantic patch, there was some manual work required:
- Fix up the actual hlist iterators in linux/list.h
- Fix up the declaration of other iterators based on the hlist ones.
- A very small amount of places were using the 'node' parameter, this
was modified to use 'obj->member' instead.
- Coccinelle didn't handle the hlist_for_each_entry_safe iterator
properly, so those had to be fixed up manually.
The semantic patch which is mostly the work of Peter Senna Tschudin is here:
@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;
type T;
expression a,c,d,e;
identifier b;
statement S;
@@
-T b;
<+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
...+>
[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foudnation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin <peter.senna@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-27 17:06:00 -08:00
hlist_for_each_entry_safe ( dreq , tmp , & cache_defer_hash [ hash ] , hash )
2010-08-12 17:04:08 +10:00
if ( dreq - > item = = item ) {
__unhash_deferred_req ( dreq ) ;
list_add ( & dreq - > recent , & pending ) ;
2005-04-16 15:20:36 -07:00
}
2010-08-12 17:04:08 +10:00
2005-04-16 15:20:36 -07:00
spin_unlock ( & cache_defer_lock ) ;
while ( ! list_empty ( & pending ) ) {
dreq = list_entry ( pending . next , struct cache_deferred_req , recent ) ;
list_del_init ( & dreq - > recent ) ;
dreq - > revisit ( dreq , 0 ) ;
}
}
void cache_clean_deferred ( void * owner )
{
struct cache_deferred_req * dreq , * tmp ;
struct list_head pending ;
INIT_LIST_HEAD ( & pending ) ;
spin_lock ( & cache_defer_lock ) ;
2007-02-09 15:38:13 -08:00
2005-04-16 15:20:36 -07:00
list_for_each_entry_safe ( dreq , tmp , & cache_defer_list , recent ) {
if ( dreq - > owner = = owner ) {
2010-08-26 13:19:52 -04:00
__unhash_deferred_req ( dreq ) ;
2010-09-22 12:55:06 +10:00
list_add ( & dreq - > recent , & pending ) ;
2005-04-16 15:20:36 -07:00
}
}
spin_unlock ( & cache_defer_lock ) ;
while ( ! list_empty ( & pending ) ) {
dreq = list_entry ( pending . next , struct cache_deferred_req , recent ) ;
list_del_init ( & dreq - > recent ) ;
dreq - > revisit ( dreq , 1 ) ;
}
}
/*
* communicate with user - space
*
2017-02-07 21:49:17 +08:00
* We have a magic / proc file - / proc / net / rpc / < cachename > / channel .
2007-11-06 14:15:19 -05:00
* On read , you get a full request , or block .
* On write , an update request is processed .
* Poll works if anything to read , and always allows write .
2005-04-16 15:20:36 -07:00
*
2007-02-09 15:38:13 -08:00
* Implemented by linked list of requests . Each open file has
2007-11-06 14:15:19 -05:00
* a - > private that also exists in this list . New requests are added
2005-04-16 15:20:36 -07:00
* to the end and may wake up any preceding readers.
* New readers are added to the head . If , on read , an item is found with
* CACHE_UPCALLING clear , we free it from the list .
*
*/
static DEFINE_SPINLOCK ( queue_lock ) ;
2006-03-20 22:33:17 -08:00
static DEFINE_MUTEX ( queue_io_mutex ) ;
2005-04-16 15:20:36 -07:00
/*
 * Common header of everything linked on a cache_detail's ->queue list.
 * It is embedded as the 'q' member of both struct cache_request and
 * struct cache_reader; 'reader' tells the two apart.
 */
struct cache_queue {
struct list_head list ;
int reader ; /* non-zero: part of a cache_reader; 0: part of a cache_request */
} ;
/*
 * One queued upcall request.
 *  q       - queue linkage (q.reader is expected to be 0, see cache_read())
 *  item    - cache entry the request is about; cache_read() drops a
 *            reference on it with cache_put() when the request is freed
 *  buf     - buffer holding the request text handed to user space
 *  len     - number of bytes in buf; 0 until cache_read() has had the
 *            text generated
 *  readers - number of readers currently holding this request
 */
struct cache_request {
struct cache_queue q ;
struct cache_head * item ;
char * buf ;
int len ;
int readers ;
} ;
/*
 * Per-open-file read state, stored in filp->private_data.
 *  q      - queue linkage; marks this reader's position in the queue
 *  offset - bytes of the following request already copied to user space
 */
struct cache_reader {
struct cache_queue q ;
int offset ; /* if non-0, we have a refcnt on next request */
} ;
2013-02-04 14:03:03 +03:00
static int cache_request ( struct cache_detail * detail ,
struct cache_request * crq )
{
char * bp = crq - > buf ;
int len = PAGE_SIZE ;
detail - > cache_request ( detail , crq - > item , & bp , & len ) ;
if ( len < 0 )
return - EAGAIN ;
return PAGE_SIZE - len ;
}
2009-08-09 15:14:29 -04:00
/*
 * Read (part of) the next upcall request on @cd's queue into @buf.
 *
 * Returns the number of bytes copied, 0 when @count is 0 or no request
 * follows this reader in the queue, or a negative errno (-EFAULT when
 * the copy to user space fails, or an error from cache_request()).
 *
 * The inode lock serialises concurrent readers of the same file;
 * queue_lock protects the queue itself.  While reader->offset is
 * non-zero the reader keeps a count in req->readers on the request it is
 * part-way through.
 *
 * NOTE(review): when cache_request() fails with -EAGAIN the reader is not
 * moved past the request before "goto again" - confirm this cannot spin
 * while CACHE_PENDING stays set on the item.
 */
static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *ppos, struct cache_detail *cd)
{
	struct cache_reader *reader = filp->private_data;
	struct inode *inode = file_inode(filp);
	struct cache_request *req;
	int last_ref;
	int ret;

	if (count == 0)
		return 0;

	inode_lock(inode);
again:
	spin_lock(&queue_lock);

	/* Step over other readers until a request (or the list end) is next. */
	for (;;) {
		struct list_head *next = reader->q.list.next;

		if (next == &cd->queue)
			break;
		if (!list_entry(next, struct cache_queue, list)->reader)
			break;
		list_move(&reader->q.list, next);
	}

	if (reader->q.list.next == &cd->queue) {
		/* Nothing queued after us. */
		spin_unlock(&queue_lock);
		inode_unlock(inode);
		WARN_ON_ONCE(reader->offset);
		return 0;
	}

	req = container_of(reader->q.list.next, struct cache_request, q.list);
	WARN_ON_ONCE(req->q.reader);
	if (reader->offset == 0)
		req->readers++;		/* starting a new request */
	spin_unlock(&queue_lock);

	/* Generate the request text the first time it is needed. */
	if (req->len == 0) {
		ret = cache_request(cd, req);
		if (ret < 0)
			goto out;
		req->len = ret;
	}

	if (reader->offset == 0 &&
	    !test_bit(CACHE_PENDING, &req->item->flags)) {
		/* Item no longer pending: skip this request and retry. */
		ret = -EAGAIN;
		spin_lock(&queue_lock);
		list_move(&reader->q.list, &req->q.list);
		spin_unlock(&queue_lock);
	} else {
		if (reader->offset + count > req->len)
			count = req->len - reader->offset;
		ret = -EFAULT;
		if (copy_to_user(buf, req->buf + reader->offset, count))
			goto out;
		reader->offset += count;
		if (reader->offset >= req->len) {
			/* Whole request consumed: move past it. */
			reader->offset = 0;
			spin_lock(&queue_lock);
			list_move(&reader->q.list, &req->q.list);
			spin_unlock(&queue_lock);
		}
		ret = 0;
	}
out:
	if (reader->offset == 0) {
		/* Not part-way through req any more: release it. */
		spin_lock(&queue_lock);
		req->readers--;
		last_ref = req->readers == 0 &&
			   !test_bit(CACHE_PENDING, &req->item->flags);
		if (last_ref)
			list_del(&req->q.list);
		spin_unlock(&queue_lock);

		if (last_ref) {
			cache_put(req->item, cd);
			kfree(req->buf);
			kfree(req);
		}
	}
	if (ret == -EAGAIN)
		goto again;

	inode_unlock(inode);
	if (ret)
		return ret;
	return count;
}
2009-08-09 15:14:28 -04:00
/*
 * Copy a @count-byte update from user space into @kaddr, NUL-terminate
 * it and pass it to the cache's ->cache_parse method.
 *
 * @kaddr must have room for @count + 1 bytes.
 *
 * Returns @count when the parser returns 0, the parser's return value
 * otherwise, -EINVAL for an empty write, or -EFAULT if the user buffer
 * cannot be copied.
 */
static ssize_t cache_do_downcall(char *kaddr, const char __user *buf,
				 size_t count, struct cache_detail *cd)
{
	ssize_t parsed;

	if (count == 0)
		return -EINVAL;
	if (copy_from_user(kaddr, buf, count))
		return -EFAULT;

	kaddr[count] = '\0';
	parsed = cd->cache_parse(cd, kaddr, count);
	if (parsed == 0)
		return count;
	return parsed;
}
/*
 * Fallback downcall path that goes through one shared static buffer.
 *
 * Writes of sizeof(write_buf) bytes or more are rejected with -EINVAL so
 * that the terminating NUL always fits.  queue_io_mutex serialises use
 * of the buffer.
 */
static ssize_t cache_slow_downcall(const char __user *buf,
				   size_t count, struct cache_detail *cd)
{
	static char write_buf[8192]; /* protected by queue_io_mutex */
	ssize_t ret;

	if (count >= sizeof(write_buf))
		return -EINVAL;

	mutex_lock(&queue_io_mutex);
	ret = cache_do_downcall(write_buf, buf, count, cd);
	mutex_unlock(&queue_io_mutex);

	return ret;
}
2005-04-16 15:20:36 -07:00
2009-08-09 15:14:28 -04:00
/*
 * Process a write to the channel file.
 *
 * Page 0 of @mapping is used as the staging buffer for the update.
 * Writes of PAGE_SIZE bytes or more, or a failure to obtain that page,
 * fall back to cache_slow_downcall().
 */
static ssize_t cache_downcall(struct address_space *mapping,
			      const char __user *buf,
			      size_t count, struct cache_detail *cd)
{
	struct page *page;
	char *kaddr;
	ssize_t ret;

	/* Leave room for the NUL that cache_do_downcall() appends. */
	if (count >= PAGE_SIZE)
		return cache_slow_downcall(buf, count, cd);

	page = find_or_create_page(mapping, 0, GFP_KERNEL);
	if (!page)
		return cache_slow_downcall(buf, count, cd);

	kaddr = kmap(page);
	ret = cache_do_downcall(kaddr, buf, count, cd);
	kunmap(page);

	unlock_page(page);
	put_page(page);
	return ret;
}
2005-04-16 15:20:36 -07:00
2009-08-09 15:14:29 -04:00
/*
 * write() handler body for a cache channel: hand the data to
 * cache_downcall() with the inode lock held.
 *
 * Returns -EINVAL when the cache has no ->cache_parse method, otherwise
 * whatever cache_downcall() returns.
 */
static ssize_t cache_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *ppos,
			   struct cache_detail *cd)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = file_inode(filp);
	ssize_t ret;

	if (!cd->cache_parse)
		return -EINVAL;

	inode_lock(inode);
	ret = cache_downcall(mapping, buf, count, cd);
	inode_unlock(inode);

	return ret;
}
static DECLARE_WAIT_QUEUE_HEAD ( queue_wait ) ;
2017-07-03 00:01:49 -04:00
/*
 * poll() handler body for a cache channel.
 *
 * Writing is always reported as possible.  Reading is reported as
 * possible when the file has a reader and at least one request is queued
 * at or after that reader's position in @cd's queue.
 */
static __poll_t cache_poll(struct file *filp, poll_table *wait,
			   struct cache_detail *cd)
{
	struct cache_reader *rp = filp->private_data;
	struct cache_queue *cq;
	__poll_t mask = EPOLLOUT | EPOLLWRNORM;	/* always allow write */

	poll_wait(filp, &queue_wait, wait);

	/* No reader state on this file: only writability applies. */
	if (!rp)
		return mask;

	spin_lock(&queue_lock);
	cq = &rp->q;
	while (&cq->list != &cd->queue) {
		if (!cq->reader) {
			mask |= EPOLLIN | EPOLLRDNORM;
			break;
		}
		cq = list_entry(cq->list.next, struct cache_queue, list);
	}
	spin_unlock(&queue_lock);

	return mask;
}
2009-08-09 15:14:29 -04:00
/*
 * ioctl handler shared by the cache channel files.
 *
 * Only FIONREAD is handled, and only for files that have reader state;
 * anything else returns -EINVAL.  The value stored through arg is the
 * length of the first request found after this reader's queue entry minus
 * the reader's current offset, or 0 if no request is found.  Returns the
 * result of put_user().
 */
static int cache_ioctl ( struct inode * ino , struct file * filp ,
unsigned int cmd , unsigned long arg ,
struct cache_detail * cd )
2005-04-16 15:20:36 -07:00
{
int len = 0 ;
struct cache_reader * rp = filp - > private_data ;
struct cache_queue * cq ;
if ( cmd ! = FIONREAD | | ! rp )
return - EINVAL ;
spin_lock ( & queue_lock ) ;
/* only find the length remaining in current request,
* or the length of the next request
*/
for ( cq = & rp - > q ; & cq - > list ! = & cd - > queue ;
cq = list_entry ( cq - > list . next , struct cache_queue , list ) )
if ( ! cq - > reader ) {
struct cache_request * cr =
container_of ( cq , struct cache_request , q ) ;
len = cr - > len - rp - > offset ;
break ;
}
spin_unlock ( & queue_lock ) ;
return put_user ( len , ( int __user * ) arg ) ;
}
2009-08-09 15:14:29 -04:00
/*
 * Open handler shared by the cache channel files.
 *
 * Takes a reference on the owning module for the lifetime of the open
 * file.  For files opened for reading, a cache_reader is allocated,
 * marked as a reader entry and linked into cd->queue under queue_lock.
 * For files opened for writing, cd->writers is incremented.
 *
 * Returns 0 on success, -EACCES if cd is NULL or the module reference
 * cannot be taken, or -ENOMEM if the reader cannot be allocated (the
 * module reference is dropped again in that case).
 */
static int cache_open ( struct inode * inode , struct file * filp ,
struct cache_detail * cd )
2005-04-16 15:20:36 -07:00
{
struct cache_reader * rp = NULL ;
2009-08-19 18:13:00 -04:00
if ( ! cd | | ! try_module_get ( cd - > owner ) )
return - EACCES ;
2005-04-16 15:20:36 -07:00
nonseekable_open ( inode , filp ) ;
if ( filp - > f_mode & FMODE_READ ) {
rp = kmalloc ( sizeof ( * rp ) , GFP_KERNEL ) ;
2013-03-23 00:36:44 +04:00
if ( ! rp ) {
module_put ( cd - > owner ) ;
2005-04-16 15:20:36 -07:00
return - ENOMEM ;
2013-03-23 00:36:44 +04:00
}
2005-04-16 15:20:36 -07:00
rp - > offset = 0 ;
/* reader == 1 distinguishes this entry from queued requests */
rp - > q . reader = 1 ;
2019-07-26 18:33:01 -04:00
2005-04-16 15:20:36 -07:00
spin_lock ( & queue_lock ) ;
list_add ( & rp - > q . list , & cd - > queue ) ;
spin_unlock ( & queue_lock ) ;
}
2019-07-26 18:33:01 -04:00
if ( filp - > f_mode & FMODE_WRITE )
atomic_inc ( & cd - > writers ) ;
2005-04-16 15:20:36 -07:00
/* stays NULL for files that were not opened for reading */
filp - > private_data = rp ;
return 0 ;
}
2009-08-09 15:14:29 -04:00
/*
 * Release handler shared by the cache channel files; undoes cache_open().
 *
 * If the file has reader state, the reader is unlinked from cd->queue and
 * freed.  When the reader's offset is non-zero, the first request found
 * after the reader's queue entry has its readers count decremented first.
 * For files opened for writing, cd->writers is decremented and
 * cd->last_close is set to the current seconds_since_boot().
 * The module reference is dropped.  Always returns 0.
 */
static int cache_release ( struct inode * inode , struct file * filp ,
struct cache_detail * cd )
2005-04-16 15:20:36 -07:00
{
struct cache_reader * rp = filp - > private_data ;
if ( rp ) {
spin_lock ( & queue_lock ) ;
if ( rp - > offset ) {
struct cache_queue * cq ;
for ( cq = & rp - > q ; & cq - > list ! = & cd - > queue ;
cq = list_entry ( cq - > list . next , struct cache_queue , list ) )
if ( ! cq - > reader ) {
container_of ( cq , struct cache_request , q )
- > readers - - ;
break ;
}
rp - > offset = 0 ;
}
list_del ( & rp - > q . list ) ;
spin_unlock ( & queue_lock ) ;
filp - > private_data = NULL ;
kfree ( rp ) ;
2019-07-26 18:33:01 -04:00
}
if ( filp - > f_mode & FMODE_WRITE ) {
atomic_dec ( & cd - > writers ) ;
2010-08-12 16:55:22 +10:00
cd - > last_close = seconds_since_boot ( ) ;
2005-04-16 15:20:36 -07:00
}
2009-08-19 18:13:00 -04:00
module_put ( cd - > owner ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
2009-08-04 15:22:38 +10:00
/*
 * Remove queued upcall requests for cache item ch from detail->queue.
 *
 * Under queue_lock, every request whose item is ch and whose readers
 * count is zero is moved to a private list; the scan stops early if ch
 * is found to have CACHE_PENDING set.  After the lock is dropped, each
 * collected request drops its item reference via cache_put() and its
 * buffer and the request itself are freed.
 */
static void cache_dequeue ( struct cache_detail * detail , struct cache_head * ch )
2005-04-16 15:20:36 -07:00
{
2013-06-13 12:53:42 +10:00
struct cache_queue * cq , * tmp ;
struct cache_request * cr ;
struct list_head dequeued ;
INIT_LIST_HEAD ( & dequeued ) ;
2005-04-16 15:20:36 -07:00
spin_lock ( & queue_lock ) ;
2013-06-13 12:53:42 +10:00
list_for_each_entry_safe ( cq , tmp , & detail - > queue , list )
2005-04-16 15:20:36 -07:00
if ( ! cq - > reader ) {
2013-06-13 12:53:42 +10:00
cr = container_of ( cq , struct cache_request , q ) ;
2005-04-16 15:20:36 -07:00
if ( cr - > item ! = ch )
continue ;
2013-06-13 12:53:42 +10:00
if ( test_bit ( CACHE_PENDING , & ch - > flags ) )
/* Lost a race and it is pending again */
break ;
2005-04-16 15:20:36 -07:00
/* requests with a non-zero readers count are left queued */
if ( cr - > readers ! = 0 )
2006-03-27 01:15:07 -08:00
continue ;
2013-06-13 12:53:42 +10:00
list_move ( & cr - > q . list , & dequeued ) ;
2005-04-16 15:20:36 -07:00
}
spin_unlock ( & queue_lock ) ;
2013-06-13 12:53:42 +10:00
/* the puts and frees below happen after queue_lock has been dropped */
while ( ! list_empty ( & dequeued ) ) {
cr = list_entry ( dequeued . next , struct cache_request , q . list ) ;
list_del ( & cr - > q . list ) ;
cache_put ( cr - > item , detail ) ;
kfree ( cr - > buf ) ;
kfree ( cr ) ;
}
2005-04-16 15:20:36 -07:00
}
/*
 * Support routines for text-based upcalls.
 * Fields are separated by spaces.
 * Fields are either mangled so that space, tab, newline and backslash
 * ("slosh") are quoted with a backslash, or hexified with a leading \x.
 * Each record is terminated with a newline.
 */
void qword_add ( char * * bpp , int * lp , char * str )
{
char * bp = * bpp ;
int len = * lp ;
2014-11-28 17:50:28 +02:00
int ret ;
2005-04-16 15:20:36 -07:00
if ( len < 0 ) return ;
lib/string_helpers.c: change semantics of string_escape_mem
The current semantics of string_escape_mem are inadequate for one of its
current users, vsnprintf(). If that is to honour its contract, it must
know how much space would be needed for the entire escaped buffer, and
string_escape_mem provides no way of obtaining that (short of allocating a
large enough buffer (~4 times input string) to let it play with, and
that's definitely a big no-no inside vsnprintf).
So change the semantics for string_escape_mem to be more snprintf-like:
Return the size of the output that would be generated if the destination
buffer was big enough, but of course still only write to the part of dst
it is allowed to, and (contrary to snprintf) don't do '\0'-termination.
It is then up to the caller to detect whether output was truncated and to
append a '\0' if desired. Also, we must output partial escape sequences,
otherwise a call such as snprintf(buf, 3, "%1pE", "\123") would cause
printf to write a \0 to buf[2] but leaving buf[0] and buf[1] with whatever
they previously contained.
This also fixes a bug in the escaped_string() helper function, which used
to unconditionally pass a length of "end-buf" to string_escape_mem();
since the latter doesn't check osz for being insanely large, it would
happily write to dst. For example, kasprintf(GFP_KERNEL, "something and
then %pE", ...); is an easy way to trigger an oops.
In test-string_helpers.c, the -ENOMEM test is replaced with testing for
getting the expected return value even if the buffer is too small. We
also ensure that nothing is written (by relying on a NULL pointer deref)
if the output size is 0 by passing NULL - this has to work for
kasprintf("%pE") to work.
In net/sunrpc/cache.c, I think qword_add still has the same semantics.
Someone should definitely double-check this.
In fs/proc/array.c, I made the minimum possible change, but longer-term it
should stop poking around in seq_file internals.
[andriy.shevchenko@linux.intel.com: simplify qword_add]
[andriy.shevchenko@linux.intel.com: add missed curly braces]
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-15 16:17:28 -07:00
ret = string_escape_str ( str , bp , len , ESCAPE_OCTAL , " \\ \n \t " ) ;
if ( ret > = len ) {
bp + = len ;
2014-11-28 17:50:28 +02:00
len = - 1 ;
lib/string_helpers.c: change semantics of string_escape_mem
The current semantics of string_escape_mem are inadequate for one of its
current users, vsnprintf(). If that is to honour its contract, it must
know how much space would be needed for the entire escaped buffer, and
string_escape_mem provides no way of obtaining that (short of allocating a
large enough buffer (~4 times input string) to let it play with, and
that's definitely a big no-no inside vsnprintf).
So change the semantics for string_escape_mem to be more snprintf-like:
Return the size of the output that would be generated if the destination
buffer was big enough, but of course still only write to the part of dst
it is allowed to, and (contrary to snprintf) don't do '\0'-termination.
It is then up to the caller to detect whether output was truncated and to
append a '\0' if desired. Also, we must output partial escape sequences,
otherwise a call such as snprintf(buf, 3, "%1pE", "\123") would cause
printf to write a \0 to buf[2] but leaving buf[0] and buf[1] with whatever
they previously contained.
This also fixes a bug in the escaped_string() helper function, which used
to unconditionally pass a length of "end-buf" to string_escape_mem();
since the latter doesn't check osz for being insanely large, it would
happily write to dst. For example, kasprintf(GFP_KERNEL, "something and
then %pE", ...); is an easy way to trigger an oops.
In test-string_helpers.c, the -ENOMEM test is replaced with testing for
getting the expected return value even if the buffer is too small. We
also ensure that nothing is written (by relying on a NULL pointer deref)
if the output size is 0 by passing NULL - this has to work for
kasprintf("%pE") to work.
In net/sunrpc/cache.c, I think qword_add still has the same semantics.
Someone should definitely double-check this.
In fs/proc/array.c, I made the minimum possible change, but longer-term it
should stop poking around in seq_file internals.
[andriy.shevchenko@linux.intel.com: simplify qword_add]
[andriy.shevchenko@linux.intel.com: add missed curly braces]
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-15 16:17:28 -07:00
} else {
bp + = ret ;
2014-11-28 17:50:28 +02:00
len - = ret ;
2005-04-16 15:20:36 -07:00
* bp + + = ' ' ;
len - - ;
}
* bpp = bp ;
* lp = len ;
}
2008-12-23 16:30:12 -05:00
EXPORT_SYMBOL_GPL ( qword_add ) ;
2005-04-16 15:20:36 -07:00
/*
 * Append a binary buffer to an upcall message as a hex field.
 *
 * bpp points at the output cursor and lp at the space remaining; both
 * are updated on return.  The field is written as a backslash, an 'x',
 * two hex digits per input byte (via hex_byte_pack()) and a trailing
 * space.  If the field does not fit, *lp is set to -1.  Nothing is done
 * when *lp is already negative on entry.
 */
void qword_addhex ( char * * bpp , int * lp , char * buf , int blen )
{
char * bp = * bpp ;
int len = * lp ;
if ( len < 0 ) return ;
if ( len > 2 ) {
* bp + + = ' \\ ' ;
* bp + + = ' x ' ;
len - = 2 ;
/* each input byte consumes two bytes of output space */
while ( blen & & len > = 2 ) {
2013-12-12 15:49:21 +02:00
bp = hex_byte_pack ( bp , * buf + + ) ;
2005-04-16 15:20:36 -07:00
len - = 2 ;
blen - - ;
}
}
/* input left over, or no room for the separator: flag overflow */
if ( blen | | len < 1 ) len = - 1 ;
else {
* bp + + = ' ' ;
len - - ;
}
* bpp = bp ;
* lp = len ;
}
2008-12-23 16:30:12 -05:00
EXPORT_SYMBOL_GPL ( qword_addhex ) ;
2005-04-16 15:20:36 -07:00
/*
 * Invoke the cache's ->warn_no_listener callback, if it has one, at most
 * once per distinct value of detail->last_close.  The callback's second
 * argument is true when last_close is non-zero.
 */
static void warn_no_listener ( struct cache_detail * detail )
{
if ( detail - > last_warn ! = detail - > last_close ) {
/* remember this last_close value so the warning is not repeated for it */
detail - > last_warn = detail - > last_close ;
if ( detail - > warn_no_listener )
2009-08-09 15:14:26 -04:00
detail - > warn_no_listener ( detail , detail - > last_close ! = 0 ) ;
2005-04-16 15:20:36 -07:00
}
}
2010-09-19 22:55:06 -04:00
/*
 * Decide whether anything is expected to answer upcalls for this cache.
 *
 * Returns true if the writers count is non-zero, or if the last close
 * recorded in detail->last_close was within the past 30 seconds.
 * Returns false if last_close is 0 or is more than 30 seconds old.
 */
static bool cache_listeners_exist ( struct cache_detail * detail )
{
2019-07-26 18:33:01 -04:00
if ( atomic_read ( & detail - > writers ) )
2010-09-19 22:55:06 -04:00
return true ;
if ( detail - > last_close = = 0 )
/* This cache was never opened */
return false ;
if ( detail - > last_close < seconds_since_boot ( ) - 30 )
/*
* We allow for the possibility that someone might
* restart a userspace daemon without restarting the
* server ; but after 30 seconds , we give up .
*/
return false ;
return true ;
}
2005-04-16 15:20:36 -07:00
/*
2009-08-09 15:14:29 -04:00
* register an upcall request to user - space and queue it up for read ( ) by the
* upcall daemon .
*
2005-04-16 15:20:36 -07:00
* Each request is at most one page long .
*/
2020-03-01 18:21:42 -05:00
/*
 * Allocate an upcall request for cache item h and append it to
 * detail->queue.
 *
 * A PAGE_SIZE buffer and a cache_request are allocated; the request
 * starts with len 0 and readers 0.  The request is only queued if h still
 * has CACHE_PENDING set once queue_lock is held; in that case it takes a
 * reference on h via cache_get().  queue_wait is woken in either case.
 *
 * Returns 0 when the request was queued, or -EAGAIN if h has
 * CACHE_CLEANED set, an allocation failed, or h was no longer pending
 * (the allocations are freed again in that last case).
 */
static int cache_pipe_upcall ( struct cache_detail * detail , struct cache_head * h )
2005-04-16 15:20:36 -07:00
{
char * buf ;
struct cache_request * crq ;
2013-06-13 12:53:42 +10:00
int ret = 0 ;
2005-04-16 15:20:36 -07:00
2013-06-13 12:53:42 +10:00
if ( test_bit ( CACHE_CLEANED , & h - > flags ) )
/* Too late to make an upcall */
return - EAGAIN ;
2005-04-16 15:20:36 -07:00
buf = kmalloc ( PAGE_SIZE , GFP_KERNEL ) ;
if ( ! buf )
return - EAGAIN ;
crq = kmalloc ( sizeof ( * crq ) , GFP_KERNEL ) ;
if ( ! crq ) {
kfree ( buf ) ;
return - EAGAIN ;
}
/* reader == 0 marks this queue entry as a request, not a reader */
crq - > q . reader = 0 ;
crq - > buf = buf ;
2013-02-04 14:03:03 +03:00
crq - > len = 0 ;
2005-04-16 15:20:36 -07:00
crq - > readers = 0 ;
spin_lock ( & queue_lock ) ;
2016-03-04 17:20:13 +11:00
if ( test_bit ( CACHE_PENDING , & h - > flags ) ) {
crq - > item = cache_get ( h ) ;
2013-06-13 12:53:42 +10:00
list_add_tail ( & crq - > q . list , & detail - > queue ) ;
2020-03-01 18:21:44 -05:00
trace_cache_entry_upcall ( detail , h ) ;
2016-03-04 17:20:13 +11:00
} else
2013-06-13 12:53:42 +10:00
/* Lost a race, no longer PENDING, so don't enqueue */
ret = - EAGAIN ;
2005-04-16 15:20:36 -07:00
spin_unlock ( & queue_lock ) ;
wake_up ( & queue_wait ) ;
2013-06-13 12:53:42 +10:00
/* the request was never queued, so it is still ours to free */
if ( ret = = - EAGAIN ) {
kfree ( buf ) ;
kfree ( crq ) ;
}
return ret ;
2005-04-16 15:20:36 -07:00
}
2020-03-01 18:21:42 -05:00
int sunrpc_cache_pipe_upcall ( struct cache_detail * detail , struct cache_head * h )
{
if ( test_and_set_bit ( CACHE_PENDING , & h - > flags ) )
return 0 ;
return cache_pipe_upcall ( detail , h ) ;
}
2009-08-09 15:14:29 -04:00
EXPORT_SYMBOL_GPL ( sunrpc_cache_pipe_upcall ) ;
2005-04-16 15:20:36 -07:00
2020-03-01 18:21:42 -05:00
/*
 * Like sunrpc_cache_pipe_upcall(), but refuse to queue the upcall when
 * cache_listeners_exist() reports that nobody is listening.  In that case
 * warn_no_listener() is called, a trace event is emitted and -EINVAL is
 * returned.
 */
int sunrpc_cache_pipe_upcall_timeout ( struct cache_detail * detail ,
struct cache_head * h )
{
if ( ! cache_listeners_exist ( detail ) ) {
warn_no_listener ( detail ) ;
2020-03-01 18:21:44 -05:00
trace_cache_entry_no_listener ( detail , h ) ;
2020-03-01 18:21:42 -05:00
return - EINVAL ;
}
return sunrpc_cache_pipe_upcall ( detail , h ) ;
}
EXPORT_SYMBOL_GPL ( sunrpc_cache_pipe_upcall_timeout ) ;
2005-04-16 15:20:36 -07:00
/*
* parse a message from user - space and pass it
* to an appropriate cache
* Messages are , like requests , separated into fields by
* spaces and dequotes as \ xHEXSTRING or embedded \ nnn octal
*
2007-02-09 15:38:13 -08:00
* Message is
2005-04-16 15:20:36 -07:00
* reply cachename expiry key . . . content . . . .
*
2007-02-09 15:38:13 -08:00
* key and content are both parsed by cache
2005-04-16 15:20:36 -07:00
*/
/*
 * Parse the next field of a downcall message.
 *
 * Leading spaces at *bpp are skipped.  The field is decoded into dest,
 * at most bufsize - 1 bytes, and is either a hex string introduced by
 * a backslash and 'x', or text in which a backslash followed by three
 * octal digits (the first no greater than '3') encodes one byte.
 *
 * On success dest is NUL-terminated, *bpp is advanced past the field
 * and any spaces that follow it, and the number of decoded bytes is
 * returned.  If decoding stops at a character that is not a space,
 * newline or NUL, -1 is returned; *bpp is then left unchanged and dest
 * is not terminated.
 */
int qword_get ( char * * bpp , char * dest , int bufsize )
{
/* return bytes copied, or -1 on error */
char * bp = * bpp ;
int len = 0 ;
while ( * bp = = ' ' ) bp + + ;
if ( bp [ 0 ] = = ' \\ ' & & bp [ 1 ] = = ' x ' ) {
/* HEX STRING */
bp + = 2 ;
2016-02-18 18:55:54 +00:00
while ( len < bufsize - 1 ) {
2010-09-21 09:40:25 +03:00
int h , l ;
/* stop at the first character that is not a hex digit */
h = hex_to_bin ( bp [ 0 ] ) ;
if ( h < 0 )
break ;
l = hex_to_bin ( bp [ 1 ] ) ;
if ( l < 0 )
break ;
* dest + + = ( h < < 4 ) | l ;
bp + = 2 ;
2005-04-16 15:20:36 -07:00
len + + ;
}
} else {
/* text with \nnn octal quoting */
while ( * bp ! = ' ' & & * bp ! = ' \n ' & & * bp & & len < bufsize - 1 ) {
if ( * bp = = ' \\ ' & &
isodigit ( bp [ 1 ] ) & & ( bp [ 1 ] < = ' 3 ' ) & &
isodigit ( bp [ 2 ] ) & &
isodigit ( bp [ 3 ] ) ) {
int byte = ( * + + bp - ' 0 ' ) ;
bp + + ;
byte = ( byte < < 3 ) | ( * bp + + - ' 0 ' ) ;
byte = ( byte < < 3 ) | ( * bp + + - ' 0 ' ) ;
* dest + + = byte ;
len + + ;
} else {
* dest + + = * bp + + ;
len + + ;
}
}
}
/* anything other than a field terminator here means the parse failed */
if ( * bp ! = ' ' & & * bp ! = ' \n ' & & * bp ! = ' \0 ' )
return - 1 ;
while ( * bp = = ' ' ) bp + + ;
* bpp = bp ;
* dest = ' \0 ' ;
return len ;
}
2008-12-23 16:30:12 -05:00
EXPORT_SYMBOL_GPL ( qword_get ) ;
2005-04-16 15:20:36 -07:00
/*
2017-02-07 21:49:17 +08:00
* support / proc / net / rpc / $ CACHENAME / content
2005-04-16 15:20:36 -07:00
* as a seqfile .
* We call - > cache_show passing NULL for the item to
* get a header , then pass each real item in the cache
*/
2018-10-03 12:01:22 -04:00
/*
 * seq_file start helper: map *pos to a cache entry.
 *
 * A position of 0 yields SEQ_START_TOKEN.  Otherwise, with n = *pos - 1,
 * the upper 32 bits of n select a hash bucket and the lower 32 bits an
 * entry index within that bucket's chain.  If the chain has no entry at
 * that index, the search moves on to the next non-empty bucket, *pos is
 * updated to refer to that bucket's first entry, and that entry is
 * returned.  Returns NULL once the bucket index reaches cd->hash_size.
 */
static void * __cache_seq_start ( struct seq_file * m , loff_t * pos )
2005-04-16 15:20:36 -07:00
{
loff_t n = * pos ;
2012-04-15 05:58:06 +00:00
unsigned int hash , entry ;
2005-04-16 15:20:36 -07:00
struct cache_head * ch ;
2015-07-27 11:09:10 +08:00
struct cache_detail * cd = m - > private ;
2005-04-16 15:20:36 -07:00
if ( ! n - - )
return SEQ_START_TOKEN ;
hash = n > > 32 ;
entry = n & ( ( 1LL < < 32 ) - 1 ) ;
2018-10-03 12:01:22 -04:00
hlist_for_each_entry_rcu ( ch , & cd - > hash_table [ hash ] , cache_list )
2005-04-16 15:20:36 -07:00
if ( ! entry - - )
return ch ;
/* not found in this bucket: clear the entry index and try later buckets */
n & = ~ ( ( 1LL < < 32 ) - 1 ) ;
do {
hash + + ;
n + = 1LL < < 32 ;
2007-02-09 15:38:13 -08:00
} while ( hash < cd - > hash_size & &
2015-07-27 11:10:15 +08:00
hlist_empty ( & cd - > hash_table [ hash ] ) ) ;
2005-04-16 15:20:36 -07:00
if ( hash > = cd - > hash_size )
return NULL ;
/* positions are stored offset by one because 0 means the start token */
* pos = n + 1 ;
2018-10-03 12:01:22 -04:00
return hlist_entry_safe ( rcu_dereference_raw (
hlist_first_rcu ( & cd - > hash_table [ hash ] ) ) ,
2015-07-27 11:10:15 +08:00
struct cache_head , cache_list ) ;
2005-04-16 15:20:36 -07:00
}
2018-10-03 12:01:22 -04:00
2018-10-01 10:41:51 -04:00
/*
 * seq_file next helper: advance from entry p to the following entry.
 *
 * From SEQ_START_TOKEN the scan begins at bucket 0.  From a real entry,
 * the next entry in the same chain is returned if there is one (and *pos
 * is incremented); otherwise the scan continues with the following
 * bucket.  Empty buckets are skipped, adding 1 << 32 to *pos for each.
 * Returns NULL once the bucket index reaches cd->hash_size.
 */
static void * cache_seq_next ( struct seq_file * m , void * p , loff_t * pos )
2005-04-16 15:20:36 -07:00
{
struct cache_head * ch = p ;
int hash = ( * pos > > 32 ) ;
2015-07-27 11:09:10 +08:00
struct cache_detail * cd = m - > private ;
2005-04-16 15:20:36 -07:00
if ( p = = SEQ_START_TOKEN )
hash = 0 ;
2015-07-27 11:10:15 +08:00
else if ( ch - > cache_list . next = = NULL ) {
2005-04-16 15:20:36 -07:00
/* end of this chain: move on to the next bucket */
hash + + ;
* pos + = 1LL < < 32 ;
} else {
+ + * pos ;
2018-10-03 12:01:22 -04:00
return hlist_entry_safe ( rcu_dereference_raw (
hlist_next_rcu ( & ch - > cache_list ) ) ,
2015-07-27 11:10:15 +08:00
struct cache_head , cache_list ) ;
2005-04-16 15:20:36 -07:00
}
/* restart the in-bucket index, then skip over empty buckets */
* pos & = ~ ( ( 1LL < < 32 ) - 1 ) ;
while ( hash < cd - > hash_size & &
2015-07-27 11:10:15 +08:00
hlist_empty ( & cd - > hash_table [ hash ] ) ) {
2005-04-16 15:20:36 -07:00
hash + + ;
* pos + = 1LL < < 32 ;
}
if ( hash > = cd - > hash_size )
return NULL ;
+ + * pos ;
2018-10-03 12:01:22 -04:00
return hlist_entry_safe ( rcu_dereference_raw (
hlist_first_rcu ( & cd - > hash_table [ hash ] ) ) ,
2015-07-27 11:10:15 +08:00
struct cache_head , cache_list ) ;
2005-04-16 15:20:36 -07:00
}
2018-10-03 12:01:22 -04:00
/*
 * Exported seq_file ->start method: enter an RCU read-side critical
 * section and look up the entry for *pos via __cache_seq_start().
 * The matching rcu_read_unlock() is done by cache_seq_stop_rcu().
 */
void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
	__acquires(RCU)
{
	void *first;

	rcu_read_lock();
	first = __cache_seq_start(m, pos);
	return first;
}
EXPORT_SYMBOL_GPL ( cache_seq_start_rcu ) ;
/*
 * Exported seq_file ->next method: a direct pass-through to
 * cache_seq_next().
 */
void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos)
{
	void *following = cache_seq_next(file, p, pos);

	return following;
}
EXPORT_SYMBOL_GPL ( cache_seq_next_rcu ) ;
/*
 * Exported seq_file ->stop method: leaves the RCU read-side critical
 * section entered by cache_seq_start_rcu().  Neither argument is used.
 */
void cache_seq_stop_rcu ( struct seq_file * m , void * p )
__releases ( RCU )
{
rcu_read_unlock ( ) ;
}
EXPORT_SYMBOL_GPL ( cache_seq_stop_rcu ) ;
2005-04-16 15:20:36 -07:00
static int c_show ( struct seq_file * m , void * p )
{
struct cache_head * cp = p ;
2015-07-27 11:09:10 +08:00
struct cache_detail * cd = m - > private ;
2005-04-16 15:20:36 -07:00
if ( p = = SEQ_START_TOKEN )
return cd - > cache_show ( m , cd , NULL ) ;
ifdebug ( CACHE )
nfs: use time64_t internally
The timestamps for the cache are all in boottime seconds, so they
don't overflow 32-bit values, but the use of time_t is deprecated
because it generally does overflow when used with wall-clock time.
There are multiple possible ways of avoiding it:
- leave time_t, which is safe here, but forces others to
look into this code to determine that it is over and over.
- use a more generic type, like 'int' or 'long', which is known
to be sufficient here but loses the documentation of referring
to timestamps
- use ktime_t everywhere, and convert into seconds in the few
places where we want realtime-seconds. The conversion is
sometimes expensive, but not more so than the conversion we
do today.
- use time64_t to clarify that this code is safe. Nothing would
change for 64-bit architectures, but it is slightly less
efficient on 32-bit architectures.
Without a clear winner of the three approaches above, this picks
the last one, favouring readability over a small performance
loss on 32-bit architectures.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2017-10-20 16:34:42 +02:00
seq_printf ( m , " # expiry=%lld refcnt=%d flags=%lx \n " ,
2010-08-12 16:55:22 +10:00
convert_to_wallclock ( cp - > expiry_time ) ,
2016-11-14 17:29:48 +01:00
kref_read ( & cp - > ref ) , cp - > flags ) ;
2005-04-16 15:20:36 -07:00
cache_get ( cp ) ;
if ( cache_check ( cd , cp , NULL ) )
/* cache_check does a cache_put on failure */
seq_printf ( m , " # " ) ;
2012-07-12 10:37:34 +10:00
else {
if ( cache_is_expired ( cd , cp ) )
seq_printf ( m , " # " ) ;
2005-04-16 15:20:36 -07:00
cache_put ( cp , cd ) ;
2012-07-12 10:37:34 +10:00
}
2005-04-16 15:20:36 -07:00
return cd - > cache_show ( m , cd , cp ) ;
}
2007-07-10 23:07:31 -07:00
/*
 * seq_file iterator installed by content_open(): the RCU start/next/stop
 * helpers walk the cache, and c_show() prints each entry.
 */
static const struct seq_operations cache_content_op = {
2018-10-01 10:41:51 -04:00
. start = cache_seq_start_rcu ,
. next = cache_seq_next_rcu ,
. stop = cache_seq_stop_rcu ,
2005-04-16 15:20:36 -07:00
. show = c_show ,
} ;
2009-08-09 15:14:29 -04:00
/*
 * Open handler for a cache's content file.
 *
 * Takes a reference on the owning module, opens a seq_file using
 * cache_content_op and stores cd as the seq_file's private data.
 * Returns 0 on success, -EACCES if cd is NULL or the module reference
 * cannot be taken, or the seq_open() error (after dropping the module
 * reference again).
 */
static int content_open ( struct inode * inode , struct file * file ,
struct cache_detail * cd )
2005-04-16 15:20:36 -07:00
{
2015-07-27 11:09:10 +08:00
struct seq_file * seq ;
int err ;
2005-04-16 15:20:36 -07:00
2009-08-19 18:13:00 -04:00
if ( ! cd | | ! try_module_get ( cd - > owner ) )
return - EACCES ;
2015-07-27 11:09:10 +08:00
err = seq_open ( file , & cache_content_op ) ;
if ( err ) {
2010-03-11 14:08:10 -08:00
module_put ( cd - > owner ) ;
2015-07-27 11:09:10 +08:00
return err ;
2010-03-11 14:08:10 -08:00
}
2005-04-16 15:20:36 -07:00
2015-07-27 11:09:10 +08:00
/* the seq_file is read back from file->private_data after seq_open() */
seq = file - > private_data ;
seq - > private = cd ;
2007-10-10 02:31:07 -07:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
2009-08-19 18:13:00 -04:00
/*
 * Release handler for a cache's content file: releases the seq_file,
 * drops the module reference taken in content_open(), and returns the
 * seq_release() result.
 */
static int content_release ( struct inode * inode , struct file * file ,
struct cache_detail * cd )
{
2015-07-27 11:09:10 +08:00
int ret = seq_release ( inode , file ) ;
2009-08-19 18:13:00 -04:00
module_put ( cd - > owner ) ;
return ret ;
}
static int open_flush ( struct inode * inode , struct file * file ,
struct cache_detail * cd )
{
if ( ! cd | | ! try_module_get ( cd - > owner ) )
return - EACCES ;
return nonseekable_open ( inode , file ) ;
}
/*
 * Release handler for a cache's flush file: drops the module reference
 * taken in open_flush().  Always returns 0.
 */
static int release_flush ( struct inode * inode , struct file * file ,
struct cache_detail * cd )
{
module_put ( cd - > owner ) ;
return 0 ;
}
2005-04-16 15:20:36 -07:00
static ssize_t read_flush ( struct file * file , char __user * buf ,
2009-08-09 15:14:29 -04:00
size_t count , loff_t * ppos ,
struct cache_detail * cd )
2005-04-16 15:20:36 -07:00
{
2012-07-17 00:01:26 +02:00
char tbuf [ 22 ] ;
2007-10-26 13:31:20 -04:00
size_t len ;
2005-04-16 15:20:36 -07:00
nfs: use time64_t internally
The timestamps for the cache are all in boottime seconds, so they
don't overflow 32-bit values, but the use of time_t is deprecated
because it generally does overflow when used with wall-clock time.
There are multiple possible ways of avoiding it:
- leave time_t, which is safe here, but forces others to
look into this code to determine that it is over and over.
- use a more generic type, like 'int' or 'long', which is known
to be sufficient here but loses the documentation of referring
to timestamps
- use ktime_t everywhere, and convert into seconds in the few
places where we want realtime-seconds. The conversion is
sometimes expensive, but not more so than the conversion we
do today.
- use time64_t to clarify that this code is safe. Nothing would
change for 64-bit architectures, but it is slightly less
efficient on 32-bit architectures.
Without a clear winner of the three approaches above, this picks
the last one, favouring readability over a small performance
loss on 32-bit architectures.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2017-10-20 16:34:42 +02:00
len = snprintf ( tbuf , sizeof ( tbuf ) , " %llu \n " ,
2017-02-07 21:50:32 +08:00
convert_to_wallclock ( cd - > flush_time ) ) ;
return simple_read_from_buffer ( buf , count , ppos , tbuf , len ) ;
2005-04-16 15:20:36 -07:00
}
2009-08-09 15:14:29 -04:00
static ssize_t write_flush ( struct file * file , const char __user * buf ,
size_t count , loff_t * ppos ,
struct cache_detail * cd )
2005-04-16 15:20:36 -07:00
{
char tbuf [ 20 ] ;
2018-02-14 12:15:06 +11:00
char * ep ;
nfs: use time64_t internally
The timestamps for the cache are all in boottime seconds, so they
don't overflow 32-bit values, but the use of time_t is deprecated
because it generally does overflow when used with wall-clock time.
There are multiple possible ways of avoiding it:
- leave time_t, which is safe here, but forces others to
look into this code to determine that it is over and over.
- use a more generic type, like 'int' or 'long', which is known
to be sufficient here but loses the documentation of referring
to timestamps
- use ktime_t everywhere, and convert into seconds in the few
places where we want realtime-seconds. The conversion is
sometimes expensive, but not more so than the conversion we
do today.
- use time64_t to clarify that this code is safe. Nothing would
change for 64-bit architectures, but it is slightly less
efficient on 32-bit architectures.
Without a clear winner of the three approaches above, this picks
the last one, favouring readability over a small performance
loss on 32-bit architectures.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2017-10-20 16:34:42 +02:00
time64_t now ;
2010-08-12 16:55:22 +10:00
2005-04-16 15:20:36 -07:00
if ( * ppos | | count > sizeof ( tbuf ) - 1 )
return - EINVAL ;
if ( copy_from_user ( tbuf , buf , count ) )
return - EFAULT ;
tbuf [ count ] = 0 ;
2010-08-12 16:55:22 +10:00
simple_strtoul ( tbuf , & ep , 0 ) ;
2005-04-16 15:20:36 -07:00
if ( * ep & & * ep ! = ' \n ' )
return - EINVAL ;
2018-02-14 12:15:06 +11:00
/* Note that while we check that 'buf' holds a valid number,
* we always ignore the value and just flush everything .
* Making use of the number leads to races .
*/
2005-04-16 15:20:36 -07:00
2015-10-16 08:59:08 +11:00
now = seconds_since_boot ( ) ;
2018-02-14 12:15:06 +11:00
/* Always flush everything, so behave like cache_purge()
* Do this by advancing flush_time to the current time ,
* or by one second if it has already reached the current time .
* Newly added cache entries will always have ->last_refresh greater
* than ->flush_time, so they don't get flushed prematurely.
2015-10-16 08:59:08 +11:00
*/
2018-02-14 12:15:06 +11:00
if ( cd - > flush_time > = now )
now = cd - > flush_time + 1 ;
cd - > flush_time = now ;
cd - > nextcheck = now ;
2005-04-16 15:20:36 -07:00
cache_flush ( ) ;
2019-08-18 14:18:44 -04:00
if ( cd - > flush )
cd - > flush ( ) ;
2005-04-16 15:20:36 -07:00
* ppos + = count ;
return count ;
}
2009-08-09 15:14:29 -04:00
/*
 * procfs read entry point: fetches the cache_detail stored as the proc
 * entry's data and forwards to cache_read().
 */
static ssize_t cache_read_procfs ( struct file * filp , char __user * buf ,
size_t count , loff_t * ppos )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( file_inode ( filp ) ) ;
2009-08-09 15:14:29 -04:00
return cache_read ( filp , buf , count , ppos , cd ) ;
}
/*
 * procfs write entry point: fetches the cache_detail stored as the proc
 * entry's data and forwards to cache_write().
 */
static ssize_t cache_write_procfs ( struct file * filp , const char __user * buf ,
size_t count , loff_t * ppos )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( file_inode ( filp ) ) ;
2009-08-09 15:14:29 -04:00
return cache_write ( filp , buf , count , ppos , cd ) ;
}
2017-07-03 00:01:49 -04:00
/*
 * procfs poll entry point: fetches the cache_detail stored as the proc
 * entry's data and forwards to cache_poll().
 */
static __poll_t cache_poll_procfs ( struct file * filp , poll_table * wait )
2009-08-09 15:14:29 -04:00
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( file_inode ( filp ) ) ;
2009-08-09 15:14:29 -04:00
return cache_poll ( filp , wait , cd ) ;
}
2010-03-30 07:27:50 +02:00
/*
 * procfs ioctl entry point: fetches the cache_detail stored as the proc
 * entry's data and forwards to cache_ioctl().
 */
static long cache_ioctl_procfs ( struct file * filp ,
unsigned int cmd , unsigned long arg )
2009-08-09 15:14:29 -04:00
{
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( filp ) ;
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( inode ) ;
2009-08-09 15:14:29 -04:00
2010-10-04 21:18:23 +02:00
return cache_ioctl ( inode , filp , cmd , arg , cd ) ;
2009-08-09 15:14:29 -04:00
}
/*
 * procfs open entry point: fetches the cache_detail stored as the proc
 * entry's data and forwards to cache_open().
 */
static int cache_open_procfs ( struct inode * inode , struct file * filp )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( inode ) ;
2009-08-09 15:14:29 -04:00
return cache_open ( inode , filp , cd ) ;
}
/*
 * procfs release entry point: fetches the cache_detail stored as the
 * proc entry's data and forwards to cache_release().
 */
static int cache_release_procfs ( struct inode * inode , struct file * filp )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( inode ) ;
2009-08-09 15:14:29 -04:00
return cache_release ( inode , filp , cd ) ;
}
2020-02-03 17:37:17 -08:00
/*
 * Operations for the "channel" proc file registered by
 * create_cache_proc_entries(); seeking is disabled via no_llseek.
 */
static const struct proc_ops cache_channel_proc_ops = {
. proc_lseek = no_llseek ,
. proc_read = cache_read_procfs ,
. proc_write = cache_write_procfs ,
. proc_poll = cache_poll_procfs ,
. proc_ioctl = cache_ioctl_procfs , /* for FIONREAD */
. proc_open = cache_open_procfs ,
. proc_release = cache_release_procfs ,
2005-04-16 15:20:36 -07:00
} ;
2009-08-09 15:14:29 -04:00
/*
 * procfs open entry point for the content file: fetches the cache_detail
 * stored as the proc entry's data and forwards to content_open().
 */
static int content_open_procfs ( struct inode * inode , struct file * filp )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( inode ) ;
2009-08-09 15:14:29 -04:00
return content_open ( inode , filp , cd ) ;
}
2009-08-19 18:13:00 -04:00
/*
 * procfs release entry point for the content file: fetches the
 * cache_detail stored as the proc entry's data and forwards to
 * content_release().
 */
static int content_release_procfs ( struct inode * inode , struct file * filp )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( inode ) ;
2009-08-19 18:13:00 -04:00
return content_release ( inode , filp , cd ) ;
}
2020-02-03 17:37:17 -08:00
/*
 * Operations for the "content" proc file registered by
 * create_cache_proc_entries(); reads and seeks go through the generic
 * seq_file helpers.
 */
static const struct proc_ops content_proc_ops = {
. proc_open = content_open_procfs ,
. proc_read = seq_read ,
. proc_lseek = seq_lseek ,
. proc_release = content_release_procfs ,
2009-08-09 15:14:29 -04:00
} ;
2009-08-19 18:13:00 -04:00
/*
 * procfs open entry point for the flush file: fetches the cache_detail
 * stored as the proc entry's data and forwards to open_flush().
 */
static int open_flush_procfs ( struct inode * inode , struct file * filp )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( inode ) ;
2009-08-19 18:13:00 -04:00
return open_flush ( inode , filp , cd ) ;
}
/*
 * procfs release entry point for the flush file: fetches the
 * cache_detail stored as the proc entry's data and forwards to
 * release_flush().
 */
static int release_flush_procfs ( struct inode * inode , struct file * filp )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( inode ) ;
2009-08-19 18:13:00 -04:00
return release_flush ( inode , filp , cd ) ;
}
2009-08-09 15:14:29 -04:00
/*
 * procfs read entry point for the flush file: fetches the cache_detail
 * stored as the proc entry's data and forwards to read_flush().
 */
static ssize_t read_flush_procfs ( struct file * filp , char __user * buf ,
size_t count , loff_t * ppos )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( file_inode ( filp ) ) ;
2009-08-09 15:14:29 -04:00
return read_flush ( filp , buf , count , ppos , cd ) ;
}
/*
 * procfs write entry point for the flush file: fetches the cache_detail
 * stored as the proc entry's data and forwards to write_flush().
 */
static ssize_t write_flush_procfs ( struct file * filp ,
const char __user * buf ,
size_t count , loff_t * ppos )
{
2013-03-31 18:16:14 -04:00
struct cache_detail * cd = PDE_DATA ( file_inode ( filp ) ) ;
2009-08-09 15:14:29 -04:00
return write_flush ( filp , buf , count , ppos , cd ) ;
}
2020-02-03 17:37:17 -08:00
/*
 * Operations for the "flush" proc file registered by
 * create_cache_proc_entries(); seeking is disabled via no_llseek.
 */
static const struct proc_ops cache_flush_proc_ops = {
. proc_open = open_flush_procfs ,
. proc_read = read_flush_procfs ,
. proc_write = write_flush_procfs ,
. proc_release = release_flush_procfs ,
. proc_lseek = no_llseek ,
2005-04-16 15:20:36 -07:00
} ;
2009-08-09 15:14:29 -04:00
2017-02-07 21:47:16 +08:00
/*
 * Remove the cache's proc directory, if one was created, and clear
 * cd->procfs so that a repeated call does nothing.
 */
static void remove_cache_proc_entries ( struct cache_detail * cd )
2009-08-09 15:14:29 -04:00
{
2017-02-07 21:47:16 +08:00
if ( cd - > procfs ) {
proc_remove ( cd - > procfs ) ;
cd - > procfs = NULL ;
}
2009-08-09 15:14:29 -04:00
}
# ifdef CONFIG_PROC_FS
2010-09-27 14:00:15 +04:00
/*
 * Create the proc directory for a cache and the files inside it.
 *
 * The directory is named after cd->name and created under the
 * proc_net_rpc directory of the given network namespace.  A flush file
 * is always created; a channel file is created when the cache has a
 * ->cache_request or ->cache_parse method, and a content file when it
 * has a ->cache_show method.
 *
 * Returns 0 on success.  If any entry cannot be created, everything
 * created so far is removed and -ENOMEM is returned.
 */
static int create_cache_proc_entries ( struct cache_detail * cd , struct net * net )
2009-08-09 15:14:29 -04:00
{
struct proc_dir_entry * p ;
2010-09-27 14:01:58 +04:00
struct sunrpc_net * sn ;
2009-08-09 15:14:29 -04:00
2010-09-27 14:01:58 +04:00
sn = net_generic ( net , sunrpc_net_id ) ;
2017-02-07 21:47:16 +08:00
cd - > procfs = proc_mkdir ( cd - > name , sn - > proc_net_rpc ) ;
if ( cd - > procfs = = NULL )
2009-08-09 15:14:29 -04:00
goto out_nomem ;
2018-03-23 15:54:38 -07:00
p = proc_create_data ( " flush " , S_IFREG | 0600 ,
2020-02-03 17:37:17 -08:00
cd - > procfs , & cache_flush_proc_ops , cd ) ;
2009-08-09 15:14:29 -04:00
if ( p = = NULL )
goto out_nomem ;
2013-02-04 14:02:50 +03:00
if ( cd - > cache_request | | cd - > cache_parse ) {
2018-03-23 15:54:38 -07:00
p = proc_create_data ( " channel " , S_IFREG | 0600 , cd - > procfs ,
2020-02-03 17:37:17 -08:00
& cache_channel_proc_ops , cd ) ;
2009-08-09 15:14:29 -04:00
if ( p = = NULL )
goto out_nomem ;
}
if ( cd - > cache_show ) {
2018-03-23 15:54:38 -07:00
p = proc_create_data ( " content " , S_IFREG | 0400 , cd - > procfs ,
2020-02-03 17:37:17 -08:00
& content_proc_ops , cd ) ;
2009-08-09 15:14:29 -04:00
if ( p = = NULL )
goto out_nomem ;
}
return 0 ;
out_nomem :
2017-02-07 21:47:16 +08:00
/* removes the directory itself if it was created; no-op otherwise */
remove_cache_proc_entries ( cd ) ;
2009-08-09 15:14:29 -04:00
return - ENOMEM ;
}
# else /* CONFIG_PROC_FS */
2010-09-27 14:00:15 +04:00
static int create_cache_proc_entries ( struct cache_detail * cd , struct net * net )
2009-08-09 15:14:29 -04:00
{
return 0 ;
}
# endif
sunrpc: make the cache cleaner workqueue deferrable
This patch makes the cache_cleaner workqueue deferrable, to prevent
unnecessary system wake-ups, which is very important for embedded
battery-powered devices.
do_cache_clean() is called every 30 seconds at the moment, and often
makes the system wake up from its power-save sleep state. With this
change, when the workqueue uses a deferrable timer, the
do_cache_clean() invocation will be delayed and combined with the
closest "real" wake-up. This improves the power consumption situation.
Note, I tried to create a DECLARE_DELAYED_WORK_DEFERRABLE() helper
macro, similar to DECLARE_DELAYED_WORK(), but failed because of the
way the timer wheel core stores the deferrable flag (it is the
LSBit in the time->base pointer). My attempt to define a static
variable with this bit set ended up with the "initializer element is
not constant" error.
Thus, I have to use run-time initialization, so I created a new
cache_initialize() function which is called once when sunrpc is
being initialized.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2010-07-01 18:05:56 +03:00
/*
 * Run-time initialisation of the cache_cleaner work item: it is set up as
 * deferrable delayed work whose handler is do_cache_clean().
 */
void __init cache_initialize(void)
{
	INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean);
}
2010-09-27 14:00:15 +04:00
/*
 * Register a cache for @net: initialise the cache_detail, then create its
 * procfs entries.  If the entries cannot be created the cache_detail is
 * destroyed again.  Returns the result of create_cache_proc_entries().
 */
int cache_register_net(struct cache_detail *cd, struct net *net)
{
	int err;

	sunrpc_init_cache_detail(cd);

	err = create_cache_proc_entries(cd, net);
	if (err != 0)
		sunrpc_destroy_cache_detail(cd);

	return err;
}
2011-12-07 12:57:56 +03:00
EXPORT_SYMBOL_GPL ( cache_register_net ) ;
2010-09-27 14:00:15 +04:00
/*
 * Undo cache_register_net(): remove the procfs entries first, then destroy
 * the cache_detail.  @net is not used here.
 */
void cache_unregister_net(struct cache_detail *cd, struct net *net)
{
	remove_cache_proc_entries(cd);
	sunrpc_destroy_cache_detail(cd);
}
2011-12-07 12:57:56 +03:00
EXPORT_SYMBOL_GPL ( cache_unregister_net ) ;
2010-09-27 14:00:15 +04:00
2017-10-17 18:14:23 +02:00
struct cache_detail * cache_create_net ( const struct cache_detail * tmpl , struct net * net )
2012-01-19 21:42:21 +04:00
{
struct cache_detail * cd ;
2015-07-27 11:10:15 +08:00
int i ;
2012-01-19 21:42:21 +04:00
cd = kmemdup ( tmpl , sizeof ( struct cache_detail ) , GFP_KERNEL ) ;
if ( cd = = NULL )
return ERR_PTR ( - ENOMEM ) ;
treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:
kzalloc(a * b, gfp)
with:
kcalloc(a * b, gfp)
as well as handling cases of:
kzalloc(a * b * c, gfp)
with:
kzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kzalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kzalloc
+ kcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kzalloc(sizeof(THING) * C2, ...)
|
kzalloc(sizeof(TYPE) * C2, ...)
|
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 14:03:40 -07:00
cd - > hash_table = kcalloc ( cd - > hash_size , sizeof ( struct hlist_head ) ,
2012-01-19 21:42:21 +04:00
GFP_KERNEL ) ;
if ( cd - > hash_table = = NULL ) {
kfree ( cd ) ;
return ERR_PTR ( - ENOMEM ) ;
}
2015-07-27 11:10:15 +08:00
for ( i = 0 ; i < cd - > hash_size ; i + + )
INIT_HLIST_HEAD ( & cd - > hash_table [ i ] ) ;
2012-01-19 21:42:21 +04:00
cd - > net = net ;
return cd ;
}
EXPORT_SYMBOL_GPL ( cache_create_net ) ;
void cache_destroy_net ( struct cache_detail * cd , struct net * net )
2010-09-27 14:00:15 +04:00
{
2012-01-19 21:42:21 +04:00
kfree ( cd - > hash_table ) ;
kfree ( cd ) ;
2010-09-27 14:00:15 +04:00
}
2012-01-19 21:42:21 +04:00
EXPORT_SYMBOL_GPL ( cache_destroy_net ) ;
2009-08-09 15:14:30 -04:00
/*
 * rpc_pipefs read handler for a cache channel: take the cache_detail from
 * the rpc inode's ->private and delegate to cache_read().
 */
static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
				 size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct cache_detail *detail = RPC_I(inode)->private;

	return cache_read(filp, buf, count, ppos, detail);
}
/*
 * rpc_pipefs write handler for a cache channel: take the cache_detail from
 * the rpc inode's ->private and delegate to cache_write().
 */
static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct cache_detail *detail = RPC_I(inode)->private;

	return cache_write(filp, buf, count, ppos, detail);
}
2017-07-03 00:01:49 -04:00
/*
 * rpc_pipefs poll handler for a cache channel: take the cache_detail from
 * the rpc inode's ->private and delegate to cache_poll().
 */
static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait)
{
	struct inode *inode = file_inode(filp);
	struct cache_detail *detail = RPC_I(inode)->private;

	return cache_poll(filp, wait, detail);
}
2010-05-19 15:08:17 +02:00
static long cache_ioctl_pipefs ( struct file * filp ,
2009-08-09 15:14:30 -04:00
unsigned int cmd , unsigned long arg )
{
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( filp ) ;
2009-08-09 15:14:30 -04:00
struct cache_detail * cd = RPC_I ( inode ) - > private ;
2010-10-04 21:18:23 +02:00
return cache_ioctl ( inode , filp , cmd , arg , cd ) ;
2009-08-09 15:14:30 -04:00
}
static int cache_open_pipefs ( struct inode * inode , struct file * filp )
{
struct cache_detail * cd = RPC_I ( inode ) - > private ;
return cache_open ( inode , filp , cd ) ;
}
static int cache_release_pipefs ( struct inode * inode , struct file * filp )
{
struct cache_detail * cd = RPC_I ( inode ) - > private ;
return cache_release ( inode , filp , cd ) ;
}
const struct file_operations cache_file_operations_pipefs = {
. owner = THIS_MODULE ,
. llseek = no_llseek ,
. read = cache_read_pipefs ,
. write = cache_write_pipefs ,
. poll = cache_poll_pipefs ,
2010-05-19 15:08:17 +02:00
. unlocked_ioctl = cache_ioctl_pipefs , /* for FIONREAD */
2009-08-09 15:14:30 -04:00
. open = cache_open_pipefs ,
. release = cache_release_pipefs ,
} ;
static int content_open_pipefs ( struct inode * inode , struct file * filp )
{
struct cache_detail * cd = RPC_I ( inode ) - > private ;
return content_open ( inode , filp , cd ) ;
}
2009-08-19 18:13:00 -04:00
static int content_release_pipefs ( struct inode * inode , struct file * filp )
{
struct cache_detail * cd = RPC_I ( inode ) - > private ;
return content_release ( inode , filp , cd ) ;
}
2009-08-09 15:14:30 -04:00
const struct file_operations content_file_operations_pipefs = {
. open = content_open_pipefs ,
. read = seq_read ,
. llseek = seq_lseek ,
2009-08-19 18:13:00 -04:00
. release = content_release_pipefs ,
2009-08-09 15:14:30 -04:00
} ;
2009-08-19 18:13:00 -04:00
static int open_flush_pipefs ( struct inode * inode , struct file * filp )
{
struct cache_detail * cd = RPC_I ( inode ) - > private ;
return open_flush ( inode , filp , cd ) ;
}
static int release_flush_pipefs ( struct inode * inode , struct file * filp )
{
struct cache_detail * cd = RPC_I ( inode ) - > private ;
return release_flush ( inode , filp , cd ) ;
}
2009-08-09 15:14:30 -04:00
/*
 * rpc_pipefs read handler for a cache flush file: take the cache_detail
 * from the rpc inode's ->private and delegate to read_flush().
 */
static ssize_t read_flush_pipefs(struct file *filp, char __user *buf,
				 size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct cache_detail *detail = RPC_I(inode)->private;

	return read_flush(filp, buf, count, ppos, detail);
}
/*
 * rpc_pipefs write handler for a cache flush file: take the cache_detail
 * from the rpc inode's ->private and delegate to write_flush().
 */
static ssize_t write_flush_pipefs(struct file *filp,
				  const char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct cache_detail *detail = RPC_I(inode)->private;

	return write_flush(filp, buf, count, ppos, detail);
}
const struct file_operations cache_flush_operations_pipefs = {
2009-08-19 18:13:00 -04:00
. open = open_flush_pipefs ,
2009-08-09 15:14:30 -04:00
. read = read_flush_pipefs ,
. write = write_flush_pipefs ,
2009-08-19 18:13:00 -04:00
. release = release_flush_pipefs ,
llseek: automatically add .llseek fop
All file_operations should get a .llseek operation so we can make
nonseekable_open the default for future file operations without a
.llseek pointer.
The three cases that we can automatically detect are no_llseek, seq_lseek
and default_llseek. For cases where we can we can automatically prove that
the file offset is always ignored, we use noop_llseek, which maintains
the current behavior of not returning an error from a seek.
New drivers should normally not use noop_llseek but instead use no_llseek
and call nonseekable_open at open time. Existing drivers can be converted
to do the same when the maintainer knows for certain that no user code
relies on calling seek on the device file.
The generated code is often incorrectly indented and right now contains
comments that clarify for each added line why a specific variant was
chosen. In the version that gets submitted upstream, the comments will
be gone and I will manually fix the indentation, because there does not
seem to be a way to do that using coccinelle.
Some amount of new code is currently sitting in linux-next that should get
the same modifications, which I will do at the end of the merge window.
Many thanks to Julia Lawall for helping me learn to write a semantic
patch that does all this.
===== begin semantic patch =====
// This adds an llseek= method to all file operations,
// as a preparation for making no_llseek the default.
//
// The rules are
// - use no_llseek explicitly if we do nonseekable_open
// - use seq_lseek for sequential files
// - use default_llseek if we know we access f_pos
// - use noop_llseek if we know we don't access f_pos,
// but we still want to allow users to call lseek
//
@ open1 exists @
identifier nested_open;
@@
nested_open(...)
{
<+...
nonseekable_open(...)
...+>
}
@ open exists@
identifier open_f;
identifier i, f;
identifier open1.nested_open;
@@
int open_f(struct inode *i, struct file *f)
{
<+...
(
nonseekable_open(...)
|
nested_open(...)
)
...+>
}
@ read disable optional_qualifier exists @
identifier read_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
expression E;
identifier func;
@@
ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
{
<+...
(
*off = E
|
*off += E
|
func(..., off, ...)
|
E = *off
)
...+>
}
@ read_no_fpos disable optional_qualifier exists @
identifier read_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
@@
ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
{
... when != off
}
@ write @
identifier write_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
expression E;
identifier func;
@@
ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
{
<+...
(
*off = E
|
*off += E
|
func(..., off, ...)
|
E = *off
)
...+>
}
@ write_no_fpos @
identifier write_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
@@
ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
{
... when != off
}
@ fops0 @
identifier fops;
@@
struct file_operations fops = {
...
};
@ has_llseek depends on fops0 @
identifier fops0.fops;
identifier llseek_f;
@@
struct file_operations fops = {
...
.llseek = llseek_f,
...
};
@ has_read depends on fops0 @
identifier fops0.fops;
identifier read_f;
@@
struct file_operations fops = {
...
.read = read_f,
...
};
@ has_write depends on fops0 @
identifier fops0.fops;
identifier write_f;
@@
struct file_operations fops = {
...
.write = write_f,
...
};
@ has_open depends on fops0 @
identifier fops0.fops;
identifier open_f;
@@
struct file_operations fops = {
...
.open = open_f,
...
};
// use no_llseek if we call nonseekable_open
////////////////////////////////////////////
@ nonseekable1 depends on !has_llseek && has_open @
identifier fops0.fops;
identifier nso ~= "nonseekable_open";
@@
struct file_operations fops = {
... .open = nso, ...
+.llseek = no_llseek, /* nonseekable */
};
@ nonseekable2 depends on !has_llseek @
identifier fops0.fops;
identifier open.open_f;
@@
struct file_operations fops = {
... .open = open_f, ...
+.llseek = no_llseek, /* open uses nonseekable */
};
// use seq_lseek for sequential files
/////////////////////////////////////
@ seq depends on !has_llseek @
identifier fops0.fops;
identifier sr ~= "seq_read";
@@
struct file_operations fops = {
... .read = sr, ...
+.llseek = seq_lseek, /* we have seq_read */
};
// use default_llseek if there is a readdir
///////////////////////////////////////////
@ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier readdir_e;
@@
// any other fop is used that changes pos
struct file_operations fops = {
... .readdir = readdir_e, ...
+.llseek = default_llseek, /* readdir is present */
};
// use default_llseek if at least one of read/write touches f_pos
/////////////////////////////////////////////////////////////////
@ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read.read_f;
@@
// read fops use offset
struct file_operations fops = {
... .read = read_f, ...
+.llseek = default_llseek, /* read accesses f_pos */
};
@ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier write.write_f;
@@
// write fops use offset
struct file_operations fops = {
... .write = write_f, ...
+ .llseek = default_llseek, /* write accesses f_pos */
};
// Use noop_llseek if neither read nor write accesses f_pos
///////////////////////////////////////////////////////////
@ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read_no_fpos.read_f;
identifier write_no_fpos.write_f;
@@
// write fops use offset
struct file_operations fops = {
...
.write = write_f,
.read = read_f,
...
+.llseek = noop_llseek, /* read and write both use no f_pos */
};
@ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier write_no_fpos.write_f;
@@
struct file_operations fops = {
... .write = write_f, ...
+.llseek = noop_llseek, /* write uses no f_pos */
};
@ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read_no_fpos.read_f;
@@
struct file_operations fops = {
... .read = read_f, ...
+.llseek = noop_llseek, /* read uses no f_pos */
};
@ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
@@
struct file_operations fops = {
...
+.llseek = noop_llseek, /* no read or write fn */
};
===== End semantic patch =====
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Julia Lawall <julia@diku.dk>
Cc: Christoph Hellwig <hch@infradead.org>
2010-08-15 18:52:59 +02:00
. llseek = no_llseek ,
2009-08-09 15:14:30 -04:00
} ;
/*
 * Create the rpc_pipefs directory for a cache via rpc_create_cache_dir()
 * and remember it in cd->pipefs.
 *
 * Returns 0 on success, or the error encoded in the returned dentry
 * pointer on failure (cd->pipefs is left untouched in that case).
 */
int sunrpc_cache_register_pipefs(struct dentry *parent,
				 const char *name, umode_t umode,
				 struct cache_detail *cd)
{
	struct dentry *cache_dir;

	cache_dir = rpc_create_cache_dir(parent, name, umode, cd);
	if (!IS_ERR(cache_dir)) {
		cd->pipefs = cache_dir;
		return 0;
	}
	return PTR_ERR(cache_dir);
}
EXPORT_SYMBOL_GPL ( sunrpc_cache_register_pipefs ) ;
void sunrpc_cache_unregister_pipefs ( struct cache_detail * cd )
{
2017-02-07 21:47:16 +08:00
if ( cd - > pipefs ) {
rpc_remove_cache_dir ( cd - > pipefs ) ;
cd - > pipefs = NULL ;
}
2009-08-09 15:14:30 -04:00
}
EXPORT_SYMBOL_GPL ( sunrpc_cache_unregister_pipefs ) ;
2016-12-22 12:38:06 -05:00
void sunrpc_cache_unhash ( struct cache_detail * cd , struct cache_head * h )
{
2018-10-01 10:41:52 -04:00
spin_lock ( & cd - > hash_lock ) ;
2016-12-22 12:38:06 -05:00
if ( ! hlist_unhashed ( & h - > cache_list ) ) {
2020-01-06 13:40:35 -05:00
sunrpc_begin_cache_remove_entry ( h , cd ) ;
2018-10-01 10:41:52 -04:00
spin_unlock ( & cd - > hash_lock ) ;
2020-01-06 13:40:35 -05:00
sunrpc_end_cache_remove_entry ( h , cd ) ;
2016-12-22 12:38:06 -05:00
} else
2018-10-01 10:41:52 -04:00
spin_unlock ( & cd - > hash_lock ) ;
2016-12-22 12:38:06 -05:00
}
EXPORT_SYMBOL_GPL ( sunrpc_cache_unhash ) ;