2019-05-31 11:09:56 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2006-01-16 19:50:04 +03:00
/*
* Copyright ( C ) Sistina Software , Inc . 1997 - 2003 All rights reserved .
2007-12-12 03:51:25 +03:00
* Copyright ( C ) 2004 - 2007 Red Hat , Inc . All rights reserved .
2006-01-16 19:50:04 +03:00
*/
/*
* Quota change tags are associated with each transaction that allocates or
* deallocates space . Those changes are accumulated locally to each node ( in a
* per - node file ) and then are periodically synced to the quota file . This
* avoids the bottleneck of constantly touching the quota file , but introduces
* fuzziness in the current usage value of IDs that are being used on different
* nodes in the cluster simultaneously . So , it is possible for a user on
* multiple nodes to overrun their quota , but that overrun is controllable .
2009-09-15 23:42:56 +04:00
* Since quota tags are part of transactions , there is no need for a quota check
2006-01-16 19:50:04 +03:00
* program to be run on node crashes or anything like that .
*
* There are a couple of knobs that let the administrator manage the quota
* fuzziness . " quota_quantum " sets the maximum time a quota change can be
* sitting on one node before being synced to the quota file . ( The default is
* 60 seconds . ) Another knob , " quota_scale " controls how quickly the frequency
* of quota file syncs increases as the user moves closer to their limit . The
* more frequent the syncs , the more accurate the quota enforcement , but that
* means that there is more contention between the nodes for the quota file .
* The default value is one . This sets the maximum theoretical quota overrun
* ( with infinite nodes with infinite bandwidth ) to twice the user ' s limit . ( In
* practice , the maximum overrun you see should be much less . ) A " quota_scale "
* number greater than one makes quota syncs more frequent and reduces the
* maximum overrun . Numbers less than one ( but greater than zero ) make quota
* syncs less frequent .
*
* GFS quotas also use per - ID Lock Value Blocks ( LVBs ) to cache the contents of
* the quota file , so it is not being constantly read .
*/
2014-03-07 00:10:45 +04:00
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2006-01-16 19:50:04 +03:00
# include <linux/sched.h>
# include <linux/slab.h>
2011-05-25 04:12:27 +04:00
# include <linux/mm.h>
2006-01-16 19:50:04 +03:00
# include <linux/spinlock.h>
# include <linux/completion.h>
# include <linux/buffer_head.h>
# include <linux/sort.h>
2006-02-08 14:50:51 +03:00
# include <linux/fs.h>
2006-10-02 19:38:25 +04:00
# include <linux/bio.h>
2006-02-28 01:23:27 +03:00
# include <linux/gfs2_ondisk.h>
2008-11-17 17:25:37 +03:00
# include <linux/kthread.h>
# include <linux/freezer.h>
2009-09-28 15:49:15 +04:00
# include <linux/quota.h>
2009-09-11 18:57:27 +04:00
# include <linux/dqblk_xfs.h>
2013-11-01 22:52:06 +04:00
# include <linux/lockref.h>
2013-11-04 14:15:08 +04:00
# include <linux/list_lru.h>
2013-12-12 14:47:59 +04:00
# include <linux/rcupdate.h>
# include <linux/rculist_bl.h>
# include <linux/bit_spinlock.h>
# include <linux/jhash.h>
2014-01-15 16:57:25 +04:00
# include <linux/vmalloc.h>
2006-01-16 19:50:04 +03:00
# include "gfs2.h"
2006-02-28 01:23:27 +03:00
# include "incore.h"
2006-01-16 19:50:04 +03:00
# include "bmap.h"
# include "glock.h"
# include "glops.h"
# include "log.h"
# include "meta_io.h"
# include "quota.h"
# include "rgrp.h"
# include "super.h"
# include "trans.h"
2006-02-08 14:50:51 +03:00
# include "inode.h"
2006-02-28 01:23:27 +03:00
# include "util.h"
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
# define GFS2_QD_HASH_SHIFT 12
2016-08-02 20:05:27 +03:00
# define GFS2_QD_HASH_SIZE BIT(GFS2_QD_HASH_SHIFT)
2013-12-12 14:47:59 +04:00
# define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1)
/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
2013-12-13 15:46:28 +04:00
/* -> sd_bitmap_lock */
2013-11-01 22:52:08 +04:00
static DEFINE_SPINLOCK ( qd_lock ) ;
2013-11-04 14:15:08 +04:00
struct list_lru gfs2_qd_lru ;
2009-01-08 01:03:37 +03:00
2013-12-12 14:47:59 +04:00
static struct hlist_bl_head qd_hash_table [ GFS2_QD_HASH_SIZE ] ;
/**
 * gfs2_qd_hash - pick the hash bucket for a quota ID on a filesystem
 * @sdp: the filesystem; the pointer value itself is what gets hashed
 * @qid: the quota ID
 *
 * Returns: an index into qd_hash_table
 */
static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp,
				 const struct kqid qid)
{
	/* Hash the superblock pointer first, then chain the ID onto it. */
	unsigned int seed = jhash(&sdp, sizeof(sdp), 0);

	return jhash(&qid, sizeof(qid), seed) & GFS2_QD_HASH_MASK;
}
/* Lock the qd_hash_table bucket with index @hash. */
static inline void spin_lock_bucket(unsigned int hash)
{
	struct hlist_bl_head *bucket = &qd_hash_table[hash];

	hlist_bl_lock(bucket);
}
/* Unlock the qd_hash_table bucket with index @hash. */
static inline void spin_unlock_bucket(unsigned int hash)
{
	struct hlist_bl_head *bucket = &qd_hash_table[hash];

	hlist_bl_unlock(bucket);
}
static void gfs2_qd_dealloc ( struct rcu_head * rcu )
{
struct gfs2_quota_data * qd = container_of ( rcu , struct gfs2_quota_data , qd_rcu ) ;
kmem_cache_free ( gfs2_quotad_cachep , qd ) ;
}
2013-11-04 14:15:08 +04:00
static void gfs2_qd_dispose ( struct list_head * list )
2009-01-08 01:03:37 +03:00
{
struct gfs2_quota_data * qd ;
struct gfs2_sbd * sdp ;
2013-11-04 14:15:08 +04:00
while ( ! list_empty ( list ) ) {
2020-02-03 21:22:45 +03:00
qd = list_first_entry ( list , struct gfs2_quota_data , qd_lru ) ;
2015-03-16 19:52:05 +03:00
sdp = qd - > qd_gl - > gl_name . ln_sbd ;
2009-01-08 01:03:37 +03:00
2013-11-04 14:15:08 +04:00
list_del ( & qd - > qd_lru ) ;
2009-01-08 01:03:37 +03:00
/* Free from the filesystem-specific list */
2013-11-04 14:15:08 +04:00
spin_lock ( & qd_lock ) ;
2009-01-08 01:03:37 +03:00
list_del ( & qd - > qd_list ) ;
2013-11-04 14:15:08 +04:00
spin_unlock ( & qd_lock ) ;
2009-01-08 01:03:37 +03:00
2013-12-12 14:47:59 +04:00
spin_lock_bucket ( qd - > qd_hash ) ;
hlist_bl_del_rcu ( & qd - > qd_hlist ) ;
spin_unlock_bucket ( qd - > qd_hash ) ;
2009-01-08 01:03:37 +03:00
gfs2_assert_warn ( sdp , ! qd - > qd_change ) ;
gfs2_assert_warn ( sdp , ! qd - > qd_slot_count ) ;
gfs2_assert_warn ( sdp , ! qd - > qd_bh_count ) ;
2009-01-12 13:43:39 +03:00
gfs2_glock_put ( qd - > qd_gl ) ;
2009-01-08 01:03:37 +03:00
atomic_dec ( & sdp - > sd_quota_count ) ;
/* Delete it from the common reclaim list */
2013-12-12 14:47:59 +04:00
call_rcu ( & qd - > qd_rcu , gfs2_qd_dealloc ) ;
2009-01-08 01:03:37 +03:00
}
2013-11-04 14:15:08 +04:00
}
2015-02-13 01:59:35 +03:00
/**
 * gfs2_qd_isolate - list_lru walk callback for the quota data shrinker
 * @item: the qd_lru member of the quota data object being visited
 * @lru: the LRU sub-list @item is on
 * @lru_lock: lock of that sub-list (not used here)
 * @arg: list head on which objects to be disposed of are collected
 *
 * An object is only reclaimed if its lockref can be locked without spinning
 * and its reference count is zero; it is then marked dead and moved onto the
 * dispose list.
 *
 * Returns: LRU_REMOVED if the object was moved to the dispose list, LRU_SKIP
 *          if it was left in place (lock contended or still referenced)
 */
static enum lru_status gfs2_qd_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct gfs2_quota_data *qd =
		list_entry(item, struct gfs2_quota_data, qd_lru);
	enum lru_status status = LRU_SKIP;

	if (!spin_trylock(&qd->qd_lockref.lock))
		return LRU_SKIP;

	if (qd->qd_lockref.count == 0) {
		lockref_mark_dead(&qd->qd_lockref);
		list_lru_isolate_move(lru, &qd->qd_lru, dispose);
		/* Only report removal when the item really left the LRU. */
		status = LRU_REMOVED;
	}

	spin_unlock(&qd->qd_lockref.lock);
	return status;
}
static unsigned long gfs2_qd_shrink_scan ( struct shrinker * shrink ,
struct shrink_control * sc )
{
LIST_HEAD ( dispose ) ;
unsigned long freed ;
if ( ! ( sc - > gfp_mask & __GFP_FS ) )
return SHRINK_STOP ;
list_lru: introduce list_lru_shrink_{count,walk}
Kmem accounting of memcg is unusable now, because it lacks slab shrinker
support. That means when we hit the limit we will get ENOMEM w/o any
chance to recover. What we should do then is to call shrink_slab, which
would reclaim old inode/dentry caches from this cgroup. This is what
this patch set is intended to do.
Basically, it does two things. First, it introduces the notion of
per-memcg slab shrinker. A shrinker that wants to reclaim objects per
cgroup should mark itself as SHRINKER_MEMCG_AWARE. Then it will be
passed the memory cgroup to scan from in shrink_control->memcg. For
such shrinkers shrink_slab iterates over the whole cgroup subtree under
the target cgroup and calls the shrinker for each kmem-active memory
cgroup.
Secondly, this patch set makes the list_lru structure per-memcg. It's
done transparently to list_lru users - everything they have to do is to
tell list_lru_init that they want memcg-aware list_lru. Then the
list_lru will automatically distribute objects among per-memcg lists
basing on which cgroup the object is accounted to. This way to make FS
shrinkers (icache, dcache) memcg-aware we only need to make them use
memcg-aware list_lru, and this is what this patch set does.
As before, this patch set only enables per-memcg kmem reclaim when the
pressure goes from memory.limit, not from memory.kmem.limit. Handling
memory.kmem.limit is going to be tricky due to GFP_NOFS allocations, and
it is still unclear whether we will have this knob in the unified
hierarchy.
This patch (of 9):
NUMA aware slab shrinkers use the list_lru structure to distribute
objects coming from different NUMA nodes to different lists. Whenever
such a shrinker needs to count or scan objects from a particular node,
it issues commands like this:
count = list_lru_count_node(lru, sc->nid);
freed = list_lru_walk_node(lru, sc->nid, isolate_func,
isolate_arg, &sc->nr_to_scan);
where sc is an instance of the shrink_control structure passed to it
from vmscan.
To simplify this, let's add special list_lru functions to be used by
shrinkers, list_lru_shrink_count() and list_lru_shrink_walk(), which
consolidate the nid and nr_to_scan arguments in the shrink_control
structure.
This will also allow us to avoid patching shrinkers that use list_lru
when we make shrink_slab() per-memcg - all we will have to do is extend
the shrink_control structure to include the target memcg and make
list_lru_shrink_{count,walk} handle this appropriately.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-13 01:58:47 +03:00
freed = list_lru_shrink_walk ( & gfs2_qd_lru , sc ,
gfs2_qd_isolate , & dispose ) ;
2013-11-04 14:15:08 +04:00
gfs2_qd_dispose ( & dispose ) ;
2013-08-28 04:18:09 +04:00
return freed ;
}
2009-01-08 01:03:37 +03:00
2013-11-04 14:15:08 +04:00
static unsigned long gfs2_qd_shrink_count ( struct shrinker * shrink ,
struct shrink_control * sc )
2013-08-28 04:18:09 +04:00
{
list_lru: introduce list_lru_shrink_{count,walk}
Kmem accounting of memcg is unusable now, because it lacks slab shrinker
support. That means when we hit the limit we will get ENOMEM w/o any
chance to recover. What we should do then is to call shrink_slab, which
would reclaim old inode/dentry caches from this cgroup. This is what
this patch set is intended to do.
Basically, it does two things. First, it introduces the notion of
per-memcg slab shrinker. A shrinker that wants to reclaim objects per
cgroup should mark itself as SHRINKER_MEMCG_AWARE. Then it will be
passed the memory cgroup to scan from in shrink_control->memcg. For
such shrinkers shrink_slab iterates over the whole cgroup subtree under
the target cgroup and calls the shrinker for each kmem-active memory
cgroup.
Secondly, this patch set makes the list_lru structure per-memcg. It's
done transparently to list_lru users - everything they have to do is to
tell list_lru_init that they want memcg-aware list_lru. Then the
list_lru will automatically distribute objects among per-memcg lists
basing on which cgroup the object is accounted to. This way to make FS
shrinkers (icache, dcache) memcg-aware we only need to make them use
memcg-aware list_lru, and this is what this patch set does.
As before, this patch set only enables per-memcg kmem reclaim when the
pressure goes from memory.limit, not from memory.kmem.limit. Handling
memory.kmem.limit is going to be tricky due to GFP_NOFS allocations, and
it is still unclear whether we will have this knob in the unified
hierarchy.
This patch (of 9):
NUMA aware slab shrinkers use the list_lru structure to distribute
objects coming from different NUMA nodes to different lists. Whenever
such a shrinker needs to count or scan objects from a particular node,
it issues commands like this:
count = list_lru_count_node(lru, sc->nid);
freed = list_lru_walk_node(lru, sc->nid, isolate_func,
isolate_arg, &sc->nr_to_scan);
where sc is an instance of the shrink_control structure passed to it
from vmscan.
To simplify this, let's add special list_lru functions to be used by
shrinkers, list_lru_shrink_count() and list_lru_shrink_walk(), which
consolidate the nid and nr_to_scan arguments in the shrink_control
structure.
This will also allow us to avoid patching shrinkers that use list_lru
when we make shrink_slab() per-memcg - all we will have to do is extend
the shrink_control structure to include the target memcg and make
list_lru_shrink_{count,walk} handle this appropriately.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-13 01:58:47 +03:00
return vfs_pressure_ratio ( list_lru_shrink_count ( & gfs2_qd_lru , sc ) ) ;
2009-01-08 01:03:37 +03:00
}
2013-11-04 14:15:08 +04:00
/* Shrinker that reclaims unreferenced quota data objects from gfs2_qd_lru. */
struct shrinker gfs2_qd_shrinker = {
	.flags		= SHRINKER_NUMA_AWARE,
	.seeks		= DEFAULT_SEEKS,
	.count_objects	= gfs2_qd_shrink_count,
	.scan_objects	= gfs2_qd_shrink_scan,
};
2013-02-01 06:33:38 +04:00
static u64 qd2index ( struct gfs2_quota_data * qd )
{
2013-02-01 07:52:08 +04:00
struct kqid qid = qd - > qd_id ;
return ( 2 * ( u64 ) from_kqid ( & init_user_ns , qid ) ) +
2013-05-10 19:59:18 +04:00
( ( qid . type = = USRQUOTA ) ? 0 : 1 ) ;
2013-02-01 06:33:38 +04:00
}
2006-09-04 20:49:07 +04:00
static u64 qd2offset ( struct gfs2_quota_data * qd )
2006-01-16 19:50:04 +03:00
{
2006-09-04 20:49:07 +04:00
u64 offset ;
2006-01-16 19:50:04 +03:00
2013-02-01 06:33:38 +04:00
offset = qd2index ( qd ) ;
2006-01-16 19:50:04 +03:00
offset * = sizeof ( struct gfs2_quota ) ;
return offset ;
}
2013-12-12 14:47:59 +04:00
static struct gfs2_quota_data * qd_alloc ( unsigned hash , struct gfs2_sbd * sdp , struct kqid qid )
2006-01-16 19:50:04 +03:00
{
struct gfs2_quota_data * qd ;
int error ;
2008-11-17 17:25:37 +03:00
qd = kmem_cache_zalloc ( gfs2_quotad_cachep , GFP_NOFS ) ;
2006-01-16 19:50:04 +03:00
if ( ! qd )
2013-12-12 14:47:59 +04:00
return NULL ;
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
qd - > qd_sbd = sdp ;
2013-11-01 22:52:06 +04:00
qd - > qd_lockref . count = 1 ;
spin_lock_init ( & qd - > qd_lockref . lock ) ;
2013-02-01 07:52:08 +04:00
qd - > qd_id = qid ;
2006-01-16 19:50:04 +03:00
qd - > qd_slot = - 1 ;
2013-11-04 14:15:08 +04:00
INIT_LIST_HEAD ( & qd - > qd_lru ) ;
2013-12-12 14:47:59 +04:00
qd - > qd_hash = hash ;
2006-01-16 19:50:04 +03:00
2013-02-01 06:33:38 +04:00
error = gfs2_glock_get ( sdp , qd2index ( qd ) ,
2006-01-16 19:50:04 +03:00
& gfs2_quota_glops , CREATE , & qd - > qd_gl ) ;
if ( error )
goto fail ;
2013-12-12 14:47:59 +04:00
return qd ;
2006-01-16 19:50:04 +03:00
2006-09-04 20:04:26 +04:00
fail :
2008-11-17 17:25:37 +03:00
kmem_cache_free ( gfs2_quotad_cachep , qd ) ;
2013-12-12 14:47:59 +04:00
return NULL ;
2006-01-16 19:50:04 +03:00
}
2013-12-12 14:47:59 +04:00
static struct gfs2_quota_data * gfs2_qd_search_bucket ( unsigned int hash ,
const struct gfs2_sbd * sdp ,
struct kqid qid )
2006-01-16 19:50:04 +03:00
{
2013-12-12 14:47:59 +04:00
struct gfs2_quota_data * qd ;
struct hlist_bl_node * h ;
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
hlist_bl_for_each_entry_rcu ( qd , h , & qd_hash_table [ hash ] , qd_hlist ) {
if ( ! qid_eq ( qd - > qd_id , qid ) )
continue ;
if ( qd - > qd_sbd ! = sdp )
continue ;
if ( lockref_get_not_dead ( & qd - > qd_lockref ) ) {
list_lru_del ( & gfs2_qd_lru , & qd - > qd_lru ) ;
return qd ;
2006-01-16 19:50:04 +03:00
}
2013-12-12 14:47:59 +04:00
}
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
return NULL ;
}
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
static int qd_get ( struct gfs2_sbd * sdp , struct kqid qid ,
struct gfs2_quota_data * * qdp )
{
struct gfs2_quota_data * qd , * new_qd ;
unsigned int hash = gfs2_qd_hash ( sdp , qid ) ;
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
rcu_read_lock ( ) ;
* qdp = qd = gfs2_qd_search_bucket ( hash , sdp , qid ) ;
rcu_read_unlock ( ) ;
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
if ( qd )
return 0 ;
new_qd = qd_alloc ( hash , sdp , qid ) ;
if ( ! new_qd )
return - ENOMEM ;
spin_lock ( & qd_lock ) ;
spin_lock_bucket ( hash ) ;
* qdp = qd = gfs2_qd_search_bucket ( hash , sdp , qid ) ;
if ( qd = = NULL ) {
* qdp = new_qd ;
list_add ( & new_qd - > qd_list , & sdp - > sd_quota_list ) ;
hlist_bl_add_head_rcu ( & new_qd - > qd_hlist , & qd_hash_table [ hash ] ) ;
atomic_inc ( & sdp - > sd_quota_count ) ;
}
spin_unlock_bucket ( hash ) ;
spin_unlock ( & qd_lock ) ;
if ( qd ) {
gfs2_glock_put ( new_qd - > qd_gl ) ;
kmem_cache_free ( gfs2_quotad_cachep , new_qd ) ;
2006-01-16 19:50:04 +03:00
}
2013-12-12 14:47:59 +04:00
return 0 ;
2006-01-16 19:50:04 +03:00
}
2013-12-12 14:47:59 +04:00
2006-01-16 19:50:04 +03:00
static void qd_hold ( struct gfs2_quota_data * qd )
{
2015-03-16 19:52:05 +03:00
struct gfs2_sbd * sdp = qd - > qd_gl - > gl_name . ln_sbd ;
2013-11-01 22:52:06 +04:00
gfs2_assert ( sdp , ! __lockref_is_dead ( & qd - > qd_lockref ) ) ;
lockref_get ( & qd - > qd_lockref ) ;
2006-01-16 19:50:04 +03:00
}
static void qd_put ( struct gfs2_quota_data * qd )
{
2013-11-04 14:15:08 +04:00
if ( lockref_put_or_lock ( & qd - > qd_lockref ) )
return ;
2013-11-01 22:52:06 +04:00
2013-11-04 14:15:08 +04:00
qd - > qd_lockref . count = 0 ;
list_lru_add ( & gfs2_qd_lru , & qd - > qd_lru ) ;
spin_unlock ( & qd - > qd_lockref . lock ) ;
2013-11-01 22:52:06 +04:00
2006-01-16 19:50:04 +03:00
}
static int slot_get ( struct gfs2_quota_data * qd )
{
2013-12-12 21:29:32 +04:00
struct gfs2_sbd * sdp = qd - > qd_sbd ;
unsigned int bit ;
int error = 0 ;
2006-01-16 19:50:04 +03:00
2013-12-13 15:46:28 +04:00
spin_lock ( & sdp - > sd_bitmap_lock ) ;
2013-12-12 21:29:32 +04:00
if ( qd - > qd_slot_count ! = 0 )
goto out ;
2006-01-16 19:50:04 +03:00
2013-12-12 21:29:32 +04:00
error = - ENOSPC ;
bit = find_first_zero_bit ( sdp - > sd_quota_bitmap , sdp - > sd_quota_slots ) ;
if ( bit < sdp - > sd_quota_slots ) {
set_bit ( bit , sdp - > sd_quota_bitmap ) ;
qd - > qd_slot = bit ;
2014-03-31 10:19:29 +04:00
error = 0 ;
2013-12-12 21:29:32 +04:00
out :
qd - > qd_slot_count + + ;
2006-01-16 19:50:04 +03:00
}
2013-12-13 15:46:28 +04:00
spin_unlock ( & sdp - > sd_bitmap_lock ) ;
2006-01-16 19:50:04 +03:00
2013-12-12 21:29:32 +04:00
return error ;
2006-01-16 19:50:04 +03:00
}
static void slot_hold ( struct gfs2_quota_data * qd )
{
2013-12-12 21:29:32 +04:00
struct gfs2_sbd * sdp = qd - > qd_sbd ;
2006-01-16 19:50:04 +03:00
2013-12-13 15:46:28 +04:00
spin_lock ( & sdp - > sd_bitmap_lock ) ;
2006-01-16 19:50:04 +03:00
gfs2_assert ( sdp , qd - > qd_slot_count ) ;
qd - > qd_slot_count + + ;
2013-12-13 15:46:28 +04:00
spin_unlock ( & sdp - > sd_bitmap_lock ) ;
2006-01-16 19:50:04 +03:00
}
static void slot_put ( struct gfs2_quota_data * qd )
{
2013-12-12 21:29:32 +04:00
struct gfs2_sbd * sdp = qd - > qd_sbd ;
2006-01-16 19:50:04 +03:00
2013-12-13 15:46:28 +04:00
spin_lock ( & sdp - > sd_bitmap_lock ) ;
2006-01-16 19:50:04 +03:00
gfs2_assert ( sdp , qd - > qd_slot_count ) ;
if ( ! - - qd - > qd_slot_count ) {
2013-12-12 21:29:32 +04:00
BUG_ON ( ! test_and_clear_bit ( qd - > qd_slot , sdp - > sd_quota_bitmap ) ) ;
2006-01-16 19:50:04 +03:00
qd - > qd_slot = - 1 ;
}
2013-12-13 15:46:28 +04:00
spin_unlock ( & sdp - > sd_bitmap_lock ) ;
2006-01-16 19:50:04 +03:00
}
static int bh_get ( struct gfs2_quota_data * qd )
{
2015-03-16 19:52:05 +03:00
struct gfs2_sbd * sdp = qd - > qd_gl - > gl_name . ln_sbd ;
2022-02-11 18:50:36 +03:00
struct inode * inode = sdp - > sd_qc_inode ;
struct gfs2_inode * ip = GFS2_I ( inode ) ;
2006-01-16 19:50:04 +03:00
unsigned int block , offset ;
struct buffer_head * bh ;
2022-02-11 18:50:36 +03:00
struct iomap iomap = { } ;
2006-01-16 19:50:04 +03:00
int error ;
2006-02-21 15:51:39 +03:00
mutex_lock ( & sdp - > sd_quota_mutex ) ;
2006-01-16 19:50:04 +03:00
if ( qd - > qd_bh_count + + ) {
2006-02-21 15:51:39 +03:00
mutex_unlock ( & sdp - > sd_quota_mutex ) ;
2006-01-16 19:50:04 +03:00
return 0 ;
}
block = qd - > qd_slot / sdp - > sd_qc_per_block ;
2007-12-12 03:51:25 +03:00
offset = qd - > qd_slot % sdp - > sd_qc_per_block ;
2006-01-16 19:50:04 +03:00
2022-02-11 18:50:36 +03:00
error = gfs2_iomap_get ( inode ,
( loff_t ) block < < inode - > i_blkbits ,
i_blocksize ( inode ) , & iomap ) ;
2006-01-16 19:50:04 +03:00
if ( error )
goto fail ;
2022-02-11 18:50:36 +03:00
error = - ENOENT ;
if ( iomap . type ! = IOMAP_MAPPED )
goto fail ;
error = gfs2_meta_read ( ip - > i_gl , iomap . addr > > inode - > i_blkbits ,
DIO_WAIT , 0 , & bh ) ;
2006-01-16 19:50:04 +03:00
if ( error )
goto fail ;
error = - EIO ;
if ( gfs2_metatype_check ( sdp , bh , GFS2_METATYPE_QC ) )
goto fail_brelse ;
qd - > qd_bh = bh ;
qd - > qd_bh_qc = ( struct gfs2_quota_change * )
( bh - > b_data + sizeof ( struct gfs2_meta_header ) +
offset * sizeof ( struct gfs2_quota_change ) ) ;
2007-02-20 08:03:29 +03:00
mutex_unlock ( & sdp - > sd_quota_mutex ) ;
2006-01-16 19:50:04 +03:00
return 0 ;
2006-09-04 20:04:26 +04:00
fail_brelse :
2006-01-16 19:50:04 +03:00
brelse ( bh ) ;
2006-09-04 20:04:26 +04:00
fail :
2006-01-16 19:50:04 +03:00
qd - > qd_bh_count - - ;
2006-02-21 15:51:39 +03:00
mutex_unlock ( & sdp - > sd_quota_mutex ) ;
2006-01-16 19:50:04 +03:00
return error ;
}
static void bh_put ( struct gfs2_quota_data * qd )
{
2015-03-16 19:52:05 +03:00
struct gfs2_sbd * sdp = qd - > qd_gl - > gl_name . ln_sbd ;
2006-01-16 19:50:04 +03:00
2006-02-21 15:51:39 +03:00
mutex_lock ( & sdp - > sd_quota_mutex ) ;
2006-01-16 19:50:04 +03:00
gfs2_assert ( sdp , qd - > qd_bh_count ) ;
if ( ! - - qd - > qd_bh_count ) {
brelse ( qd - > qd_bh ) ;
qd - > qd_bh = NULL ;
qd - > qd_bh_qc = NULL ;
}
2006-02-21 15:51:39 +03:00
mutex_unlock ( & sdp - > sd_quota_mutex ) ;
2006-01-16 19:50:04 +03:00
}
2013-10-04 14:14:46 +04:00
/**
 * qd_check_sync - claim a quota data object for syncing if it needs it
 * @sdp: the filesystem
 * @qd: the candidate object
 * @sync_gen: if non-NULL, skip objects whose qd_sync_gen is >= *@sync_gen
 *
 * An object is claimed when it is not already QDF_LOCKED, has QDF_CHANGE set,
 * passes the generation check and is not dead. Claiming takes an object
 * reference and a slot reference, sets QDF_LOCKED, snapshots qd_change into
 * qd_change_sync and moves the object to the tail of sd_quota_list.
 *
 * Returns: 1 if the object was claimed, 0 otherwise
 */
static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
			 u64 *sync_gen)
{
	if (test_bit(QDF_LOCKED, &qd->qd_flags))
		return 0;
	if (!test_bit(QDF_CHANGE, &qd->qd_flags))
		return 0;
	if (sync_gen && qd->qd_sync_gen >= *sync_gen)
		return 0;
	if (!lockref_get_not_dead(&qd->qd_lockref))
		return 0;

	list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
	set_bit(QDF_LOCKED, &qd->qd_flags);
	qd->qd_change_sync = qd->qd_change;
	slot_hold(qd);

	return 1;
}
2006-01-16 19:50:04 +03:00
static int qd_fish ( struct gfs2_sbd * sdp , struct gfs2_quota_data * * qdp )
{
2022-04-01 01:38:57 +03:00
struct gfs2_quota_data * qd = NULL , * iter ;
2006-01-16 19:50:04 +03:00
int error ;
* qdp = NULL ;
2017-07-17 10:45:34 +03:00
if ( sb_rdonly ( sdp - > sd_vfs ) )
2006-01-16 19:50:04 +03:00
return 0 ;
2013-11-01 22:52:08 +04:00
spin_lock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
2022-04-01 01:38:57 +03:00
list_for_each_entry ( iter , & sdp - > sd_quota_list , qd_list ) {
if ( qd_check_sync ( sdp , iter , & sdp - > sd_quota_sync_gen ) ) {
qd = iter ;
2013-10-04 14:14:46 +04:00
break ;
2022-04-01 01:38:57 +03:00
}
2006-01-16 19:50:04 +03:00
}
2013-11-01 22:52:08 +04:00
spin_unlock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
if ( qd ) {
gfs2_assert_warn ( sdp , qd - > qd_change_sync ) ;
error = bh_get ( qd ) ;
if ( error ) {
clear_bit ( QDF_LOCKED , & qd - > qd_flags ) ;
slot_put ( qd ) ;
qd_put ( qd ) ;
return error ;
}
}
* qdp = qd ;
return 0 ;
}
static void qd_unlock ( struct gfs2_quota_data * qd )
{
2015-03-16 19:52:05 +03:00
gfs2_assert_warn ( qd - > qd_gl - > gl_name . ln_sbd ,
2006-02-27 20:00:42 +03:00
test_bit ( QDF_LOCKED , & qd - > qd_flags ) ) ;
2006-01-16 19:50:04 +03:00
clear_bit ( QDF_LOCKED , & qd - > qd_flags ) ;
bh_put ( qd ) ;
slot_put ( qd ) ;
qd_put ( qd ) ;
}
2013-02-01 07:35:56 +04:00
static int qdsb_get ( struct gfs2_sbd * sdp , struct kqid qid ,
2006-01-16 19:50:04 +03:00
struct gfs2_quota_data * * qdp )
{
int error ;
2013-02-01 07:52:08 +04:00
error = qd_get ( sdp , qid , qdp ) ;
2006-01-16 19:50:04 +03:00
if ( error )
return error ;
error = slot_get ( * qdp ) ;
if ( error )
goto fail ;
error = bh_get ( * qdp ) ;
if ( error )
goto fail_slot ;
return 0 ;
2006-09-04 20:04:26 +04:00
fail_slot :
2006-01-16 19:50:04 +03:00
slot_put ( * qdp ) ;
2006-09-04 20:04:26 +04:00
fail :
2006-01-16 19:50:04 +03:00
qd_put ( * qdp ) ;
return error ;
}
/*
 * Drop the buffer, slot and object references on @qd, in the reverse of the
 * order in which qdsb_get() takes them.
 */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}
2015-10-26 18:40:28 +03:00
/**
2020-02-27 21:47:53 +03:00
* gfs2_qa_get - make sure we have a quota allocations data structure ,
* if necessary
2015-10-26 18:40:28 +03:00
* @ ip : the inode for this reservation
*/
2020-02-27 21:47:53 +03:00
int gfs2_qa_get ( struct gfs2_inode * ip )
2015-10-26 18:40:28 +03:00
{
struct gfs2_sbd * sdp = GFS2_SB ( & ip - > i_inode ) ;
2022-02-11 18:50:08 +03:00
struct inode * inode = & ip - > i_inode ;
2015-10-26 18:40:28 +03:00
if ( sdp - > sd_args . ar_quota = = GFS2_QUOTA_OFF )
return 0 ;
2022-02-11 18:50:08 +03:00
spin_lock ( & inode - > i_lock ) ;
2015-10-26 18:40:28 +03:00
if ( ip - > i_qadata = = NULL ) {
2022-02-11 18:50:08 +03:00
struct gfs2_qadata * tmp ;
spin_unlock ( & inode - > i_lock ) ;
tmp = kmem_cache_zalloc ( gfs2_qadata_cachep , GFP_NOFS ) ;
if ( ! tmp )
return - ENOMEM ;
spin_lock ( & inode - > i_lock ) ;
if ( ip - > i_qadata = = NULL )
ip - > i_qadata = tmp ;
else
kmem_cache_free ( gfs2_qadata_cachep , tmp ) ;
2015-10-26 18:40:28 +03:00
}
2020-02-27 21:47:53 +03:00
ip - > i_qadata - > qa_ref + + ;
2022-02-11 18:50:08 +03:00
spin_unlock ( & inode - > i_lock ) ;
return 0 ;
2015-10-26 18:40:28 +03:00
}
2020-02-27 21:47:53 +03:00
void gfs2_qa_put ( struct gfs2_inode * ip )
2015-10-26 18:40:28 +03:00
{
2022-02-11 18:50:08 +03:00
struct inode * inode = & ip - > i_inode ;
spin_lock ( & inode - > i_lock ) ;
2020-02-27 21:47:53 +03:00
if ( ip - > i_qadata & & - - ip - > i_qadata - > qa_ref = = 0 ) {
2015-10-26 18:40:28 +03:00
kmem_cache_free ( gfs2_qadata_cachep , ip - > i_qadata ) ;
ip - > i_qadata = NULL ;
}
2022-02-11 18:50:08 +03:00
spin_unlock ( & inode - > i_lock ) ;
2015-10-26 18:40:28 +03:00
}
2013-02-01 08:27:54 +04:00
/**
 * gfs2_quota_hold - hold the quota data objects an operation will touch
 * @ip: the inode
 * @uid: a further user ID to hold, or NO_UID_QUOTA_CHANGE
 * @gid: a further group ID to hold, or NO_GID_QUOTA_CHANGE
 *
 * Holds the objects for the inode's current owner and group, plus those for
 * @uid / @gid when they are given and differ from the current ones. The held
 * objects are recorded in @ip->i_qadata->qa_qd and counted in qa_qd_num.
 * Does nothing when quotas are off.
 *
 * On success the reference taken with gfs2_qa_get() is kept and is dropped
 * by gfs2_quota_unhold(). On every failure path it is dropped before
 * returning.
 *
 * Returns: 0 on success, -EIO if objects are already held or locked for this
 *          inode, or the error from gfs2_qa_get() / qdsb_get()
 */
int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_quota_data **qd;
	int error;

	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
		return 0;

	error = gfs2_qa_get(ip);
	if (error)
		return error;

	qd = ip->i_qadata->qa_qd;

	if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) ||
	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) {
		error = -EIO;
		/*
		 * Nothing was held by us, so gfs2_quota_unhold() must not be
		 * called; just balance the gfs2_qa_get() above.
		 */
		gfs2_qa_put(ip);
		goto out;
	}

	error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
	if (error)
		goto out_unhold;
	ip->i_qadata->qa_qd_num++;
	qd++;

	error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
	if (error)
		goto out_unhold;
	ip->i_qadata->qa_qd_num++;
	qd++;

	if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) &&
	    !uid_eq(uid, ip->i_inode.i_uid)) {
		error = qdsb_get(sdp, make_kqid_uid(uid), qd);
		if (error)
			goto out_unhold;
		ip->i_qadata->qa_qd_num++;
		qd++;
	}

	if (!gid_eq(gid, NO_GID_QUOTA_CHANGE) &&
	    !gid_eq(gid, ip->i_inode.i_gid)) {
		error = qdsb_get(sdp, make_kqid_gid(gid), qd);
		if (error)
			goto out_unhold;
		ip->i_qadata->qa_qd_num++;
		qd++;
	}

out_unhold:
	/* Releases whatever was held so far, including the qa reference. */
	if (error)
		gfs2_quota_unhold(ip);
out:
	return error;
}
void gfs2_quota_unhold ( struct gfs2_inode * ip )
{
2006-06-14 23:32:57 +04:00
struct gfs2_sbd * sdp = GFS2_SB ( & ip - > i_inode ) ;
2015-07-24 17:45:43 +03:00
u32 x ;
2006-01-16 19:50:04 +03:00
2015-10-26 18:40:28 +03:00
if ( ip - > i_qadata = = NULL )
2012-05-18 17:28:23 +04:00
return ;
2020-02-27 21:47:53 +03:00
2006-01-16 19:50:04 +03:00
gfs2_assert_warn ( sdp , ! test_bit ( GIF_QD_LOCKED , & ip - > i_flags ) ) ;
2015-10-26 18:40:28 +03:00
for ( x = 0 ; x < ip - > i_qadata - > qa_qd_num ; x + + ) {
qdsb_put ( ip - > i_qadata - > qa_qd [ x ] ) ;
ip - > i_qadata - > qa_qd [ x ] = NULL ;
2006-01-16 19:50:04 +03:00
}
2015-10-26 18:40:28 +03:00
ip - > i_qadata - > qa_qd_num = 0 ;
2020-02-27 21:47:53 +03:00
gfs2_qa_put ( ip ) ;
2006-01-16 19:50:04 +03:00
}
static int sort_qd ( const void * a , const void * b )
{
2006-09-05 23:17:12 +04:00
const struct gfs2_quota_data * qd_a = * ( const struct gfs2_quota_data * * ) a ;
const struct gfs2_quota_data * qd_b = * ( const struct gfs2_quota_data * * ) b ;
2006-01-16 19:50:04 +03:00
2013-02-01 07:52:08 +04:00
if ( qid_lt ( qd_a - > qd_id , qd_b - > qd_id ) )
2006-09-05 23:17:12 +04:00
return - 1 ;
2013-02-01 07:52:08 +04:00
if ( qid_lt ( qd_b - > qd_id , qd_a - > qd_id ) )
2006-09-05 23:17:12 +04:00
return 1 ;
return 0 ;
2006-01-16 19:50:04 +03:00
}
2006-09-04 20:49:07 +04:00
/**
 * do_qc - apply a change to a quota data object's quota change record
 * @qd: the quota data object (its buffer must be held, see bh_get())
 * @change: signed amount to add to the pending change
 *
 * Adds the buffer to the current transaction and updates the on-disk
 * qc_change as well as the in-core qd_change. The first change on a slot
 * initialises the record and takes an object and a slot reference (tracked
 * by QDF_CHANGE); when the pending change returns to zero the record is
 * cleared and those references are dropped again.
 */
static void do_qc(struct gfs2_quota_data *qd, s64 change)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	struct gfs2_quota_change *qc = qd->qd_bh_qc;
	s64 total;

	mutex_lock(&sdp->sd_quota_mutex);
	gfs2_trans_add_meta(ip->i_gl, qd->qd_bh);

	if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
		/* No change pending yet: start from a freshly filled record */
		qc->qc_change = 0;
		if (qd->qd_id.type == USRQUOTA)
			qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
		else
			qc->qc_flags = 0;
		qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id));
	}

	total = be64_to_cpu(qc->qc_change) + change;
	qc->qc_change = cpu_to_be64(total);

	spin_lock(&qd_lock);
	qd->qd_change = total;
	spin_unlock(&qd_lock);

	if (total == 0) {
		gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
		clear_bit(QDF_CHANGE, &qd->qd_flags);
		qc->qc_flags = 0;
		qc->qc_id = 0;
		slot_put(qd);
		qd_put(qd);
	} else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
		qd_hold(qd);
		slot_hold(qd);
	}

	/* Reset quiet flag if we freed some blocks */
	if (change < 0)
		clear_bit(QDF_QMSG_QUIET, &qd->qd_flags);

	mutex_unlock(&sdp->sd_quota_mutex);
}
2015-06-02 19:02:24 +03:00
static int gfs2_write_buf_to_page ( struct gfs2_inode * ip , unsigned long index ,
unsigned off , void * buf , unsigned bytes )
{
struct inode * inode = & ip - > i_inode ;
struct gfs2_sbd * sdp = GFS2_SB ( inode ) ;
struct address_space * mapping = inode - > i_mapping ;
struct page * page ;
struct buffer_head * bh ;
void * kaddr ;
u64 blk ;
unsigned bsize = sdp - > sd_sb . sb_bsize , bnum = 0 , boff = 0 ;
unsigned to_write = bytes , pg_off = off ;
int done = 0 ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
blk = index < < ( PAGE_SHIFT - sdp - > sd_sb . sb_bsize_shift ) ;
2015-06-02 19:02:24 +03:00
boff = off % bsize ;
page = find_or_create_page ( mapping , index , GFP_NOFS ) ;
if ( ! page )
return - ENOMEM ;
if ( ! page_has_buffers ( page ) )
create_empty_buffers ( page , bsize , 0 ) ;
bh = page_buffers ( page ) ;
while ( ! done ) {
/* Find the beginning block within the page */
if ( pg_off > = ( ( bnum * bsize ) + bsize ) ) {
bh = bh - > b_this_page ;
bnum + + ;
blk + + ;
continue ;
}
if ( ! buffer_mapped ( bh ) ) {
gfs2_block_map ( inode , blk , bh , 1 ) ;
if ( ! buffer_mapped ( bh ) )
goto unlock_out ;
/* If it's a newly allocated disk block, zero it */
if ( buffer_new ( bh ) )
zero_user ( page , bnum * bsize , bh - > b_size ) ;
}
if ( PageUptodate ( page ) )
set_buffer_uptodate ( bh ) ;
if ( ! buffer_uptodate ( bh ) ) {
gfs2: add flag REQ_PRIO for metadata I/O
When gfs2 does metadata I/O, only REQ_META is used as a metadata hint of
the bio. But flag REQ_META is just a hint for block trace, not for block
layer code to handle a bio as metadata request.
For some of metadata I/Os of gfs2, A REQ_PRIO flag on the metadata bio
would be very informative to block layer code. For example, if bcache is
used as a I/O cache for gfs2, it will be possible for bcache code to get
the hint and cache the pre-fetched metadata blocks on cache device. This
behavior may be helpful to improve metadata I/O performance if the
following requests hit the cache.
Here are the locations in gfs2 code where a REQ_PRIO flag should be added,
- All places where REQ_READAHEAD is used, gfs2 code uses this flag for
metadata read ahead.
- In gfs2_meta_rq() where the first metadata block is read in.
- In gfs2_write_buf_to_page(), read in quota metadata blocks to have them
up to date.
These metadata blocks are probably to be accessed again in future, adding
a REQ_PRIO flag may have bcache to keep such metadata in fast cache
device. For system without a cache layer, REQ_PRIO can still provide hint
to block layer to handle metadata requests more properly.
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
2017-07-21 15:48:22 +03:00
ll_rw_block ( REQ_OP_READ , REQ_META | REQ_PRIO , 1 , & bh ) ;
2015-06-02 19:02:24 +03:00
wait_on_buffer ( bh ) ;
if ( ! buffer_uptodate ( bh ) )
goto unlock_out ;
}
2018-06-04 15:50:16 +03:00
if ( gfs2_is_jdata ( ip ) )
gfs2_trans_add_data ( ip - > i_gl , bh ) ;
else
gfs2_ordered_add_inode ( ip ) ;
2015-06-02 19:02:24 +03:00
/* If we need to write to the next block as well */
if ( to_write > ( bsize - boff ) ) {
pg_off + = ( bsize - boff ) ;
to_write - = ( bsize - boff ) ;
boff = pg_off % bsize ;
continue ;
}
done = 1 ;
}
/* Write to the page, now that we have setup the buffer(s) */
kaddr = kmap_atomic ( page ) ;
memcpy ( kaddr + off , buf , bytes ) ;
flush_dcache_page ( page ) ;
kunmap_atomic ( kaddr ) ;
unlock_page ( page ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( page ) ;
2015-06-02 19:02:24 +03:00
return 0 ;
unlock_out :
unlock_page ( page ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( page ) ;
2015-06-02 19:02:24 +03:00
return - EIO ;
}
/**
 * gfs2_write_disk_quota - write one struct gfs2_quota into an inode's pages
 * @ip: the inode to write to
 * @qp: the quota record to write
 * @loc: byte offset of the record within the file
 *
 * A record that crosses a page boundary is written with two calls to
 * gfs2_write_buf_to_page(): the leading part at the end of the first page
 * and the remaining bytes at offset 0 of the following page.
 *
 * Returns: 0 or the error returned by gfs2_write_buf_to_page()
 */
static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
				 loff_t loc)
{
	unsigned long pg_beg;
	unsigned pg_off, nbytes, overflow = 0;
	int error;
	void *ptr;

	nbytes = sizeof(struct gfs2_quota);

	pg_beg = loc >> PAGE_SHIFT;
	pg_off = offset_in_page(loc);

	/* If the quota straddles a page boundary, split the write in two */
	if ((pg_off + nbytes) > PAGE_SIZE)
		overflow = (pg_off + nbytes) - PAGE_SIZE;

	ptr = qp;
	error = gfs2_write_buf_to_page(ip, pg_beg, pg_off, ptr,
				       nbytes - overflow);
	/* If there's an overflow, write the remaining bytes to the next page */
	if (!error && overflow)
		error = gfs2_write_buf_to_page(ip, pg_beg + 1, 0,
					       ptr + nbytes - overflow,
					       overflow);
	return error;
}
2006-02-08 14:50:51 +03:00
/**
2009-09-15 23:42:56 +04:00
* gfs2_adjust_quota - adjust record of current block usage
* @ ip : The quota inode
* @ loc : Offset of the entry in the quota file
2009-09-23 16:50:49 +04:00
* @ change : The amount of usage change to record
2009-09-15 23:42:56 +04:00
* @ qd : The quota data
2009-09-23 16:50:49 +04:00
* @ fdq : The updated limits to record
2006-02-08 14:50:51 +03:00
*
* This function was mostly borrowed from gfs2_block_truncate_page which was
* in turn mostly borrowed from ext3
2009-09-15 23:42:56 +04:00
*
* Returns : 0 or - ve on error
2006-02-08 14:50:51 +03:00
*/
2009-09-15 23:42:56 +04:00
2006-02-08 14:50:51 +03:00
static int gfs2_adjust_quota ( struct gfs2_inode * ip , loff_t loc ,
2009-09-23 16:50:49 +04:00
s64 change , struct gfs2_quota_data * qd ,
2014-10-09 18:03:13 +04:00
struct qc_dqblk * fdq )
2006-02-08 14:50:51 +03:00
{
2006-06-14 23:32:57 +04:00
struct inode * inode = & ip - > i_inode ;
2010-11-18 19:24:24 +03:00
struct gfs2_sbd * sdp = GFS2_SB ( inode ) ;
2013-06-03 03:53:40 +04:00
struct gfs2_quota q ;
2015-06-02 19:02:24 +03:00
int err ;
2009-09-23 16:50:49 +04:00
u64 size ;
2006-02-08 14:50:51 +03:00
2011-09-19 13:25:49 +04:00
if ( gfs2_is_stuffed ( ip ) ) {
2021-06-17 22:36:50 +03:00
err = gfs2_unstuff_dinode ( ip ) ;
2011-09-19 13:25:49 +04:00
if ( err )
return err ;
}
2010-05-08 01:50:18 +04:00
memset ( & q , 0 , sizeof ( struct gfs2_quota ) ) ;
2012-04-16 19:40:55 +04:00
err = gfs2_internal_read ( ip , ( char * ) & q , & loc , sizeof ( q ) ) ;
2010-05-08 01:50:18 +04:00
if ( err < 0 )
return err ;
2015-06-02 19:02:24 +03:00
loc - = sizeof ( q ) ; /* gfs2_internal_read would've advanced the loc ptr */
2010-05-08 01:50:18 +04:00
err = - EIO ;
2013-06-03 03:53:40 +04:00
be64_add_cpu ( & q . qu_value , change ) ;
2015-06-08 19:20:50 +03:00
if ( ( ( s64 ) be64_to_cpu ( q . qu_value ) ) < 0 )
2015-06-02 19:02:24 +03:00
q . qu_value = 0 ; /* Never go negative on quota usage */
2013-06-03 03:53:40 +04:00
qd - > qd_qb . qb_value = q . qu_value ;
2010-05-08 01:50:18 +04:00
if ( fdq ) {
2014-10-09 18:03:13 +04:00
if ( fdq - > d_fieldmask & QC_SPC_SOFT ) {
q . qu_warn = cpu_to_be64 ( fdq - > d_spc_softlimit > > sdp - > sd_sb . sb_bsize_shift ) ;
2013-06-03 03:53:40 +04:00
qd - > qd_qb . qb_warn = q . qu_warn ;
2010-05-08 01:50:18 +04:00
}
2014-10-09 18:03:13 +04:00
if ( fdq - > d_fieldmask & QC_SPC_HARD ) {
q . qu_limit = cpu_to_be64 ( fdq - > d_spc_hardlimit > > sdp - > sd_sb . sb_bsize_shift ) ;
2013-06-03 03:53:40 +04:00
qd - > qd_qb . qb_limit = q . qu_limit ;
2010-05-08 01:50:18 +04:00
}
2014-10-09 18:03:13 +04:00
if ( fdq - > d_fieldmask & QC_SPACE ) {
q . qu_value = cpu_to_be64 ( fdq - > d_space > > sdp - > sd_sb . sb_bsize_shift ) ;
2013-06-03 03:53:40 +04:00
qd - > qd_qb . qb_value = q . qu_value ;
2010-11-18 19:26:46 +03:00
}
2010-05-08 01:50:18 +04:00
}
2015-06-02 19:02:24 +03:00
err = gfs2_write_disk_quota ( ip , & q , loc ) ;
if ( ! err ) {
size = loc + sizeof ( struct gfs2_quota ) ;
if ( size > inode - > i_size )
i_size_write ( inode , size ) ;
2016-09-14 17:48:04 +03:00
inode - > i_mtime = inode - > i_atime = current_time ( inode ) ;
2015-06-02 19:02:24 +03:00
mark_inode_dirty ( inode ) ;
set_bit ( QDF_REFRESH , & qd - > qd_flags ) ;
2010-05-08 01:50:18 +04:00
}
2009-09-23 16:50:49 +04:00
2006-02-08 14:50:51 +03:00
return err ;
}
2006-01-16 19:50:04 +03:00
static int do_sync ( unsigned int num_qd , struct gfs2_quota_data * * qda )
{
2015-03-16 19:52:05 +03:00
struct gfs2_sbd * sdp = ( * qda ) - > qd_gl - > gl_name . ln_sbd ;
2006-06-14 23:32:57 +04:00
struct gfs2_inode * ip = GFS2_I ( sdp - > sd_quota_inode ) ;
2013-10-02 14:13:25 +04:00
struct gfs2_alloc_parms ap = { . aflags = 0 , } ;
2006-01-16 19:50:04 +03:00
unsigned int data_blocks , ind_blocks ;
struct gfs2_holder * ghs , i_gh ;
unsigned int qx , x ;
struct gfs2_quota_data * qd ;
2012-07-30 17:53:19 +04:00
unsigned reserved ;
2006-01-30 21:34:10 +03:00
loff_t offset ;
2008-03-07 02:43:52 +03:00
unsigned int nalloc = 0 , blocks ;
2006-01-16 19:50:04 +03:00
int error ;
2020-02-27 21:47:53 +03:00
error = gfs2_qa_get ( ip ) ;
2012-06-06 14:17:59 +04:00
if ( error )
return error ;
2006-01-16 19:50:04 +03:00
gfs2_write_calc_reserv ( ip , sizeof ( struct gfs2_quota ) ,
& data_blocks , & ind_blocks ) ;
treewide: kmalloc() -> kmalloc_array()
The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
patch replaces cases of:
kmalloc(a * b, gfp)
with:
kmalloc_array(a * b, gfp)
as well as handling cases of:
kmalloc(a * b * c, gfp)
with:
kmalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kmalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kmalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The tools/ directory was manually excluded, since it has its own
implementation of kmalloc().
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kmalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kmalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kmalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kmalloc
+ kmalloc_array
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kmalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kmalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kmalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kmalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kmalloc(C1 * C2 * C3, ...)
|
kmalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kmalloc(sizeof(THING) * C2, ...)
|
kmalloc(sizeof(TYPE) * C2, ...)
|
kmalloc(C1 * C2 * C3, ...)
|
kmalloc(C1 * C2, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- (E1) * E2
+ E1, E2
, ...)
|
- kmalloc
+ kmalloc_array
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kmalloc
+ kmalloc_array
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 23:55:00 +03:00
ghs = kmalloc_array ( num_qd , sizeof ( struct gfs2_holder ) , GFP_NOFS ) ;
2020-02-27 21:47:53 +03:00
if ( ! ghs ) {
error = - ENOMEM ;
goto out ;
}
2006-01-16 19:50:04 +03:00
sort ( qda , num_qd , sizeof ( struct gfs2_quota_data * ) , sort_qd , NULL ) ;
2016-01-22 23:40:57 +03:00
inode_lock ( & ip - > i_inode ) ;
2006-01-16 19:50:04 +03:00
for ( qx = 0 ; qx < num_qd ; qx + + ) {
2009-09-15 23:42:56 +04:00
error = gfs2_glock_nq_init ( qda [ qx ] - > qd_gl , LM_ST_EXCLUSIVE ,
2006-01-16 19:50:04 +03:00
GL_NOCACHE , & ghs [ qx ] ) ;
if ( error )
2020-02-27 21:47:53 +03:00
goto out_dq ;
2006-01-16 19:50:04 +03:00
}
error = gfs2_glock_nq_init ( ip - > i_gl , LM_ST_EXCLUSIVE , 0 , & i_gh ) ;
if ( error )
2020-02-27 21:47:53 +03:00
goto out_dq ;
2006-01-16 19:50:04 +03:00
for ( x = 0 ; x < num_qd ; x + + ) {
offset = qd2offset ( qda [ x ] ) ;
2010-06-25 03:21:20 +04:00
if ( gfs2_write_alloc_required ( ip , offset ,
sizeof ( struct gfs2_quota ) ) )
2006-01-16 19:50:04 +03:00
nalloc + + ;
}
2008-03-07 02:43:52 +03:00
/*
* 1 blk for unstuffing inode if stuffed . We add this extra
* block to the reservation unconditionally . If the inode
* doesn ' t need unstuffing , the block will be released to the
* rgrp since it won ' t be allocated during the transaction
*/
2010-05-08 01:50:18 +04:00
/* +3 in the end for unstuffing block, inode size update block
* and another block in case quota straddles page boundary and
* two blocks need to be updated instead of 1 */
blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3 ;
2006-01-16 19:50:04 +03:00
2012-07-30 17:53:19 +04:00
reserved = 1 + ( nalloc * ( data_blocks + ind_blocks ) ) ;
2013-10-02 14:13:25 +04:00
ap . target = reserved ;
error = gfs2_inplace_reserve ( ip , & ap ) ;
2008-03-07 02:43:52 +03:00
if ( error )
goto out_alloc ;
2006-01-16 19:50:04 +03:00
2008-03-07 02:43:52 +03:00
if ( nalloc )
2012-07-30 17:53:19 +04:00
blocks + = gfs2_rg_blocks ( ip , reserved ) + nalloc * ind_blocks + RES_STATFS ;
2008-03-07 02:43:52 +03:00
error = gfs2_trans_begin ( sdp , blocks , 0 ) ;
if ( error )
goto out_ipres ;
2006-01-16 19:50:04 +03:00
for ( x = 0 ; x < num_qd ; x + + ) {
qd = qda [ x ] ;
offset = qd2offset ( qd ) ;
2009-09-23 16:50:49 +04:00
error = gfs2_adjust_quota ( ip , offset , qd - > qd_change_sync , qd , NULL ) ;
2006-02-08 14:50:51 +03:00
if ( error )
2006-01-16 19:50:04 +03:00
goto out_end_trans ;
do_qc ( qd , - qd - > qd_change_sync ) ;
2011-03-08 18:40:42 +03:00
set_bit ( QDF_REFRESH , & qd - > qd_flags ) ;
2006-01-16 19:50:04 +03:00
}
error = 0 ;
2006-09-04 20:04:26 +04:00
out_end_trans :
2006-01-16 19:50:04 +03:00
gfs2_trans_end ( sdp ) ;
2006-09-04 20:04:26 +04:00
out_ipres :
2008-03-07 02:43:52 +03:00
gfs2_inplace_release ( ip ) ;
2006-09-04 20:04:26 +04:00
out_alloc :
2006-01-16 19:50:04 +03:00
gfs2_glock_dq_uninit ( & i_gh ) ;
2020-02-27 21:47:53 +03:00
out_dq :
2006-01-16 19:50:04 +03:00
while ( qx - - )
gfs2_glock_dq_uninit ( & ghs [ qx ] ) ;
2016-01-22 23:40:57 +03:00
inode_unlock ( & ip - > i_inode ) ;
2006-01-16 19:50:04 +03:00
kfree ( ghs ) ;
2018-01-17 02:01:33 +03:00
gfs2_log_flush ( ip - > i_gl - > gl_name . ln_sbd , ip - > i_gl ,
2018-01-08 18:34:17 +03:00
GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC ) ;
2020-02-27 21:47:53 +03:00
out :
gfs2_qa_put ( ip ) ;
2006-01-16 19:50:04 +03:00
return error ;
}
2009-09-23 16:50:49 +04:00
static int update_qd ( struct gfs2_sbd * sdp , struct gfs2_quota_data * qd )
{
struct gfs2_inode * ip = GFS2_I ( sdp - > sd_quota_inode ) ;
struct gfs2_quota q ;
struct gfs2_quota_lvb * qlvb ;
loff_t pos ;
int error ;
memset ( & q , 0 , sizeof ( struct gfs2_quota ) ) ;
pos = qd2offset ( qd ) ;
2012-04-16 19:40:55 +04:00
error = gfs2_internal_read ( ip , ( char * ) & q , & pos , sizeof ( q ) ) ;
2009-09-23 16:50:49 +04:00
if ( error < 0 )
return error ;
2012-11-14 22:47:37 +04:00
qlvb = ( struct gfs2_quota_lvb * ) qd - > qd_gl - > gl_lksb . sb_lvbptr ;
2009-09-23 16:50:49 +04:00
qlvb - > qb_magic = cpu_to_be32 ( GFS2_MAGIC ) ;
qlvb - > __pad = 0 ;
qlvb - > qb_limit = q . qu_limit ;
qlvb - > qb_warn = q . qu_warn ;
qlvb - > qb_value = q . qu_value ;
qd - > qd_qb = * qlvb ;
return 0 ;
}
2006-01-16 19:50:04 +03:00
static int do_glock ( struct gfs2_quota_data * qd , int force_refresh ,
struct gfs2_holder * q_gh )
{
2015-03-16 19:52:05 +03:00
struct gfs2_sbd * sdp = qd - > qd_gl - > gl_name . ln_sbd ;
2006-06-14 23:32:57 +04:00
struct gfs2_inode * ip = GFS2_I ( sdp - > sd_quota_inode ) ;
2006-01-16 19:50:04 +03:00
struct gfs2_holder i_gh ;
int error ;
2006-09-04 20:04:26 +04:00
restart :
2006-01-16 19:50:04 +03:00
error = gfs2_glock_nq_init ( qd - > qd_gl , LM_ST_SHARED , 0 , q_gh ) ;
if ( error )
return error ;
2015-04-08 17:03:56 +03:00
if ( test_and_clear_bit ( QDF_REFRESH , & qd - > qd_flags ) )
force_refresh = FORCE ;
2012-11-14 22:47:37 +04:00
qd - > qd_qb = * ( struct gfs2_quota_lvb * ) qd - > qd_gl - > gl_lksb . sb_lvbptr ;
2006-01-16 19:50:04 +03:00
2006-09-01 19:05:15 +04:00
if ( force_refresh | | qd - > qd_qb . qb_magic ! = cpu_to_be32 ( GFS2_MAGIC ) ) {
2006-01-16 19:50:04 +03:00
gfs2_glock_dq_uninit ( q_gh ) ;
2009-09-11 18:21:56 +04:00
error = gfs2_glock_nq_init ( qd - > qd_gl , LM_ST_EXCLUSIVE ,
GL_NOCACHE , q_gh ) ;
2006-01-16 19:50:04 +03:00
if ( error )
return error ;
2006-09-01 19:05:15 +04:00
error = gfs2_glock_nq_init ( ip - > i_gl , LM_ST_SHARED , 0 , & i_gh ) ;
2006-01-16 19:50:04 +03:00
if ( error )
goto fail ;
2009-09-23 16:50:49 +04:00
error = update_qd ( sdp , qd ) ;
if ( error )
2009-09-15 23:42:56 +04:00
goto fail_gunlock ;
2006-01-16 19:50:04 +03:00
2009-09-23 16:50:49 +04:00
gfs2_glock_dq_uninit ( & i_gh ) ;
2009-09-11 18:21:56 +04:00
gfs2_glock_dq_uninit ( q_gh ) ;
force_refresh = 0 ;
goto restart ;
2006-01-16 19:50:04 +03:00
}
return 0 ;
2006-09-04 20:04:26 +04:00
fail_gunlock :
2006-01-16 19:50:04 +03:00
gfs2_glock_dq_uninit ( & i_gh ) ;
2006-09-04 20:04:26 +04:00
fail :
2006-01-16 19:50:04 +03:00
gfs2_glock_dq_uninit ( q_gh ) ;
return error ;
}
2013-02-01 08:27:54 +04:00
/*
 * gfs2_quota_lock - hold and lock the quota data attached to an inode
 * @ip: the inode
 * @uid: the uid passed on to gfs2_quota_hold()
 * @gid: the gid passed on to gfs2_quota_hold()
 *
 * Does nothing unless quotas are in GFS2_QUOTA_ON mode.  Otherwise the
 * quota data is held, sorted with sort_qd, and each entry is locked via
 * do_glock().  On success GIF_QD_LOCKED is set in ip->i_flags; on failure
 * every glock taken so far is released and the hold is dropped.
 *
 * Returns: 0 on success, or a negative error
 */
int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u32 idx;
	int error;

	if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
		return 0;

	error = gfs2_quota_hold(ip, uid, gid);
	if (error)
		return error;

	sort(ip->i_qadata->qa_qd, ip->i_qadata->qa_qd_num,
	     sizeof(struct gfs2_quota_data *), sort_qd, NULL);

	for (idx = 0; idx < ip->i_qadata->qa_qd_num; idx++) {
		error = do_glock(ip->i_qadata->qa_qd[idx], NO_FORCE,
				 &ip->i_qadata->qa_qd_ghs[idx]);
		if (error)
			goto unwind;
	}

	set_bit(GIF_QD_LOCKED, &ip->i_flags);
	return error;

unwind:
	/* Entry idx failed; release the ones before it, newest first. */
	while (idx--)
		gfs2_glock_dq_uninit(&ip->i_qadata->qa_qd_ghs[idx]);
	gfs2_quota_unhold(ip);
	return error;
}
static int need_sync ( struct gfs2_quota_data * qd )
{
2015-03-16 19:52:05 +03:00
struct gfs2_sbd * sdp = qd - > qd_gl - > gl_name . ln_sbd ;
2006-01-16 19:50:04 +03:00
struct gfs2_tune * gt = & sdp - > sd_tune ;
2006-09-04 20:49:07 +04:00
s64 value ;
2006-01-16 19:50:04 +03:00
unsigned int num , den ;
int do_sync = 1 ;
if ( ! qd - > qd_qb . qb_limit )
return 0 ;
2013-11-01 22:52:08 +04:00
spin_lock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
value = qd - > qd_change ;
2013-11-01 22:52:08 +04:00
spin_unlock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
spin_lock ( & gt - > gt_spin ) ;
num = gt - > gt_quota_scale_num ;
den = gt - > gt_quota_scale_den ;
spin_unlock ( & gt - > gt_spin ) ;
if ( value < 0 )
do_sync = 0 ;
2006-09-01 19:05:15 +04:00
else if ( ( s64 ) be64_to_cpu ( qd - > qd_qb . qb_value ) > =
( s64 ) be64_to_cpu ( qd - > qd_qb . qb_limit ) )
2006-01-16 19:50:04 +03:00
do_sync = 0 ;
else {
value * = gfs2_jindex_size ( sdp ) * num ;
2008-07-11 17:39:56 +04:00
value = div_s64 ( value , den ) ;
2006-09-01 19:05:15 +04:00
value + = ( s64 ) be64_to_cpu ( qd - > qd_qb . qb_value ) ;
2006-09-04 20:49:07 +04:00
if ( value < ( s64 ) be64_to_cpu ( qd - > qd_qb . qb_limit ) )
2006-01-16 19:50:04 +03:00
do_sync = 0 ;
}
return do_sync ;
}
void gfs2_quota_unlock ( struct gfs2_inode * ip )
{
2013-10-04 14:31:05 +04:00
struct gfs2_sbd * sdp = GFS2_SB ( & ip - > i_inode ) ;
2006-01-16 19:50:04 +03:00
struct gfs2_quota_data * qda [ 4 ] ;
unsigned int count = 0 ;
2015-07-24 17:45:43 +03:00
u32 x ;
2013-10-04 14:31:05 +04:00
int found ;
2006-01-16 19:50:04 +03:00
if ( ! test_and_clear_bit ( GIF_QD_LOCKED , & ip - > i_flags ) )
2020-05-05 19:55:03 +03:00
return ;
2006-01-16 19:50:04 +03:00
2015-10-26 18:40:28 +03:00
for ( x = 0 ; x < ip - > i_qadata - > qa_qd_num ; x + + ) {
2006-01-16 19:50:04 +03:00
struct gfs2_quota_data * qd ;
int sync ;
2015-10-26 18:40:28 +03:00
qd = ip - > i_qadata - > qa_qd [ x ] ;
2006-01-16 19:50:04 +03:00
sync = need_sync ( qd ) ;
2015-10-26 18:40:28 +03:00
gfs2_glock_dq_uninit ( & ip - > i_qadata - > qa_qd_ghs [ x ] ) ;
2013-10-04 14:31:05 +04:00
if ( ! sync )
continue ;
2013-11-01 22:52:08 +04:00
spin_lock ( & qd_lock ) ;
2013-10-04 14:31:05 +04:00
found = qd_check_sync ( sdp , qd , NULL ) ;
2013-11-01 22:52:08 +04:00
spin_unlock ( & qd_lock ) ;
2013-10-04 14:31:05 +04:00
if ( ! found )
continue ;
gfs2_assert_warn ( sdp , qd - > qd_change_sync ) ;
if ( bh_get ( qd ) ) {
clear_bit ( QDF_LOCKED , & qd - > qd_flags ) ;
slot_put ( qd ) ;
qd_put ( qd ) ;
continue ;
}
2006-01-16 19:50:04 +03:00
2013-10-04 14:31:05 +04:00
qda [ count + + ] = qd ;
2006-01-16 19:50:04 +03:00
}
if ( count ) {
do_sync ( count , qda ) ;
for ( x = 0 ; x < count ; x + + )
qd_unlock ( qda [ x ] ) ;
}
gfs2_quota_unhold ( ip ) ;
}
# define MAX_LINE 256
static int print_message ( struct gfs2_quota_data * qd , char * type )
{
2015-03-16 19:52:05 +03:00
struct gfs2_sbd * sdp = qd - > qd_gl - > gl_name . ln_sbd ;
2006-01-16 19:50:04 +03:00
2014-03-07 00:10:46 +04:00
fs_info ( sdp , " quota %s for %s %u \n " ,
type ,
2014-03-07 00:10:45 +04:00
( qd - > qd_id . type = = USRQUOTA ) ? " user " : " group " ,
from_kqid ( & init_user_ns , qd - > qd_id ) ) ;
2006-01-16 19:50:04 +03:00
return 0 ;
}
2015-03-18 20:04:37 +03:00
/**
* gfs2_quota_check - check if allocating new blocks will exceed quota
* @ ip : The inode for which this check is being performed
* @ uid : The uid to check against
* @ gid : The gid to check against
* @ ap : The allocation parameters . ap - > target contains the requested
* blocks . ap - > min_target , if set , contains the minimum blks
* requested .
*
* Returns : 0 on success .
* min_req = ap - > min_target ? ap - > min_target : ap - > target ;
2018-10-02 12:22:41 +03:00
* quota must allow at least min_req blks for success and
2015-03-18 20:04:37 +03:00
* ap - > allowed is set to the number of blocks allowed
*
* - EDQUOT otherwise , quota violation . ap - > allowed is set to number
* of blocks available .
*/
2015-03-18 20:03:41 +03:00
int gfs2_quota_check ( struct gfs2_inode * ip , kuid_t uid , kgid_t gid ,
struct gfs2_alloc_parms * ap )
2006-01-16 19:50:04 +03:00
{
2006-06-14 23:32:57 +04:00
struct gfs2_sbd * sdp = GFS2_SB ( & ip - > i_inode ) ;
2006-01-16 19:50:04 +03:00
struct gfs2_quota_data * qd ;
2015-03-18 20:04:37 +03:00
s64 value , warn , limit ;
2015-07-24 17:45:43 +03:00
u32 x ;
2006-01-16 19:50:04 +03:00
int error = 0 ;
2015-03-18 20:04:37 +03:00
ap - > allowed = UINT_MAX ; /* Assume we are permitted a whole lot */
2006-01-16 19:50:04 +03:00
if ( ! test_bit ( GIF_QD_LOCKED , & ip - > i_flags ) )
return 0 ;
2015-10-26 18:40:28 +03:00
for ( x = 0 ; x < ip - > i_qadata - > qa_qd_num ; x + + ) {
qd = ip - > i_qadata - > qa_qd [ x ] ;
2006-01-16 19:50:04 +03:00
2013-02-01 07:52:08 +04:00
if ( ! ( qid_eq ( qd - > qd_id , make_kqid_uid ( uid ) ) | |
qid_eq ( qd - > qd_id , make_kqid_gid ( gid ) ) ) )
2006-01-16 19:50:04 +03:00
continue ;
2015-03-18 20:04:37 +03:00
warn = ( s64 ) be64_to_cpu ( qd - > qd_qb . qb_warn ) ;
limit = ( s64 ) be64_to_cpu ( qd - > qd_qb . qb_limit ) ;
2006-09-01 19:05:15 +04:00
value = ( s64 ) be64_to_cpu ( qd - > qd_qb . qb_value ) ;
2013-11-01 22:52:08 +04:00
spin_lock ( & qd_lock ) ;
2015-03-18 20:04:37 +03:00
value + = qd - > qd_change ;
2013-11-01 22:52:08 +04:00
spin_unlock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
2015-03-18 20:04:37 +03:00
if ( limit > 0 & & ( limit - value ) < ap - > allowed )
ap - > allowed = limit - value ;
/* If we can't meet the target */
if ( limit & & limit < ( value + ( s64 ) ap - > target ) ) {
/* If no min_target specified or we don't meet
* min_target , return - EDQUOT */
if ( ! ap - > min_target | | ap - > min_target > ap - > allowed ) {
2015-06-02 19:03:04 +03:00
if ( ! test_and_set_bit ( QDF_QMSG_QUIET ,
& qd - > qd_flags ) ) {
print_message ( qd , " exceeded " ) ;
quota_send_warning ( qd - > qd_id ,
sdp - > sd_vfs - > s_dev ,
QUOTA_NL_BHARDWARN ) ;
}
2015-03-18 20:04:37 +03:00
error = - EDQUOT ;
break ;
}
} else if ( warn & & warn < value & &
2006-01-16 19:50:04 +03:00
time_after_eq ( jiffies , qd - > qd_last_warn +
2015-03-18 20:04:37 +03:00
gfs2_tune_get ( sdp , gt_quota_warn_period )
* HZ ) ) {
2013-02-01 07:52:08 +04:00
quota_send_warning ( qd - > qd_id ,
2009-09-28 15:49:15 +04:00
sdp - > sd_vfs - > s_dev , QUOTA_NL_BSOFTWARN ) ;
2006-01-16 19:50:04 +03:00
error = print_message ( qd , " warning " ) ;
qd - > qd_last_warn = jiffies ;
}
}
return error ;
}
2006-09-04 20:49:07 +04:00
void gfs2_quota_change ( struct gfs2_inode * ip , s64 change ,
2013-02-01 08:27:54 +04:00
kuid_t uid , kgid_t gid )
2006-01-16 19:50:04 +03:00
{
struct gfs2_quota_data * qd ;
2015-07-24 17:45:43 +03:00
u32 x ;
2015-10-26 18:40:28 +03:00
struct gfs2_sbd * sdp = GFS2_SB ( & ip - > i_inode ) ;
2006-01-16 19:50:04 +03:00
2015-10-26 18:40:28 +03:00
if ( sdp - > sd_args . ar_quota ! = GFS2_QUOTA_ON | |
gfs2_assert_warn ( sdp , change ) )
2006-01-16 19:50:04 +03:00
return ;
2008-11-04 13:05:22 +03:00
if ( ip - > i_diskflags & GFS2_DIF_SYSTEM )
2006-01-16 19:50:04 +03:00
return ;
2020-05-05 19:50:24 +03:00
if ( gfs2_assert_withdraw ( sdp , ip - > i_qadata & &
ip - > i_qadata - > qa_ref > 0 ) )
return ;
2015-10-26 18:40:28 +03:00
for ( x = 0 ; x < ip - > i_qadata - > qa_qd_num ; x + + ) {
qd = ip - > i_qadata - > qa_qd [ x ] ;
2006-01-16 19:50:04 +03:00
2013-02-01 07:52:08 +04:00
if ( qid_eq ( qd - > qd_id , make_kqid_uid ( uid ) ) | |
qid_eq ( qd - > qd_id , make_kqid_gid ( gid ) ) ) {
2006-01-16 19:50:04 +03:00
do_qc ( qd , change ) ;
}
}
}
2012-07-03 18:45:28 +04:00
int gfs2_quota_sync ( struct super_block * sb , int type )
2006-01-16 19:50:04 +03:00
{
2009-09-11 17:36:44 +04:00
struct gfs2_sbd * sdp = sb - > s_fs_info ;
2006-01-16 19:50:04 +03:00
struct gfs2_quota_data * * qda ;
2019-08-31 23:29:12 +03:00
unsigned int max_qd = PAGE_SIZE / sizeof ( struct gfs2_holder ) ;
2006-01-16 19:50:04 +03:00
unsigned int num_qd ;
unsigned int x ;
int error = 0 ;
qda = kcalloc ( max_qd , sizeof ( struct gfs2_quota_data * ) , GFP_KERNEL ) ;
if ( ! qda )
return - ENOMEM ;
2013-10-04 15:29:34 +04:00
mutex_lock ( & sdp - > sd_quota_sync_mutex ) ;
sdp - > sd_quota_sync_gen + + ;
2006-01-16 19:50:04 +03:00
do {
num_qd = 0 ;
for ( ; ; ) {
error = qd_fish ( sdp , qda + num_qd ) ;
if ( error | | ! qda [ num_qd ] )
break ;
if ( + + num_qd = = max_qd )
break ;
}
if ( num_qd ) {
if ( ! error )
error = do_sync ( num_qd , qda ) ;
if ( ! error )
for ( x = 0 ; x < num_qd ; x + + )
qda [ x ] - > qd_sync_gen =
sdp - > sd_quota_sync_gen ;
for ( x = 0 ; x < num_qd ; x + + )
qd_unlock ( qda [ x ] ) ;
}
} while ( ! error & & num_qd = = max_qd ) ;
2013-10-04 15:29:34 +04:00
mutex_unlock ( & sdp - > sd_quota_sync_mutex ) ;
2006-01-16 19:50:04 +03:00
kfree ( qda ) ;
return error ;
}
2013-02-01 07:42:40 +04:00
int gfs2_quota_refresh ( struct gfs2_sbd * sdp , struct kqid qid )
2006-01-16 19:50:04 +03:00
{
struct gfs2_quota_data * qd ;
struct gfs2_holder q_gh ;
int error ;
2013-02-01 07:52:08 +04:00
error = qd_get ( sdp , qid , & qd ) ;
2006-01-16 19:50:04 +03:00
if ( error )
return error ;
error = do_glock ( qd , FORCE , & q_gh ) ;
if ( ! error )
gfs2_glock_dq_uninit ( & q_gh ) ;
qd_put ( qd ) ;
return error ;
}
int gfs2_quota_init ( struct gfs2_sbd * sdp )
{
2006-06-14 23:32:57 +04:00
struct gfs2_inode * ip = GFS2_I ( sdp - > sd_qc_inode ) ;
2010-08-11 12:53:11 +04:00
u64 size = i_size_read ( sdp - > sd_qc_inode ) ;
unsigned int blocks = size > > sdp - > sd_sb . sb_bsize_shift ;
2006-01-16 19:50:04 +03:00
unsigned int x , slot = 0 ;
unsigned int found = 0 ;
2013-12-12 14:47:59 +04:00
unsigned int hash ;
2013-12-12 21:29:32 +04:00
unsigned int bm_size ;
2006-09-04 20:49:07 +04:00
u64 dblock ;
u32 extlen = 0 ;
2006-01-16 19:50:04 +03:00
int error ;
2010-08-11 12:53:11 +04:00
if ( gfs2_check_internal_file_size ( sdp - > sd_qc_inode , 1 , 64 < < 20 ) )
2006-09-25 17:26:04 +04:00
return - EIO ;
2010-08-11 12:53:11 +04:00
2006-01-16 19:50:04 +03:00
sdp - > sd_quota_slots = blocks * sdp - > sd_qc_per_block ;
2013-12-12 21:29:32 +04:00
bm_size = DIV_ROUND_UP ( sdp - > sd_quota_slots , 8 * sizeof ( unsigned long ) ) ;
bm_size * = sizeof ( unsigned long ) ;
2006-01-16 19:50:04 +03:00
error = - ENOMEM ;
2014-02-26 22:07:56 +04:00
sdp - > sd_quota_bitmap = kzalloc ( bm_size , GFP_NOFS | __GFP_NOWARN ) ;
2013-12-12 21:29:32 +04:00
if ( sdp - > sd_quota_bitmap = = NULL )
2014-02-26 22:07:56 +04:00
sdp - > sd_quota_bitmap = __vmalloc ( bm_size , GFP_NOFS |
2020-06-02 07:51:40 +03:00
__GFP_ZERO ) ;
2006-01-16 19:50:04 +03:00
if ( ! sdp - > sd_quota_bitmap )
return error ;
for ( x = 0 ; x < blocks ; x + + ) {
struct buffer_head * bh ;
2013-11-26 19:17:09 +04:00
const struct gfs2_quota_change * qc ;
2006-01-16 19:50:04 +03:00
unsigned int y ;
if ( ! extlen ) {
2021-04-01 00:17:38 +03:00
extlen = 32 ;
error = gfs2_get_extent ( & ip - > i_inode , x , & dblock , & extlen ) ;
2006-01-16 19:50:04 +03:00
if ( error )
goto fail ;
}
error = - EIO ;
2006-09-22 01:05:23 +04:00
bh = gfs2_meta_ra ( ip - > i_gl , dblock , extlen ) ;
if ( ! bh )
goto fail ;
2006-01-16 19:50:04 +03:00
if ( gfs2_metatype_check ( sdp , bh , GFS2_METATYPE_QC ) ) {
brelse ( bh ) ;
goto fail ;
}
2013-11-26 19:17:09 +04:00
qc = ( const struct gfs2_quota_change * ) ( bh - > b_data + sizeof ( struct gfs2_meta_header ) ) ;
2006-09-22 01:05:23 +04:00
for ( y = 0 ; y < sdp - > sd_qc_per_block & & slot < sdp - > sd_quota_slots ;
2006-01-16 19:50:04 +03:00
y + + , slot + + ) {
struct gfs2_quota_data * qd ;
2013-11-26 19:17:09 +04:00
s64 qc_change = be64_to_cpu ( qc - > qc_change ) ;
u32 qc_flags = be32_to_cpu ( qc - > qc_flags ) ;
enum quota_type qtype = ( qc_flags & GFS2_QCF_USER ) ?
USRQUOTA : GRPQUOTA ;
struct kqid qc_id = make_kqid ( & init_user_ns , qtype ,
be32_to_cpu ( qc - > qc_id ) ) ;
qc + + ;
if ( ! qc_change )
2006-01-16 19:50:04 +03:00
continue ;
2013-12-12 14:47:59 +04:00
hash = gfs2_qd_hash ( sdp , qc_id ) ;
qd = qd_alloc ( hash , sdp , qc_id ) ;
if ( qd = = NULL ) {
2006-01-16 19:50:04 +03:00
brelse ( bh ) ;
goto fail ;
}
set_bit ( QDF_CHANGE , & qd - > qd_flags ) ;
2013-11-26 19:17:09 +04:00
qd - > qd_change = qc_change ;
2006-01-16 19:50:04 +03:00
qd - > qd_slot = slot ;
qd - > qd_slot_count = 1 ;
2013-11-01 22:52:08 +04:00
spin_lock ( & qd_lock ) ;
2013-12-12 21:29:32 +04:00
BUG_ON ( test_and_set_bit ( slot , sdp - > sd_quota_bitmap ) ) ;
2006-01-16 19:50:04 +03:00
list_add ( & qd - > qd_list , & sdp - > sd_quota_list ) ;
atomic_inc ( & sdp - > sd_quota_count ) ;
2013-11-01 22:52:08 +04:00
spin_unlock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
spin_lock_bucket ( hash ) ;
hlist_bl_add_head_rcu ( & qd - > qd_hlist , & qd_hash_table [ hash ] ) ;
spin_unlock_bucket ( hash ) ;
2006-01-16 19:50:04 +03:00
found + + ;
}
brelse ( bh ) ;
dblock + + ;
extlen - - ;
}
if ( found )
fs_info ( sdp , " found %u quota changes \n " , found ) ;
return 0 ;
2006-09-04 20:04:26 +04:00
fail :
2006-01-16 19:50:04 +03:00
gfs2_quota_cleanup ( sdp ) ;
return error ;
}
void gfs2_quota_cleanup ( struct gfs2_sbd * sdp )
{
struct list_head * head = & sdp - > sd_quota_list ;
struct gfs2_quota_data * qd ;
2013-11-01 22:52:08 +04:00
spin_lock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
while ( ! list_empty ( head ) ) {
2020-02-03 21:22:45 +03:00
qd = list_last_entry ( head , struct gfs2_quota_data , qd_list ) ;
2006-01-16 19:50:04 +03:00
list_del ( & qd - > qd_list ) ;
2013-12-12 14:47:59 +04:00
2009-01-08 01:03:37 +03:00
/* Also remove if this qd exists in the reclaim list */
2013-11-04 14:15:08 +04:00
list_lru_del ( & gfs2_qd_lru , & qd - > qd_lru ) ;
2006-01-16 19:50:04 +03:00
atomic_dec ( & sdp - > sd_quota_count ) ;
2013-11-01 22:52:08 +04:00
spin_unlock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
2013-12-12 14:47:59 +04:00
spin_lock_bucket ( qd - > qd_hash ) ;
hlist_bl_del_rcu ( & qd - > qd_hlist ) ;
spin_unlock_bucket ( qd - > qd_hash ) ;
2013-12-12 15:34:09 +04:00
gfs2_assert_warn ( sdp , ! qd - > qd_change ) ;
gfs2_assert_warn ( sdp , ! qd - > qd_slot_count ) ;
2006-01-16 19:50:04 +03:00
gfs2_assert_warn ( sdp , ! qd - > qd_bh_count ) ;
2009-01-12 13:43:39 +03:00
gfs2_glock_put ( qd - > qd_gl ) ;
2013-12-12 14:47:59 +04:00
call_rcu ( & qd - > qd_rcu , gfs2_qd_dealloc ) ;
2006-01-16 19:50:04 +03:00
2013-11-01 22:52:08 +04:00
spin_lock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
}
2013-11-01 22:52:08 +04:00
spin_unlock ( & qd_lock ) ;
2006-01-16 19:50:04 +03:00
gfs2_assert_warn ( sdp , ! atomic_read ( & sdp - > sd_quota_count ) ) ;
2014-11-20 08:18:38 +03:00
kvfree ( sdp - > sd_quota_bitmap ) ;
sdp - > sd_quota_bitmap = NULL ;
2006-01-16 19:50:04 +03:00
}
2008-11-17 17:25:37 +03:00
static void quotad_error ( struct gfs2_sbd * sdp , const char * msg , int error )
{
if ( error = = 0 | | error = = - EROFS )
return ;
2019-11-14 17:52:15 +03:00
if ( ! gfs2_withdrawn ( sdp ) ) {
2019-04-16 21:23:28 +03:00
if ( ! cmpxchg ( & sdp - > sd_log_error , 0 , error ) )
fs_err ( sdp , " gfs2_quotad: %s error %d \n " , msg , error ) ;
GFS2: Withdraw for IO errors writing to the journal or statfs
Before this patch, if GFS2 encountered IO errors while writing to
the journal, it would not report the problem, so they would go
unnoticed, sometimes for many hours. Sometimes this would only be
noticed later, when recovery tried to do journal replay and failed
due to invalid metadata at the blocks that resulted in IO errors.
This patch makes GFS2's log daemon check for IO errors. If it
encounters one, it withdraws from the file system and reports
why in dmesg. A similar action is taken when IO errors occur when
writing to the system statfs file.
These errors are also reported back to any callers of fsync, since
that requires the journal to be flushed. Therefore, any IO errors
that would previously go unnoticed are now noticed and the file
system is withdrawn as early as possible, thus preventing further
file system damage.
Also note that this reintroduces superblock variable sd_log_error,
which Christoph removed with commit f729b66fca.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
2017-08-16 19:30:06 +03:00
wake_up ( & sdp - > sd_logd_waitq ) ;
}
2008-11-17 17:25:37 +03:00
}
static void quotad_check_timeo ( struct gfs2_sbd * sdp , const char * msg ,
2009-09-11 17:36:44 +04:00
int ( * fxn ) ( struct super_block * sb , int type ) ,
2008-11-17 17:25:37 +03:00
unsigned long t , unsigned long * timeo ,
unsigned int * new_timeo )
{
if ( t > = * timeo ) {
2009-09-11 17:36:44 +04:00
int error = fxn ( sdp - > sd_vfs , 0 ) ;
2008-11-17 17:25:37 +03:00
quotad_error ( sdp , msg , error ) ;
* timeo = gfs2_tune_get_i ( & sdp - > sd_tune , new_timeo ) * HZ ;
} else {
* timeo - = t ;
}
}
2008-11-18 16:38:48 +03:00
static void quotad_check_trunc_list ( struct gfs2_sbd * sdp )
{
struct gfs2_inode * ip ;
while ( 1 ) {
ip = NULL ;
spin_lock ( & sdp - > sd_trunc_lock ) ;
if ( ! list_empty ( & sdp - > sd_trunc_list ) ) {
2020-02-03 21:22:45 +03:00
ip = list_first_entry ( & sdp - > sd_trunc_list ,
2008-11-18 16:38:48 +03:00
struct gfs2_inode , i_trunc_list ) ;
list_del_init ( & ip - > i_trunc_list ) ;
}
spin_unlock ( & sdp - > sd_trunc_lock ) ;
if ( ip = = NULL )
return ;
gfs2_glock_finish_truncate ( ip ) ;
}
}
2009-10-20 11:39:44 +04:00
void gfs2_wake_up_statfs ( struct gfs2_sbd * sdp ) {
if ( ! sdp - > sd_statfs_force_sync ) {
sdp - > sd_statfs_force_sync = 1 ;
wake_up ( & sdp - > sd_quota_wait ) ;
}
}
2008-11-17 17:25:37 +03:00
/**
* gfs2_quotad - Write cached quota changes into the quota file
2021-03-30 19:44:29 +03:00
* @ data : Pointer to GFS2 superblock
2008-11-17 17:25:37 +03:00
*
*/
int gfs2_quotad ( void * data )
{
struct gfs2_sbd * sdp = data ;
struct gfs2_tune * tune = & sdp - > sd_tune ;
unsigned long statfs_timeo = 0 ;
unsigned long quotad_timeo = 0 ;
unsigned long t = 0 ;
DEFINE_WAIT ( wait ) ;
2008-11-18 16:38:48 +03:00
int empty ;
2008-11-17 17:25:37 +03:00
while ( ! kthread_should_stop ( ) ) {
gfs2: Force withdraw to replay journals and wait for it to finish
When a node withdraws from a file system, it often leaves its journal
in an incomplete state. This is especially true when the withdraw is
caused by io errors writing to the journal. Before this patch, a
withdraw would try to write a "shutdown" record to the journal, tell
dlm it's done with the file system, and none of the other nodes
know about the problem. Later, when the problem is fixed and the
withdrawn node is rebooted, it would then discover that its own
journal was incomplete, and replay it. However, replaying it at this
point is almost guaranteed to introduce corruption because the other
nodes are likely to have used affected resource groups that appeared
in the journal since the time of the withdraw. Replaying the journal
later will overwrite any changes made, and not through any fault of
dlm, which was instructed during the withdraw to release those
resources.
This patch makes file system withdraws seen by the entire cluster.
Withdrawing nodes dequeue their journal glock to allow recovery.
The remaining nodes check all the journals to see if they are
clean or in need of replay. They try to replay dirty journals, but
only the journals of withdrawn nodes will be "not busy" and
therefore available for replay.
Until the journal replay is complete, no i/o related glocks may be
given out, to ensure that the replay does not cause the
aforementioned corruption: We cannot allow any journal replay to
overwrite blocks associated with a glock once it is held.
The "live" glock which is now used to signal when a withdraw
occurs. When a withdraw occurs, the node signals its withdraw by
dequeueing the "live" glock and trying to enqueue it in EX mode,
thus forcing the other nodes to all see a demote request, by way
of a "1CB" (one callback) try lock. The "live" glock is not
granted in EX; the callback is only just used to indicate a
withdraw has occurred.
Note that all nodes in the cluster must wait for the recovering
node to finish replaying the withdrawing node's journal before
continuing. To this end, it checks that the journals are clean
multiple times in a retry loop.
Also note that the withdraw function may be called from a wide
variety of situations, and therefore, we need to take extra
precautions to make sure pointers are valid before using them in
many circumstances.
We also need to take care when glocks decide to withdraw, since
the withdraw code now uses glocks.
Also, before this patch, if a process encountered an error and
decided to withdraw, if another process was already withdrawing,
the second withdraw would be silently ignored, which set it free
to unlock its glocks. That's correct behavior if the original
withdrawer encounters further errors down the road. But if
secondary waiters don't wait for the journal replay, unlocking
glocks will allow other nodes to use them, despite the fact that
the journal containing those blocks is being replayed. The
replay needs to finish before our glocks are released to other
nodes. IOW, secondary withdraws need to wait for the first
withdraw to finish.
For example, if an rgrp glock is unlocked by a process that didn't
wait for the first withdraw, a journal replay could introduce file
system corruption by replaying a rgrp block that has already been
granted to a different cluster node.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
2020-01-28 22:23:45 +03:00
if ( gfs2_withdrawn ( sdp ) )
goto bypass ;
2008-11-17 17:25:37 +03:00
/* Update the master statfs file */
2009-10-20 11:39:44 +04:00
if ( sdp - > sd_statfs_force_sync ) {
int error = gfs2_statfs_sync ( sdp - > sd_vfs , 0 ) ;
quotad_error ( sdp , " statfs " , error ) ;
statfs_timeo = gfs2_tune_get ( sdp , gt_statfs_quantum ) * HZ ;
}
else
quotad_check_timeo ( sdp , " statfs " , gfs2_statfs_sync , t ,
& statfs_timeo ,
& tune - > gt_statfs_quantum ) ;
2008-11-17 17:25:37 +03:00
/* Update quota file */
2013-06-03 14:12:59 +04:00
quotad_check_timeo ( sdp , " sync " , gfs2_quota_sync , t ,
2008-11-17 17:25:37 +03:00
& quotad_timeo , & tune - > gt_quota_quantum ) ;
2008-11-18 16:38:48 +03:00
/* Check for & recover partially truncated inodes */
quotad_check_trunc_list ( sdp ) ;
2011-11-22 00:32:22 +04:00
try_to_freeze ( ) ;
gfs2: Force withdraw to replay journals and wait for it to finish
When a node withdraws from a file system, it often leaves its journal
in an incomplete state. This is especially true when the withdraw is
caused by io errors writing to the journal. Before this patch, a
withdraw would try to write a "shutdown" record to the journal, tell
dlm it's done with the file system, and none of the other nodes
know about the problem. Later, when the problem is fixed and the
withdrawn node is rebooted, it would then discover that its own
journal was incomplete, and replay it. However, replaying it at this
point is almost guaranteed to introduce corruption because the other
nodes are likely to have used affected resource groups that appeared
in the journal since the time of the withdraw. Replaying the journal
later will overwrite any changes made, and not through any fault of
dlm, which was instructed during the withdraw to release those
resources.
This patch makes file system withdraws seen by the entire cluster.
Withdrawing nodes dequeue their journal glock to allow recovery.
The remaining nodes check all the journals to see if they are
clean or in need of replay. They try to replay dirty journals, but
only the journals of withdrawn nodes will be "not busy" and
therefore available for replay.
Until the journal replay is complete, no i/o related glocks may be
given out, to ensure that the replay does not cause the
aforementioned corruption: We cannot allow any journal replay to
overwrite blocks associated with a glock once it is held.
The "live" glock which is now used to signal when a withdraw
occurs. When a withdraw occurs, the node signals its withdraw by
dequeueing the "live" glock and trying to enqueue it in EX mode,
thus forcing the other nodes to all see a demote request, by way
of a "1CB" (one callback) try lock. The "live" glock is not
granted in EX; the callback is only just used to indicate a
withdraw has occurred.
Note that all nodes in the cluster must wait for the recovering
node to finish replaying the withdrawing node's journal before
continuing. To this end, it checks that the journals are clean
multiple times in a retry loop.
Also note that the withdraw function may be called from a wide
variety of situations, and therefore, we need to take extra
precautions to make sure pointers are valid before using them in
many circumstances.
We also need to take care when glocks decide to withdraw, since
the withdraw code now uses glocks.
Also, before this patch, if a process encountered an error and
decided to withdraw, if another process was already withdrawing,
the second withdraw would be silently ignored, which set it free
to unlock its glocks. That's correct behavior if the original
withdrawer encounters further errors down the road. But if
secondary waiters don't wait for the journal replay, unlocking
glocks will allow other nodes to use them, despite the fact that
the journal containing those blocks is being replayed. The
replay needs to finish before our glocks are released to other
nodes. IOW, secondary withdraws need to wait for the first
withdraw to finish.
For example, if an rgrp glock is unlocked by a process that didn't
wait for the first withdraw, a journal replay could introduce file
system corruption by replaying a rgrp block that has already been
granted to a different cluster node.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
2020-01-28 22:23:45 +03:00
bypass :
2008-11-17 17:25:37 +03:00
t = min ( quotad_timeo , statfs_timeo ) ;
2009-03-31 18:49:08 +04:00
prepare_to_wait ( & sdp - > sd_quota_wait , & wait , TASK_INTERRUPTIBLE ) ;
2008-11-18 16:38:48 +03:00
spin_lock ( & sdp - > sd_trunc_lock ) ;
empty = list_empty ( & sdp - > sd_trunc_list ) ;
spin_unlock ( & sdp - > sd_trunc_lock ) ;
2009-10-20 11:39:44 +04:00
if ( empty & & ! sdp - > sd_statfs_force_sync )
2008-11-18 16:38:48 +03:00
t - = schedule_timeout ( t ) ;
else
t = 0 ;
2008-11-17 17:25:37 +03:00
finish_wait ( & sdp - > sd_quota_wait , & wait ) ;
}
return 0 ;
}
2014-11-19 18:41:07 +03:00
static int gfs2_quota_get_state ( struct super_block * sb , struct qc_state * state )
2009-09-11 18:57:27 +04:00
{
struct gfs2_sbd * sdp = sb - > s_fs_info ;
2014-11-19 18:41:07 +03:00
memset ( state , 0 , sizeof ( * state ) ) ;
2010-05-05 02:10:56 +04:00
switch ( sdp - > sd_args . ar_quota ) {
case GFS2_QUOTA_ON :
2014-11-19 18:41:07 +03:00
state - > s_state [ USRQUOTA ] . flags | = QCI_LIMITS_ENFORCED ;
state - > s_state [ GRPQUOTA ] . flags | = QCI_LIMITS_ENFORCED ;
2020-08-24 01:36:59 +03:00
fallthrough ;
2010-05-05 02:10:56 +04:00
case GFS2_QUOTA_ACCOUNT :
2014-11-19 18:41:07 +03:00
state - > s_state [ USRQUOTA ] . flags | = QCI_ACCT_ENABLED |
QCI_SYSFILE ;
state - > s_state [ GRPQUOTA ] . flags | = QCI_ACCT_ENABLED |
QCI_SYSFILE ;
2010-05-05 02:10:56 +04:00
break ;
case GFS2_QUOTA_OFF :
break ;
}
2009-09-11 18:57:27 +04:00
if ( sdp - > sd_quota_inode ) {
2014-11-19 18:41:07 +03:00
state - > s_state [ USRQUOTA ] . ino =
GFS2_I ( sdp - > sd_quota_inode ) - > i_no_addr ;
state - > s_state [ USRQUOTA ] . blocks = sdp - > sd_quota_inode - > i_blocks ;
2009-09-11 18:57:27 +04:00
}
2014-11-19 18:41:07 +03:00
state - > s_state [ USRQUOTA ] . nextents = 1 ; /* unsupported */
state - > s_state [ GRPQUOTA ] = state - > s_state [ USRQUOTA ] ;
state - > s_incoredqs = list_lru_count ( & gfs2_qd_lru ) ;
2009-09-11 18:57:27 +04:00
return 0 ;
}
2012-09-16 13:07:49 +04:00
static int gfs2_get_dqblk ( struct super_block * sb , struct kqid qid ,
2014-10-09 18:03:13 +04:00
struct qc_dqblk * fdq )
2009-09-28 14:52:16 +04:00
{
struct gfs2_sbd * sdp = sb - > s_fs_info ;
struct gfs2_quota_lvb * qlvb ;
struct gfs2_quota_data * qd ;
struct gfs2_holder q_gh ;
int error ;
2014-10-09 18:03:13 +04:00
memset ( fdq , 0 , sizeof ( * fdq ) ) ;
2009-09-28 14:52:16 +04:00
if ( sdp - > sd_args . ar_quota = = GFS2_QUOTA_OFF )
return - ESRCH ; /* Crazy XFS error code */
2013-02-01 08:09:30 +04:00
if ( ( qid . type ! = USRQUOTA ) & &
( qid . type ! = GRPQUOTA ) )
2009-09-28 14:52:16 +04:00
return - EINVAL ;
2013-02-01 07:52:08 +04:00
error = qd_get ( sdp , qid , & qd ) ;
2009-09-28 14:52:16 +04:00
if ( error )
return error ;
error = do_glock ( qd , FORCE , & q_gh ) ;
if ( error )
goto out ;
2012-11-14 22:47:37 +04:00
qlvb = ( struct gfs2_quota_lvb * ) qd - > qd_gl - > gl_lksb . sb_lvbptr ;
2014-10-09 18:03:13 +04:00
fdq - > d_spc_hardlimit = be64_to_cpu ( qlvb - > qb_limit ) < < sdp - > sd_sb . sb_bsize_shift ;
fdq - > d_spc_softlimit = be64_to_cpu ( qlvb - > qb_warn ) < < sdp - > sd_sb . sb_bsize_shift ;
fdq - > d_space = be64_to_cpu ( qlvb - > qb_value ) < < sdp - > sd_sb . sb_bsize_shift ;
2009-09-28 14:52:16 +04:00
gfs2_glock_dq_uninit ( & q_gh ) ;
out :
qd_put ( qd ) ;
return error ;
}
2009-09-23 16:50:49 +04:00
/*
 * GFS2 only supports a subset of the XFS fields. gfs2_set_dqblk() rejects a
 * request with -EINVAL if d_fieldmask has any bit set outside this mask.
 */
2014-10-09 18:03:13 +04:00
# define GFS2_FIELDMASK (QC_SPC_SOFT|QC_SPC_HARD|QC_SPACE)
2009-09-23 16:50:49 +04:00
2012-09-16 13:07:49 +04:00
static int gfs2_set_dqblk ( struct super_block * sb , struct kqid qid ,
2014-10-09 18:03:13 +04:00
struct qc_dqblk * fdq )
2009-09-23 16:50:49 +04:00
{
struct gfs2_sbd * sdp = sb - > s_fs_info ;
struct gfs2_inode * ip = GFS2_I ( sdp - > sd_quota_inode ) ;
struct gfs2_quota_data * qd ;
struct gfs2_holder q_gh , i_gh ;
unsigned int data_blocks , ind_blocks ;
unsigned int blocks = 0 ;
int alloc_required ;
loff_t offset ;
int error ;
if ( sdp - > sd_args . ar_quota = = GFS2_QUOTA_OFF )
return - ESRCH ; /* Crazy XFS error code */
2013-02-01 08:09:30 +04:00
if ( ( qid . type ! = USRQUOTA ) & &
( qid . type ! = GRPQUOTA ) )
2009-09-23 16:50:49 +04:00
return - EINVAL ;
if ( fdq - > d_fieldmask & ~ GFS2_FIELDMASK )
return - EINVAL ;
2013-02-01 07:52:08 +04:00
error = qd_get ( sdp , qid , & qd ) ;
2009-09-23 16:50:49 +04:00
if ( error )
return error ;
2020-02-27 21:47:53 +03:00
error = gfs2_qa_get ( ip ) ;
2012-06-06 14:17:59 +04:00
if ( error )
goto out_put ;
2016-01-22 23:40:57 +03:00
inode_lock ( & ip - > i_inode ) ;
2009-09-23 16:50:49 +04:00
error = gfs2_glock_nq_init ( qd - > qd_gl , LM_ST_EXCLUSIVE , 0 , & q_gh ) ;
if ( error )
2012-06-06 14:17:59 +04:00
goto out_unlockput ;
2009-09-23 16:50:49 +04:00
error = gfs2_glock_nq_init ( ip - > i_gl , LM_ST_EXCLUSIVE , 0 , & i_gh ) ;
if ( error )
goto out_q ;
/* Check for existing entry, if none then alloc new blocks */
error = update_qd ( sdp , qd ) ;
if ( error )
goto out_i ;
/* If nothing has changed, this is a no-op */
2014-10-09 18:03:13 +04:00
if ( ( fdq - > d_fieldmask & QC_SPC_SOFT ) & &
( ( fdq - > d_spc_softlimit > > sdp - > sd_sb . sb_bsize_shift ) = = be64_to_cpu ( qd - > qd_qb . qb_warn ) ) )
fdq - > d_fieldmask ^ = QC_SPC_SOFT ;
2010-11-18 19:26:46 +03:00
2014-10-09 18:03:13 +04:00
if ( ( fdq - > d_fieldmask & QC_SPC_HARD ) & &
( ( fdq - > d_spc_hardlimit > > sdp - > sd_sb . sb_bsize_shift ) = = be64_to_cpu ( qd - > qd_qb . qb_limit ) ) )
fdq - > d_fieldmask ^ = QC_SPC_HARD ;
2010-11-18 19:26:46 +03:00
2014-10-09 18:03:13 +04:00
if ( ( fdq - > d_fieldmask & QC_SPACE ) & &
( ( fdq - > d_space > > sdp - > sd_sb . sb_bsize_shift ) = = be64_to_cpu ( qd - > qd_qb . qb_value ) ) )
fdq - > d_fieldmask ^ = QC_SPACE ;
2010-11-18 19:26:46 +03:00
2009-09-23 16:50:49 +04:00
if ( fdq - > d_fieldmask = = 0 )
goto out_i ;
offset = qd2offset ( qd ) ;
2010-06-25 03:21:20 +04:00
alloc_required = gfs2_write_alloc_required ( ip , offset , sizeof ( struct gfs2_quota ) ) ;
2011-02-07 19:22:41 +03:00
if ( gfs2_is_stuffed ( ip ) )
alloc_required = 1 ;
2009-09-23 16:50:49 +04:00
if ( alloc_required ) {
2013-10-02 14:13:25 +04:00
struct gfs2_alloc_parms ap = { . aflags = 0 , } ;
2009-09-23 16:50:49 +04:00
gfs2_write_calc_reserv ( ip , sizeof ( struct gfs2_quota ) ,
& data_blocks , & ind_blocks ) ;
2011-11-21 22:36:17 +04:00
blocks = 1 + data_blocks + ind_blocks ;
2013-10-02 14:13:25 +04:00
ap . target = blocks ;
error = gfs2_inplace_reserve ( ip , & ap ) ;
2009-09-23 16:50:49 +04:00
if ( error )
2011-11-21 22:36:17 +04:00
goto out_i ;
2012-07-30 17:53:19 +04:00
blocks + = gfs2_rg_blocks ( ip , blocks ) ;
2009-09-23 16:50:49 +04:00
}
2011-02-07 19:22:41 +03:00
/* Some quotas span block boundaries and can update two blocks,
adding an extra block to the transaction to handle such quotas */
error = gfs2_trans_begin ( sdp , blocks + RES_DINODE + 2 , 0 ) ;
2009-09-23 16:50:49 +04:00
if ( error )
goto out_release ;
/* Apply changes */
error = gfs2_adjust_quota ( ip , offset , 0 , qd , fdq ) ;
2015-06-02 19:03:04 +03:00
if ( ! error )
clear_bit ( QDF_QMSG_QUIET , & qd - > qd_flags ) ;
2009-09-23 16:50:49 +04:00
gfs2_trans_end ( sdp ) ;
out_release :
2011-11-21 22:36:17 +04:00
if ( alloc_required )
2009-09-23 16:50:49 +04:00
gfs2_inplace_release ( ip ) ;
out_i :
gfs2_glock_dq_uninit ( & i_gh ) ;
out_q :
gfs2_glock_dq_uninit ( & q_gh ) ;
2012-06-06 14:17:59 +04:00
out_unlockput :
2020-02-27 21:47:53 +03:00
gfs2_qa_put ( ip ) ;
2016-01-22 23:40:57 +03:00
inode_unlock ( & ip - > i_inode ) ;
2012-06-06 14:17:59 +04:00
out_put :
2009-09-23 16:50:49 +04:00
qd_put ( qd ) ;
return error ;
}
2009-09-15 12:59:02 +04:00
const struct quotactl_ops gfs2_quotactl_ops = {
. quota_sync = gfs2_quota_sync ,
2014-11-19 18:41:07 +03:00
. get_state = gfs2_quota_get_state ,
2010-05-07 01:04:58 +04:00
. get_dqblk = gfs2_get_dqblk ,
2010-05-07 01:05:17 +04:00
. set_dqblk = gfs2_set_dqblk ,
2009-09-15 12:59:02 +04:00
} ;
2013-12-12 14:47:59 +04:00
/**
 * gfs2_quota_hash_init - initialise the quota data hash table
 *
 * Sets up each of the GFS2_QD_HASH_SIZE entries of qd_hash_table with
 * INIT_HLIST_BL_HEAD().
 */
void __init gfs2_quota_hash_init(void)
{
	unsigned int bucket = 0;

	while (bucket < GFS2_QD_HASH_SIZE) {
		INIT_HLIST_BL_HEAD(&qd_hash_table[bucket]);
		bucket++;
	}
}