2012-06-28 18:03:02 +02:00
/*
* Copyright ( C ) 2011 STRATO . All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 021110 - 1307 , USA .
*/
# include <linux/sched.h>
# include <linux/pagemap.h>
# include <linux/writeback.h>
# include <linux/blkdev.h>
# include <linux/rbtree.h>
# include <linux/slab.h>
# include <linux/workqueue.h>
2013-01-29 06:04:50 +00:00
# include <linux/btrfs.h>
2012-06-28 18:03:02 +02:00
# include "ctree.h"
# include "transaction.h"
# include "disk-io.h"
# include "locking.h"
# include "ulist.h"
# include "backref.h"
2013-04-25 16:04:51 +00:00
# include "extent_io.h"
2014-05-13 17:30:47 -07:00
# include "qgroup.h"
2012-06-28 18:03:02 +02:00
2015-04-17 10:23:16 +08:00
2012-06-28 18:03:02 +02:00
/* TODO XXX FIXME
* - subvol delete - > delete when ref goes to 0 ? delete limits also ?
* - reorganize keys
* - compressed
* - sync
* - copy also limits on subvol creation
* - limit
* - caches for ulists
* - performance benchmarks
* - check all ioctl parameters
*/
2015-03-12 16:10:13 +08:00
/*
 * Adjust the "old" reference counter of @qg.
 *
 * The counter is first raised to @seq if it is currently below it, and
 * then @mod is added on top.
 */
static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	u64 base = qg->old_refcnt;

	if (base < seq)
		base = seq;
	qg->old_refcnt = base + mod;
}
/*
 * Adjust the "new" reference counter of @qg.
 *
 * The counter is first raised to @seq if it is currently below it, and
 * then @mod is added on top.
 */
static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	u64 base = qg->new_refcnt;

	if (base < seq)
		base = seq;
	qg->new_refcnt = base + mod;
}
/*
 * Return how far the "old" reference counter of @qg lies above @seq,
 * or 0 when the counter is below @seq.
 */
static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	return qg->old_refcnt < seq ? 0 : qg->old_refcnt - seq;
}
/*
 * Return how far the "new" reference counter of @qg lies above @seq,
 * or 0 when the counter is below @seq.
 */
static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	return qg->new_refcnt < seq ? 0 : qg->new_refcnt - seq;
}
2012-06-28 18:03:02 +02:00
/*
* glue structure to represent the relations between qgroups .
*/
struct btrfs_qgroup_list {
/* linked into the member qgroup's ->groups list (see add_relation_rb) */
struct list_head next_group ;
/* linked into the parent qgroup's ->members list (see add_relation_rb) */
struct list_head next_member ;
/* the parent qgroup of this relation */
struct btrfs_qgroup * group ;
/* the member (child) qgroup of this relation */
struct btrfs_qgroup * member ;
} ;
2016-10-26 16:23:50 +02:00
static inline u64 qgroup_to_aux ( struct btrfs_qgroup * qg )
{
return ( u64 ) ( uintptr_t ) qg ;
}
static inline struct btrfs_qgroup * unode_aux_to_qgroup ( struct ulist_node * n )
{
return ( struct btrfs_qgroup * ) ( uintptr_t ) n - > aux ;
}
2014-05-13 17:30:47 -07:00
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restructures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronization problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
/*
 * Forward declarations of rescan helpers. qgroup_rescan_init() is called
 * from btrfs_read_qgroup_config() below; the definitions are not in this
 * part of the file.
 */
static int
qgroup_rescan_init ( struct btrfs_fs_info * fs_info , u64 progress_objectid ,
int init_flags ) ;
static void qgroup_rescan_zero_tracking ( struct btrfs_fs_info * fs_info ) ;
2013-04-25 16:04:51 +00:00
2013-04-07 10:50:17 +00:00
/* must be called with qgroup_ioctl_lock held */
2012-06-28 18:03:02 +02:00
static struct btrfs_qgroup * find_qgroup_rb ( struct btrfs_fs_info * fs_info ,
u64 qgroupid )
{
struct rb_node * n = fs_info - > qgroup_tree . rb_node ;
struct btrfs_qgroup * qgroup ;
while ( n ) {
qgroup = rb_entry ( n , struct btrfs_qgroup , node ) ;
if ( qgroup - > qgroupid < qgroupid )
n = n - > rb_left ;
else if ( qgroup - > qgroupid > qgroupid )
n = n - > rb_right ;
else
return qgroup ;
}
return NULL ;
}
/*
 * Return the in-memory qgroup for @qgroupid, inserting a freshly zeroed one
 * into fs_info->qgroup_tree when it does not exist yet.
 *
 * Returns the existing or new qgroup, or ERR_PTR(-ENOMEM) if the allocation
 * (done with GFP_ATOMIC) fails.
 *
 * must be called with qgroup_lock held
 */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
					  u64 qgroupid)
{
	struct rb_node **link = &fs_info->qgroup_tree.rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup *entry;

	/* Walk down to either the matching entry or the insertion slot. */
	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct btrfs_qgroup, node);

		if (entry->qgroupid == qgroupid)
			return entry;
		if (entry->qgroupid < qgroupid)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	entry->qgroupid = qgroupid;
	INIT_LIST_HEAD(&entry->groups);
	INIT_LIST_HEAD(&entry->members);
	INIT_LIST_HEAD(&entry->dirty);

	rb_link_node(&entry->node, parent, link);
	rb_insert_color(&entry->node, &fs_info->qgroup_tree);

	return entry;
}
2013-08-14 09:13:36 +08:00
static void __del_qgroup_rb ( struct btrfs_qgroup * qgroup )
2012-06-28 18:03:02 +02:00
{
struct btrfs_qgroup_list * list ;
list_del ( & qgroup - > dirty ) ;
while ( ! list_empty ( & qgroup - > groups ) ) {
list = list_first_entry ( & qgroup - > groups ,
struct btrfs_qgroup_list , next_group ) ;
list_del ( & list - > next_group ) ;
list_del ( & list - > next_member ) ;
kfree ( list ) ;
}
while ( ! list_empty ( & qgroup - > members ) ) {
list = list_first_entry ( & qgroup - > members ,
struct btrfs_qgroup_list , next_member ) ;
list_del ( & list - > next_group ) ;
list_del ( & list - > next_member ) ;
kfree ( list ) ;
}
kfree ( qgroup ) ;
2013-08-14 09:13:36 +08:00
}
2012-06-28 18:03:02 +02:00
2013-08-14 09:13:36 +08:00
/*
 * Remove the qgroup with id @qgroupid from fs_info->qgroup_tree and free it
 * along with its relation entries.
 *
 * Returns 0 on success or -ENOENT if no such qgroup exists.
 *
 * must be called with qgroup_lock held
 */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *victim;

	victim = find_qgroup_rb(fs_info, qgroupid);
	if (victim) {
		rb_erase(&victim->node, &fs_info->qgroup_tree);
		__del_qgroup_rb(victim);
		return 0;
	}
	return -ENOENT;
}
/*
 * Record in memory that qgroup @memberid is a member of qgroup @parentid.
 *
 * A new relation entry is appended to the member's ->groups list and to the
 * parent's ->members list.
 *
 * Returns 0 on success, -ENOENT if either qgroup is unknown, or -ENOMEM if
 * the entry cannot be allocated (GFP_ATOMIC).
 *
 * must be called with qgroup_lock held
 */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *child = find_qgroup_rb(fs_info, memberid);
	struct btrfs_qgroup *group = find_qgroup_rb(fs_info, parentid);
	struct btrfs_qgroup_list *rel;

	if (!(child && group))
		return -ENOENT;

	rel = kzalloc(sizeof(*rel), GFP_ATOMIC);
	if (!rel)
		return -ENOMEM;

	rel->member = child;
	rel->group = group;
	list_add_tail(&rel->next_group, &child->groups);
	list_add_tail(&rel->next_member, &group->members);

	return 0;
}
/*
 * Drop the in-memory relation "qgroup @memberid is a member of @parentid".
 *
 * Returns 0 once the relation entry has been unlinked and freed, or -ENOENT
 * if either qgroup is unknown or no such relation is recorded.
 *
 * must be called with qgroup_lock held
 */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *child = find_qgroup_rb(fs_info, memberid);
	struct btrfs_qgroup *group = find_qgroup_rb(fs_info, parentid);
	struct btrfs_qgroup_list *rel;

	if (!(child && group))
		return -ENOENT;

	list_for_each_entry(rel, &child->groups, next_group) {
		if (rel->group != group)
			continue;

		/* Safe without the _safe iterator: we return right away. */
		list_del(&rel->next_group);
		list_del(&rel->next_member);
		kfree(rel);
		return 0;
	}
	return -ENOENT;
}
2014-05-07 17:06:09 -04:00
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test helper: check that qgroup @qgroupid exists and that its
 * in-memory referenced/exclusive counters equal @rfer and @excl.
 *
 * Returns 0 on a match, -EINVAL if the qgroup is missing or either counter
 * differs.
 */
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qg = find_qgroup_rb(fs_info, qgroupid);

	if (qg && qg->rfer == rfer && qg->excl == excl)
		return 0;
	return -EINVAL;
}
#endif
2012-06-28 18:03:02 +02:00
/*
* The full config is read in one go , only called from open_ctree ( )
* It doesn ' t use any locking , as at this point we ' re still single - threaded
*/
int btrfs_read_qgroup_config ( struct btrfs_fs_info * fs_info )
{
struct btrfs_key key ;
struct btrfs_key found_key ;
struct btrfs_root * quota_root = fs_info - > quota_root ;
struct btrfs_path * path = NULL ;
struct extent_buffer * l ;
int slot ;
int ret = 0 ;
u64 flags = 0 ;
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
u64 rescan_progress = 0 ;
2012-06-28 18:03:02 +02:00
2016-09-02 15:40:02 -04:00
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) )
2012-06-28 18:03:02 +02:00
return 0 ;
2017-02-13 12:10:20 +01:00
fs_info - > qgroup_ulist = ulist_alloc ( GFP_KERNEL ) ;
2013-05-06 11:03:27 +00:00
if ( ! fs_info - > qgroup_ulist ) {
ret = - ENOMEM ;
goto out ;
}
2012-06-28 18:03:02 +02:00
path = btrfs_alloc_path ( ) ;
if ( ! path ) {
ret = - ENOMEM ;
goto out ;
}
/* default this to quota off, in case no status key is found */
fs_info - > qgroup_flags = 0 ;
/*
* pass 1 : read status , all qgroup infos and limits
*/
key . objectid = 0 ;
key . type = 0 ;
key . offset = 0 ;
ret = btrfs_search_slot_for_read ( quota_root , & key , path , 1 , 1 ) ;
if ( ret )
goto out ;
while ( 1 ) {
struct btrfs_qgroup * qgroup ;
slot = path - > slots [ 0 ] ;
l = path - > nodes [ 0 ] ;
btrfs_item_key_to_cpu ( l , & found_key , slot ) ;
if ( found_key . type = = BTRFS_QGROUP_STATUS_KEY ) {
struct btrfs_qgroup_status_item * ptr ;
ptr = btrfs_item_ptr ( l , slot ,
struct btrfs_qgroup_status_item ) ;
if ( btrfs_qgroup_status_version ( l , ptr ) ! =
BTRFS_QGROUP_STATUS_VERSION ) {
2013-12-20 11:37:06 -05:00
btrfs_err ( fs_info ,
" old qgroup version, quota disabled " ) ;
2012-06-28 18:03:02 +02:00
goto out ;
}
if ( btrfs_qgroup_status_generation ( l , ptr ) ! =
fs_info - > generation ) {
flags | = BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
2013-12-20 11:37:06 -05:00
btrfs_err ( fs_info ,
2016-09-20 10:05:00 -04:00
" qgroup generation mismatch, marked as inconsistent " ) ;
2012-06-28 18:03:02 +02:00
}
fs_info - > qgroup_flags = btrfs_qgroup_status_flags ( l ,
ptr ) ;
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
rescan_progress = btrfs_qgroup_status_rescan ( l , ptr ) ;
2012-06-28 18:03:02 +02:00
goto next1 ;
}
if ( found_key . type ! = BTRFS_QGROUP_INFO_KEY & &
found_key . type ! = BTRFS_QGROUP_LIMIT_KEY )
goto next1 ;
qgroup = find_qgroup_rb ( fs_info , found_key . offset ) ;
if ( ( qgroup & & found_key . type = = BTRFS_QGROUP_INFO_KEY ) | |
( ! qgroup & & found_key . type = = BTRFS_QGROUP_LIMIT_KEY ) ) {
2015-07-06 15:38:11 +02:00
btrfs_err ( fs_info , " inconsistent qgroup config " ) ;
2012-06-28 18:03:02 +02:00
flags | = BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
}
if ( ! qgroup ) {
qgroup = add_qgroup_rb ( fs_info , found_key . offset ) ;
if ( IS_ERR ( qgroup ) ) {
ret = PTR_ERR ( qgroup ) ;
goto out ;
}
}
switch ( found_key . type ) {
case BTRFS_QGROUP_INFO_KEY : {
struct btrfs_qgroup_info_item * ptr ;
ptr = btrfs_item_ptr ( l , slot ,
struct btrfs_qgroup_info_item ) ;
qgroup - > rfer = btrfs_qgroup_info_rfer ( l , ptr ) ;
qgroup - > rfer_cmpr = btrfs_qgroup_info_rfer_cmpr ( l , ptr ) ;
qgroup - > excl = btrfs_qgroup_info_excl ( l , ptr ) ;
qgroup - > excl_cmpr = btrfs_qgroup_info_excl_cmpr ( l , ptr ) ;
/* generation currently unused */
break ;
}
case BTRFS_QGROUP_LIMIT_KEY : {
struct btrfs_qgroup_limit_item * ptr ;
ptr = btrfs_item_ptr ( l , slot ,
struct btrfs_qgroup_limit_item ) ;
qgroup - > lim_flags = btrfs_qgroup_limit_flags ( l , ptr ) ;
qgroup - > max_rfer = btrfs_qgroup_limit_max_rfer ( l , ptr ) ;
qgroup - > max_excl = btrfs_qgroup_limit_max_excl ( l , ptr ) ;
qgroup - > rsv_rfer = btrfs_qgroup_limit_rsv_rfer ( l , ptr ) ;
qgroup - > rsv_excl = btrfs_qgroup_limit_rsv_excl ( l , ptr ) ;
break ;
}
}
next1 :
ret = btrfs_next_item ( quota_root , path ) ;
if ( ret < 0 )
goto out ;
if ( ret )
break ;
}
btrfs_release_path ( path ) ;
/*
* pass 2 : read all qgroup relations
*/
key . objectid = 0 ;
key . type = BTRFS_QGROUP_RELATION_KEY ;
key . offset = 0 ;
ret = btrfs_search_slot_for_read ( quota_root , & key , path , 1 , 0 ) ;
if ( ret )
goto out ;
while ( 1 ) {
slot = path - > slots [ 0 ] ;
l = path - > nodes [ 0 ] ;
btrfs_item_key_to_cpu ( l , & found_key , slot ) ;
if ( found_key . type ! = BTRFS_QGROUP_RELATION_KEY )
goto next2 ;
if ( found_key . objectid > found_key . offset ) {
/* parent <- member, not needed to build config */
/* FIXME should we omit the key completely? */
goto next2 ;
}
ret = add_relation_rb ( fs_info , found_key . objectid ,
found_key . offset ) ;
2013-01-17 01:22:08 -07:00
if ( ret = = - ENOENT ) {
2013-12-20 11:37:06 -05:00
btrfs_warn ( fs_info ,
" orphan qgroup relation 0x%llx->0x%llx " ,
2013-08-20 13:20:07 +02:00
found_key . objectid , found_key . offset ) ;
2013-01-17 01:22:08 -07:00
ret = 0 ; /* ignore the error */
}
2012-06-28 18:03:02 +02:00
if ( ret )
goto out ;
next2 :
ret = btrfs_next_item ( quota_root , path ) ;
if ( ret < 0 )
goto out ;
if ( ret )
break ;
}
out :
fs_info - > qgroup_flags | = flags ;
2016-09-02 15:40:02 -04:00
if ( ! ( fs_info - > qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON ) )
clear_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) ;
else if ( fs_info - > qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN & &
ret > = 0 )
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
ret = qgroup_rescan_init ( fs_info , rescan_progress , 0 ) ;
2012-06-28 18:03:02 +02:00
btrfs_free_path ( path ) ;
2013-05-28 15:47:23 +00:00
if ( ret < 0 ) {
2013-05-06 11:03:27 +00:00
ulist_free ( fs_info - > qgroup_ulist ) ;
2013-05-28 15:47:23 +00:00
fs_info - > qgroup_ulist = NULL ;
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
fs_info - > qgroup_flags & = ~ BTRFS_QGROUP_STATUS_FLAG_RESCAN ;
2013-05-28 15:47:23 +00:00
}
2013-05-06 11:03:27 +00:00
2012-06-28 18:03:02 +02:00
return ret < 0 ? ret : 0 ;
}
/*
2013-08-14 09:13:37 +08:00
* This is called from close_ctree ( ) or open_ctree ( ) or btrfs_quota_disable ( ) ,
* first two are in single - threaded paths . And for the third one , we have set
* quota_root to be null with qgroup_lock held before , so it is safe to clean
* up the in - memory structures without qgroup_lock held .
2012-06-28 18:03:02 +02:00
*/
void btrfs_free_qgroup_config ( struct btrfs_fs_info * fs_info )
{
struct rb_node * n ;
struct btrfs_qgroup * qgroup ;
while ( ( n = rb_first ( & fs_info - > qgroup_tree ) ) ) {
qgroup = rb_entry ( n , struct btrfs_qgroup , node ) ;
rb_erase ( n , & fs_info - > qgroup_tree ) ;
2013-08-14 09:13:36 +08:00
__del_qgroup_rb ( qgroup ) ;
2012-06-28 18:03:02 +02:00
}
2013-07-13 21:02:54 +08:00
/*
* we call btrfs_free_qgroup_config ( ) when umounting
2016-05-19 21:18:45 -04:00
* filesystem and disabling quota , so we set qgroup_ulist
2013-07-13 21:02:54 +08:00
* to be null here to avoid double free .
*/
2013-05-06 11:03:27 +00:00
ulist_free ( fs_info - > qgroup_ulist ) ;
2013-07-13 21:02:54 +08:00
fs_info - > qgroup_ulist = NULL ;
2012-06-28 18:03:02 +02:00
}
/*
 * Insert an (empty) qgroup relation item with key
 * (@src, BTRFS_QGROUP_RELATION_KEY, @dst) into @quota_root.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * returned by btrfs_insert_empty_item().
 */
static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
	/*
	 * Only touch the leaf when the insert succeeded: on failure nothing
	 * was modified and path->nodes[0] is not guaranteed to reference a
	 * leaf, so marking it dirty unconditionally could dereference NULL.
	 */
	if (!ret)
		btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_free_path(path);
	return ret;
}
/*
 * Delete the qgroup relation item with key
 * (@src, BTRFS_QGROUP_RELATION_KEY, @dst) from @quota_root.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
 * the item does not exist, or the error from the search or the deletion.
 */
static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret == 0)
		ret = btrfs_del_item(trans, quota_root, path);
	else if (ret > 0)
		ret = -ENOENT;	/* no exact match */

	btrfs_free_path(path);
	return ret;
}
/*
 * Create the on-disk INFO and LIMIT items of qgroup @qgroupid in
 * @quota_root, with all counters and limits set to zero. Items that already
 * exist are re-initialized in place.
 *
 * Returns 0 on success (and immediately when btrfs_is_testing() is true),
 * -ENOMEM if no path could be allocated, or the error returned by
 * btrfs_insert_empty_item() other than -EEXIST.
 */
static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	struct btrfs_qgroup_limit_item *limit_item;
	struct btrfs_qgroup_info_item *info_item;
	struct extent_buffer *eb;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	if (btrfs_is_testing(quota_root->fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	/*
	 * Avoid a transaction abort by catching -EEXIST here. In that
	 * case, we proceed by re-initializing the existing structure
	 * on disk.
	 */
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*info_item));
	if (ret != 0 && ret != -EEXIST)
		goto out;

	eb = path->nodes[0];
	info_item = btrfs_item_ptr(eb, path->slots[0],
				   struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(eb, info_item, trans->transid);
	btrfs_set_qgroup_info_rfer(eb, info_item, 0);
	btrfs_set_qgroup_info_rfer_cmpr(eb, info_item, 0);
	btrfs_set_qgroup_info_excl(eb, info_item, 0);
	btrfs_set_qgroup_info_excl_cmpr(eb, info_item, 0);
	btrfs_mark_buffer_dirty(eb);

	btrfs_release_path(path);

	/* Same objectid/offset, now for the limit item. */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*limit_item));
	if (ret != 0 && ret != -EEXIST)
		goto out;

	eb = path->nodes[0];
	limit_item = btrfs_item_ptr(eb, path->slots[0],
				    struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(eb, limit_item, 0);
	btrfs_set_qgroup_limit_max_rfer(eb, limit_item, 0);
	btrfs_set_qgroup_limit_max_excl(eb, limit_item, 0);
	btrfs_set_qgroup_limit_rsv_rfer(eb, limit_item, 0);
	btrfs_set_qgroup_limit_rsv_excl(eb, limit_item, 0);
	btrfs_mark_buffer_dirty(eb);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
/*
 * Delete the on-disk INFO item and then the LIMIT item of qgroup @qgroupid
 * from @quota_root.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
 * either item is missing, or the error from the search or the deletion.
 */
static int del_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
	if (ret)
		goto out;

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	/* Same objectid/offset, now for the limit item. */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
	if (!ret)
		ret = btrfs_del_item(trans, quota_root, path);
out:
	btrfs_free_path(path);
	return ret;
}
static int update_qgroup_limit_item ( struct btrfs_trans_handle * trans ,
2014-11-20 21:01:41 -05:00
struct btrfs_root * root ,
struct btrfs_qgroup * qgroup )
2012-06-28 18:03:02 +02:00
{
struct btrfs_path * path ;
struct btrfs_key key ;
struct extent_buffer * l ;
struct btrfs_qgroup_limit_item * qgroup_limit ;
int ret ;
int slot ;
key . objectid = 0 ;
key . type = BTRFS_QGROUP_LIMIT_KEY ;
2014-11-20 21:01:41 -05:00
key . offset = qgroup - > qgroupid ;
2012-06-28 18:03:02 +02:00
path = btrfs_alloc_path ( ) ;
2013-02-27 11:20:56 +00:00
if ( ! path )
return - ENOMEM ;
2012-06-28 18:03:02 +02:00
ret = btrfs_search_slot ( trans , root , & key , path , 0 , 1 ) ;
if ( ret > 0 )
ret = - ENOENT ;
if ( ret )
goto out ;
l = path - > nodes [ 0 ] ;
slot = path - > slots [ 0 ] ;
2013-11-04 22:34:29 +01:00
qgroup_limit = btrfs_item_ptr ( l , slot , struct btrfs_qgroup_limit_item ) ;
2014-11-20 21:01:41 -05:00
btrfs_set_qgroup_limit_flags ( l , qgroup_limit , qgroup - > lim_flags ) ;
btrfs_set_qgroup_limit_max_rfer ( l , qgroup_limit , qgroup - > max_rfer ) ;
btrfs_set_qgroup_limit_max_excl ( l , qgroup_limit , qgroup - > max_excl ) ;
btrfs_set_qgroup_limit_rsv_rfer ( l , qgroup_limit , qgroup - > rsv_rfer ) ;
btrfs_set_qgroup_limit_rsv_excl ( l , qgroup_limit , qgroup - > rsv_excl ) ;
2012-06-28 18:03:02 +02:00
btrfs_mark_buffer_dirty ( l ) ;
out :
btrfs_free_path ( path ) ;
return ret ;
}
static int update_qgroup_info_item ( struct btrfs_trans_handle * trans ,
struct btrfs_root * root ,
struct btrfs_qgroup * qgroup )
{
struct btrfs_path * path ;
struct btrfs_key key ;
struct extent_buffer * l ;
struct btrfs_qgroup_info_item * qgroup_info ;
int ret ;
int slot ;
2016-06-21 09:52:41 -04:00
if ( btrfs_is_testing ( root - > fs_info ) )
2014-05-07 17:06:09 -04:00
return 0 ;
2014-09-29 23:53:21 +02:00
2012-06-28 18:03:02 +02:00
key . objectid = 0 ;
key . type = BTRFS_QGROUP_INFO_KEY ;
key . offset = qgroup - > qgroupid ;
path = btrfs_alloc_path ( ) ;
2013-02-27 11:20:56 +00:00
if ( ! path )
return - ENOMEM ;
2012-06-28 18:03:02 +02:00
ret = btrfs_search_slot ( trans , root , & key , path , 0 , 1 ) ;
if ( ret > 0 )
ret = - ENOENT ;
if ( ret )
goto out ;
l = path - > nodes [ 0 ] ;
slot = path - > slots [ 0 ] ;
2013-11-04 22:34:29 +01:00
qgroup_info = btrfs_item_ptr ( l , slot , struct btrfs_qgroup_info_item ) ;
2012-06-28 18:03:02 +02:00
btrfs_set_qgroup_info_generation ( l , qgroup_info , trans - > transid ) ;
btrfs_set_qgroup_info_rfer ( l , qgroup_info , qgroup - > rfer ) ;
btrfs_set_qgroup_info_rfer_cmpr ( l , qgroup_info , qgroup - > rfer_cmpr ) ;
btrfs_set_qgroup_info_excl ( l , qgroup_info , qgroup - > excl ) ;
btrfs_set_qgroup_info_excl_cmpr ( l , qgroup_info , qgroup - > excl_cmpr ) ;
btrfs_mark_buffer_dirty ( l ) ;
out :
btrfs_free_path ( path ) ;
return ret ;
}
static int update_qgroup_status_item ( struct btrfs_trans_handle * trans ,
struct btrfs_fs_info * fs_info ,
struct btrfs_root * root )
{
struct btrfs_path * path ;
struct btrfs_key key ;
struct extent_buffer * l ;
struct btrfs_qgroup_status_item * ptr ;
int ret ;
int slot ;
key . objectid = 0 ;
key . type = BTRFS_QGROUP_STATUS_KEY ;
key . offset = 0 ;
path = btrfs_alloc_path ( ) ;
2013-02-27 11:20:56 +00:00
if ( ! path )
return - ENOMEM ;
2012-06-28 18:03:02 +02:00
ret = btrfs_search_slot ( trans , root , & key , path , 0 , 1 ) ;
if ( ret > 0 )
ret = - ENOENT ;
if ( ret )
goto out ;
l = path - > nodes [ 0 ] ;
slot = path - > slots [ 0 ] ;
ptr = btrfs_item_ptr ( l , slot , struct btrfs_qgroup_status_item ) ;
btrfs_set_qgroup_status_flags ( l , ptr , fs_info - > qgroup_flags ) ;
btrfs_set_qgroup_status_generation ( l , ptr , trans - > transid ) ;
2013-04-25 16:04:51 +00:00
btrfs_set_qgroup_status_rescan ( l , ptr ,
fs_info - > qgroup_rescan_progress . objectid ) ;
2012-06-28 18:03:02 +02:00
btrfs_mark_buffer_dirty ( l ) ;
out :
btrfs_free_path ( path ) ;
return ret ;
}
/*
* called with qgroup_lock held
*/
static int btrfs_clean_quota_tree ( struct btrfs_trans_handle * trans ,
struct btrfs_root * root )
{
struct btrfs_path * path ;
struct btrfs_key key ;
2013-02-27 11:16:57 +00:00
struct extent_buffer * leaf = NULL ;
2012-06-28 18:03:02 +02:00
int ret ;
2013-02-27 11:16:57 +00:00
int nr = 0 ;
2012-06-28 18:03:02 +02:00
path = btrfs_alloc_path ( ) ;
if ( ! path )
return - ENOMEM ;
2013-02-27 11:16:57 +00:00
path - > leave_spinning = 1 ;
key . objectid = 0 ;
key . offset = 0 ;
key . type = 0 ;
2012-06-28 18:03:02 +02:00
2013-02-27 11:16:57 +00:00
while ( 1 ) {
2012-06-28 18:03:02 +02:00
ret = btrfs_search_slot ( trans , root , & key , path , - 1 , 1 ) ;
2013-02-27 11:16:57 +00:00
if ( ret < 0 )
goto out ;
leaf = path - > nodes [ 0 ] ;
nr = btrfs_header_nritems ( leaf ) ;
if ( ! nr )
2012-06-28 18:03:02 +02:00
break ;
2013-02-27 11:16:57 +00:00
/*
* delete the leaf one by one
* since the whole tree is going
* to be deleted .
*/
path - > slots [ 0 ] = 0 ;
ret = btrfs_del_items ( trans , root , path , 0 , nr ) ;
2012-06-28 18:03:02 +02:00
if ( ret )
goto out ;
2013-02-27 11:16:57 +00:00
2012-06-28 18:03:02 +02:00
btrfs_release_path ( path ) ;
}
ret = 0 ;
out :
btrfs_free_path ( path ) ;
return ret ;
}
int btrfs_quota_enable ( struct btrfs_trans_handle * trans ,
struct btrfs_fs_info * fs_info )
{
struct btrfs_root * quota_root ;
2013-04-07 10:24:57 +00:00
struct btrfs_root * tree_root = fs_info - > tree_root ;
2012-06-28 18:03:02 +02:00
struct btrfs_path * path = NULL ;
struct btrfs_qgroup_status_item * ptr ;
struct extent_buffer * leaf ;
struct btrfs_key key ;
2013-04-07 10:24:57 +00:00
struct btrfs_key found_key ;
struct btrfs_qgroup * qgroup = NULL ;
2012-06-28 18:03:02 +02:00
int ret = 0 ;
2013-04-07 10:24:57 +00:00
int slot ;
2012-06-28 18:03:02 +02:00
2013-04-07 10:50:16 +00:00
mutex_lock ( & fs_info - > qgroup_ioctl_lock ) ;
2012-06-28 18:03:02 +02:00
if ( fs_info - > quota_root ) {
2016-09-02 15:40:02 -04:00
set_bit ( BTRFS_FS_QUOTA_ENABLING , & fs_info - > flags ) ;
2012-06-28 18:03:02 +02:00
goto out ;
}
2017-02-13 11:03:44 +01:00
fs_info - > qgroup_ulist = ulist_alloc ( GFP_KERNEL ) ;
2013-05-06 11:03:27 +00:00
if ( ! fs_info - > qgroup_ulist ) {
ret = - ENOMEM ;
goto out ;
}
2012-06-28 18:03:02 +02:00
/*
* initially create the quota tree
*/
quota_root = btrfs_create_tree ( trans , fs_info ,
BTRFS_QUOTA_TREE_OBJECTID ) ;
if ( IS_ERR ( quota_root ) ) {
ret = PTR_ERR ( quota_root ) ;
goto out ;
}
path = btrfs_alloc_path ( ) ;
2012-10-16 05:44:21 +00:00
if ( ! path ) {
ret = - ENOMEM ;
goto out_free_root ;
}
2012-06-28 18:03:02 +02:00
key . objectid = 0 ;
key . type = BTRFS_QGROUP_STATUS_KEY ;
key . offset = 0 ;
ret = btrfs_insert_empty_item ( trans , quota_root , path , & key ,
sizeof ( * ptr ) ) ;
if ( ret )
2012-10-16 05:44:21 +00:00
goto out_free_path ;
2012-06-28 18:03:02 +02:00
leaf = path - > nodes [ 0 ] ;
ptr = btrfs_item_ptr ( leaf , path - > slots [ 0 ] ,
struct btrfs_qgroup_status_item ) ;
btrfs_set_qgroup_status_generation ( leaf , ptr , trans - > transid ) ;
btrfs_set_qgroup_status_version ( leaf , ptr , BTRFS_QGROUP_STATUS_VERSION ) ;
fs_info - > qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
btrfs_set_qgroup_status_flags ( leaf , ptr , fs_info - > qgroup_flags ) ;
2013-04-25 16:04:51 +00:00
btrfs_set_qgroup_status_rescan ( leaf , ptr , 0 ) ;
2012-06-28 18:03:02 +02:00
btrfs_mark_buffer_dirty ( leaf ) ;
2013-04-07 10:24:57 +00:00
key . objectid = 0 ;
key . type = BTRFS_ROOT_REF_KEY ;
key . offset = 0 ;
btrfs_release_path ( path ) ;
ret = btrfs_search_slot_for_read ( tree_root , & key , path , 1 , 0 ) ;
if ( ret > 0 )
goto out_add_root ;
if ( ret < 0 )
goto out_free_path ;
while ( 1 ) {
slot = path - > slots [ 0 ] ;
leaf = path - > nodes [ 0 ] ;
btrfs_item_key_to_cpu ( leaf , & found_key , slot ) ;
if ( found_key . type = = BTRFS_ROOT_REF_KEY ) {
ret = add_qgroup_item ( trans , quota_root ,
found_key . offset ) ;
if ( ret )
goto out_free_path ;
qgroup = add_qgroup_rb ( fs_info , found_key . offset ) ;
if ( IS_ERR ( qgroup ) ) {
ret = PTR_ERR ( qgroup ) ;
goto out_free_path ;
}
}
ret = btrfs_next_item ( tree_root , path ) ;
if ( ret < 0 )
goto out_free_path ;
if ( ret )
break ;
}
out_add_root :
btrfs_release_path ( path ) ;
ret = add_qgroup_item ( trans , quota_root , BTRFS_FS_TREE_OBJECTID ) ;
if ( ret )
goto out_free_path ;
qgroup = add_qgroup_rb ( fs_info , BTRFS_FS_TREE_OBJECTID ) ;
if ( IS_ERR ( qgroup ) ) {
ret = PTR_ERR ( qgroup ) ;
goto out_free_path ;
}
2013-04-07 10:50:17 +00:00
spin_lock ( & fs_info - > qgroup_lock ) ;
2012-06-28 18:03:02 +02:00
fs_info - > quota_root = quota_root ;
2016-09-02 15:40:02 -04:00
set_bit ( BTRFS_FS_QUOTA_ENABLING , & fs_info - > flags ) ;
2012-06-28 18:03:02 +02:00
spin_unlock ( & fs_info - > qgroup_lock ) ;
2012-10-16 05:44:21 +00:00
out_free_path :
2012-06-28 18:03:02 +02:00
btrfs_free_path ( path ) ;
2012-10-16 05:44:21 +00:00
out_free_root :
if ( ret ) {
free_extent_buffer ( quota_root - > node ) ;
free_extent_buffer ( quota_root - > commit_root ) ;
kfree ( quota_root ) ;
}
out :
2013-05-28 15:47:23 +00:00
if ( ret ) {
2013-05-06 11:03:27 +00:00
ulist_free ( fs_info - > qgroup_ulist ) ;
2013-05-28 15:47:23 +00:00
fs_info - > qgroup_ulist = NULL ;
}
2013-04-07 10:50:16 +00:00
mutex_unlock ( & fs_info - > qgroup_ioctl_lock ) ;
2012-06-28 18:03:02 +02:00
return ret ;
}
int btrfs_quota_disable ( struct btrfs_trans_handle * trans ,
struct btrfs_fs_info * fs_info )
{
struct btrfs_root * quota_root ;
int ret = 0 ;
2013-04-07 10:50:16 +00:00
mutex_lock ( & fs_info - > qgroup_ioctl_lock ) ;
2013-04-07 10:50:17 +00:00
if ( ! fs_info - > quota_root )
2013-04-07 10:50:16 +00:00
goto out ;
2016-09-02 15:40:02 -04:00
clear_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) ;
2016-08-08 22:08:06 -04:00
btrfs_qgroup_wait_for_completion ( fs_info , false ) ;
2015-11-06 10:36:42 -08:00
spin_lock ( & fs_info - > qgroup_lock ) ;
2012-06-28 18:03:02 +02:00
quota_root = fs_info - > quota_root ;
fs_info - > quota_root = NULL ;
2015-02-27 16:24:26 +08:00
fs_info - > qgroup_flags & = ~ BTRFS_QGROUP_STATUS_FLAG_ON ;
2012-06-28 18:03:02 +02:00
spin_unlock ( & fs_info - > qgroup_lock ) ;
2013-08-14 09:13:37 +08:00
btrfs_free_qgroup_config ( fs_info ) ;
2012-06-28 18:03:02 +02:00
ret = btrfs_clean_quota_tree ( trans , quota_root ) ;
if ( ret )
goto out ;
2017-08-17 10:25:11 -04:00
ret = btrfs_del_root ( trans , fs_info , & quota_root - > root_key ) ;
2012-06-28 18:03:02 +02:00
if ( ret )
goto out ;
list_del ( & quota_root - > dirty_list ) ;
btrfs_tree_lock ( quota_root - > node ) ;
2017-02-10 18:47:57 +01:00
clean_tree_block ( fs_info , quota_root - > node ) ;
2012-06-28 18:03:02 +02:00
btrfs_tree_unlock ( quota_root - > node ) ;
btrfs_free_tree_block ( trans , quota_root , quota_root - > node , 0 , 1 ) ;
free_extent_buffer ( quota_root - > node ) ;
free_extent_buffer ( quota_root - > commit_root ) ;
kfree ( quota_root ) ;
out :
2013-04-07 10:50:16 +00:00
mutex_unlock ( & fs_info - > qgroup_ioctl_lock ) ;
2012-06-28 18:03:02 +02:00
return ret ;
}
2013-04-25 16:04:51 +00:00
static void qgroup_dirty ( struct btrfs_fs_info * fs_info ,
struct btrfs_qgroup * qgroup )
2012-06-28 18:03:02 +02:00
{
2013-04-25 16:04:51 +00:00
if ( list_empty ( & qgroup - > dirty ) )
list_add ( & qgroup - > dirty , & fs_info - > dirty_qgroups ) ;
2012-06-28 18:03:02 +02:00
}
2016-10-20 10:28:41 +08:00
/*
 * Handle an attempt to release more reserved bytes than @qgroup holds.
 *
 * With CONFIG_BTRFS_DEBUG a warning and a debug message are emitted; in all
 * builds the reservation counter is reset to 0.
 */
static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
				      struct btrfs_qgroup *qgroup,
				      u64 num_bytes)
{
#ifdef CONFIG_BTRFS_DEBUG
	WARN_ON(qgroup->reserved < num_bytes);
	btrfs_debug(fs_info,
		    " qgroup %llu reserved space underflow, have: %llu, to free: %llu ",
		    qgroup->qgroupid, qgroup->reserved, num_bytes);
#endif
	qgroup->reserved = 0;
}
2015-02-27 16:24:27 +08:00
/*
* The easy accounting , if we are adding / removing the only ref for an extent
2016-05-19 21:18:45 -04:00
* then this qgroup and all of the parent qgroups get their reference and
2015-02-27 16:24:27 +08:00
* exclusive counts adjusted .
*
* Caller should hold fs_info - > qgroup_lock .
*/
static int __qgroup_excl_accounting ( struct btrfs_fs_info * fs_info ,
struct ulist * tmp , u64 ref_root ,
u64 num_bytes , int sign )
{
struct btrfs_qgroup * qgroup ;
struct btrfs_qgroup_list * glist ;
struct ulist_node * unode ;
struct ulist_iterator uiter ;
int ret = 0 ;
qgroup = find_qgroup_rb ( fs_info , ref_root ) ;
if ( ! qgroup )
goto out ;
qgroup - > rfer + = sign * num_bytes ;
qgroup - > rfer_cmpr + = sign * num_bytes ;
WARN_ON ( sign < 0 & & qgroup - > excl < num_bytes ) ;
qgroup - > excl + = sign * num_bytes ;
qgroup - > excl_cmpr + = sign * num_bytes ;
2016-10-20 10:28:41 +08:00
if ( sign > 0 ) {
2017-03-13 15:52:08 +08:00
trace_qgroup_update_reserve ( fs_info , qgroup , - ( s64 ) num_bytes ) ;
2017-04-18 17:00:12 +02:00
if ( qgroup - > reserved < num_bytes )
2016-10-20 10:28:41 +08:00
report_reserved_underflow ( fs_info , qgroup , num_bytes ) ;
else
qgroup - > reserved - = num_bytes ;
}
2015-02-27 16:24:27 +08:00
qgroup_dirty ( fs_info , qgroup ) ;
/* Get all of the parent groups that contain this qgroup */
list_for_each_entry ( glist , & qgroup - > groups , next_group ) {
ret = ulist_add ( tmp , glist - > group - > qgroupid ,
2016-10-26 16:23:50 +02:00
qgroup_to_aux ( glist - > group ) , GFP_ATOMIC ) ;
2015-02-27 16:24:27 +08:00
if ( ret < 0 )
goto out ;
}
/* Iterate all of the parents and adjust their reference counts */
ULIST_ITER_INIT ( & uiter ) ;
while ( ( unode = ulist_next ( tmp , & uiter ) ) ) {
2016-10-26 16:23:50 +02:00
qgroup = unode_aux_to_qgroup ( unode ) ;
2015-02-27 16:24:27 +08:00
qgroup - > rfer + = sign * num_bytes ;
qgroup - > rfer_cmpr + = sign * num_bytes ;
WARN_ON ( sign < 0 & & qgroup - > excl < num_bytes ) ;
qgroup - > excl + = sign * num_bytes ;
2016-10-20 10:28:41 +08:00
if ( sign > 0 ) {
2017-03-13 15:52:08 +08:00
trace_qgroup_update_reserve ( fs_info , qgroup ,
- ( s64 ) num_bytes ) ;
2017-04-18 17:00:12 +02:00
if ( qgroup - > reserved < num_bytes )
2016-10-20 10:28:41 +08:00
report_reserved_underflow ( fs_info , qgroup ,
num_bytes ) ;
else
qgroup - > reserved - = num_bytes ;
}
2015-02-27 16:24:27 +08:00
qgroup - > excl_cmpr + = sign * num_bytes ;
qgroup_dirty ( fs_info , qgroup ) ;
/* Add any parents of the parents */
list_for_each_entry ( glist , & qgroup - > groups , next_group ) {
ret = ulist_add ( tmp , glist - > group - > qgroupid ,
2016-10-26 16:23:50 +02:00
qgroup_to_aux ( glist - > group ) , GFP_ATOMIC ) ;
2015-02-27 16:24:27 +08:00
if ( ret < 0 )
goto out ;
}
}
ret = 0 ;
out :
return ret ;
}
/*
 * Fast path for updating the accounting after a relation change when the
 * qgroup @src holds only exclusive data (excl == rfer).
 *
 * In that case it is enough to apply @src's exclusive byte count, with the
 * given @sign, to @dst and all of its ancestors.  In every other case
 * (including an unknown @src) a full rescan is required.
 *
 * Caller should hold fs_info->qgroup_lock.
 *
 * Returns 0 when the quick update was done, > 0 when a full rescan is needed
 * and < 0 on any other error.  For every non-zero result the INCONSISTENT
 * status flag is set.
 */
static int quick_update_accounting(struct btrfs_fs_info *fs_info,
				   struct ulist *tmp, u64 src, u64 dst,
				   int sign)
{
	struct btrfs_qgroup *src_qg;
	int ret = 1;

	src_qg = find_qgroup_rb(fs_info, src);
	if (src_qg && src_qg->excl == src_qg->rfer) {
		int err = __qgroup_excl_accounting(fs_info, tmp, dst,
						   src_qg->excl, sign);

		ret = (err < 0) ? err : 0;
	}

	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}
2012-06-28 18:03:02 +02:00
/*
 * Make qgroup @src a member of qgroup @dst.
 *
 * The level of @src must be strictly lower than the level of @dst.  Both
 * relation items are written to the quota tree, the relation is added to the
 * in-memory structures and the accounting is updated through
 * quick_update_accounting().
 *
 * Takes fs_info->qgroup_ioctl_lock for the whole operation.
 *
 * Returns:
 *   -EINVAL  wrong levels, quotas not enabled, or an unknown qgroup
 *   -ENOMEM  the scratch ulist could not be allocated
 *   -EEXIST  the relation already exists
 *   otherwise the result of the item insertion, add_relation_rb() or
 *   quick_update_accounting() (which may be > 0, see there).
 */
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *child;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *link;
	struct ulist *tmp;
	int ret;

	/* A member must live on a lower level than its parent. */
	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
		return -EINVAL;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	mutex_lock(&fs_info->qgroup_ioctl_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	child = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!child || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* Refuse to create a relation that is already there. */
	list_for_each_entry(link, &child->groups, next_group) {
		if (link->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
	if (ret) {
		/* Undo the first half so no dangling item is left behind. */
		del_qgroup_relation_item(trans, quota_root, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(fs_info, src, dst);
	if (ret >= 0)
		ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
	spin_unlock(&fs_info->qgroup_lock);

out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	ulist_free(tmp);
	return ret;
}
2017-02-13 13:00:51 +01:00
/*
 * Remove the membership of qgroup @src in qgroup @dst.
 *
 * Both on-disk relation items are deleted, the relation is dropped from the
 * in-memory structures and the accounting is updated through
 * quick_update_accounting().
 *
 * This function takes no mutex itself; the callers in this file hold
 * fs_info->qgroup_ioctl_lock around it.
 *
 * Returns:
 *   -ENOMEM  the scratch ulist could not be allocated
 *   -EINVAL  quotas not enabled, or an unknown qgroup
 *   -ENOENT  no such relation exists
 *   the first error from deleting the relation items, if any; otherwise the
 *   result of quick_update_accounting() (which may be > 0, see there).
 */
static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	bool found = false;
	int ret = 0;
	int err;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* Only an existing relation can be deleted. */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			found = true;
			break;
		}
	}
	if (!found) {
		ret = -ENOENT;
		goto out;
	}

	/* Try to delete both items even if the first deletion fails. */
	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
	err = del_qgroup_relation_item(trans, quota_root, dst, src);
	if (err && !ret)
		ret = err;

	spin_lock(&fs_info->qgroup_lock);
	del_relation_rb(fs_info, src, dst);
	err = quick_update_accounting(fs_info, tmp, src, dst, -1);
	spin_unlock(&fs_info->qgroup_lock);

	/*
	 * Do not let the accounting result overwrite an item deletion
	 * failure: report the accounting result only when both deletions
	 * succeeded.
	 */
	if (!ret)
		ret = err;
out:
	ulist_free(tmp);
	return ret;
}
/*
 * Locked wrapper around __del_qgroup_relation(): removes the membership of
 * qgroup @src in qgroup @dst while holding fs_info->qgroup_ioctl_lock and
 * returns whatever the helper returns.
 */
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	int ret;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	ret = __del_qgroup_relation(trans, fs_info, src, dst);
	mutex_unlock(&fs_info->qgroup_ioctl_lock);

	return ret;
}
/*
 * Create the qgroup @qgroupid: write its item to the quota tree and add it
 * to the in-memory structures (the latter under qgroup_lock).
 *
 * Takes fs_info->qgroup_ioctl_lock for the whole operation.
 *
 * Returns 0 on success, -EINVAL when quotas are not enabled, -EEXIST when the
 * qgroup is already known, or the error from add_qgroup_item() /
 * add_qgroup_rb().
 */
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret;

	mutex_lock(&fs_info->qgroup_ioctl_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto unlock;
	}

	if (find_qgroup_rb(fs_info, qgroupid)) {
		ret = -EEXIST;
		goto unlock;
	}

	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		goto unlock;

	spin_lock(&fs_info->qgroup_lock);
	qgroup = add_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

	if (IS_ERR(qgroup))
		ret = PTR_ERR(qgroup);

unlock:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
/*
 * Remove the qgroup @qgroupid.
 *
 * A qgroup that still has members cannot be removed.  Otherwise its item is
 * deleted from the quota tree (a missing item, -ENOENT, does not stop the
 * removal), every relation to a parent group is deleted, and finally the
 * qgroup is dropped from the in-memory structures under qgroup_lock.
 *
 * Takes fs_info->qgroup_ioctl_lock for the whole operation.
 *
 * Returns -EINVAL when quotas are not enabled, -ENOENT for an unknown qgroup,
 * -EBUSY when the qgroup still has members, or the last result of
 * del_qgroup_item() / __del_qgroup_relation().
 */
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *parent_link;
	int ret;

	mutex_lock(&fs_info->qgroup_ioctl_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto unlock;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto unlock;
	}

	/* A qgroup that still has children must not be removed. */
	if (!list_empty(&qgroup->members)) {
		ret = -EBUSY;
		goto unlock;
	}

	ret = del_qgroup_item(trans, quota_root, qgroupid);
	if (ret && ret != -ENOENT)
		goto unlock;

	/* Detach the qgroup from each of its parents, one at a time. */
	while (!list_empty(&qgroup->groups)) {
		parent_link = list_first_entry(&qgroup->groups,
					       struct btrfs_qgroup_list,
					       next_group);
		ret = __del_qgroup_relation(trans, fs_info, qgroupid,
					    parent_link->group->qgroupid);
		if (ret)
			goto unlock;
	}

	spin_lock(&fs_info->qgroup_lock);
	del_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

unlock:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
/*
 * Apply the limits described by @limit to the qgroup @qgroupid.
 *
 * Only the limits whose bit is set in @limit->flags are touched.  For each of
 * them a value of -1 means "clear this limit": the bit is removed from both
 * the qgroup's lim_flags and @limit->flags (so @limit is modified) and the
 * stored value is reset to 0.  Any other value is stored as the new limit.
 * The in-memory update happens under qgroup_lock; afterwards the limit item
 * in the quota tree is rewritten.  If that fails the INCONSISTENT status flag
 * is set and a message is logged.
 *
 * Takes fs_info->qgroup_ioctl_lock for the whole operation.
 *
 * Returns 0 on success, -EINVAL when quotas are not enabled, -ENOENT for an
 * unknown qgroup, or the error from update_qgroup_limit_item().
 */
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	/* Special value that asks for a limit to be cleared. */
	const u64 CLEAR_VALUE = -1;
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto unlock;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto unlock;
	}

	spin_lock(&fs_info->qgroup_lock);

	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
		if (limit->max_rfer != CLEAR_VALUE) {
			qgroup->max_rfer = limit->max_rfer;
		} else {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			qgroup->max_rfer = 0;
		}
	}

	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
		if (limit->max_excl != CLEAR_VALUE) {
			qgroup->max_excl = limit->max_excl;
		} else {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			qgroup->max_excl = 0;
		}
	}

	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
		if (limit->rsv_rfer != CLEAR_VALUE) {
			qgroup->rsv_rfer = limit->rsv_rfer;
		} else {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			qgroup->rsv_rfer = 0;
		}
	}

	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
		if (limit->rsv_excl != CLEAR_VALUE) {
			qgroup->rsv_excl = limit->rsv_excl;
		} else {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			qgroup->rsv_excl = 0;
		}
	}

	/* Whatever is still requested in @limit is now an active limit. */
	qgroup->lim_flags |= limit->flags;

	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_limit_item(trans, quota_root, qgroup);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, " unable to update quota limit for %llu ",
			   qgroupid);
	}

unlock:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
2014-07-17 12:39:01 -07:00
2016-10-18 09:31:27 +08:00
int btrfs_qgroup_trace_extent_nolock ( struct btrfs_fs_info * fs_info ,
btrfs: qgroup: Refactor btrfs_qgroup_insert_dirty_extent()
Refactor btrfs_qgroup_insert_dirty_extent() function, to two functions:
1. btrfs_qgroup_insert_dirty_extent_nolock()
Almost the same with original code.
For delayed_ref usage, which has delayed refs locked.
Change the return value type to int, since caller never needs the
pointer, but only needs to know if they need to free the allocated
memory.
2. btrfs_qgroup_insert_dirty_extent()
The more encapsulated version.
Will do the delayed_refs lock, memory allocation, quota enabled check
and other things.
The original design is to keep exported functions to minimal, but since
more btrfs hacks exposed, like replacing path in balance, we need to
record dirty extents manually, so we have to add such functions.
Also, add comment for both functions, to info developers how to keep
qgroup correct when doing hacks.
Cc: Mark Fasheh <mfasheh@suse.de>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-and-Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
2016-08-15 10:36:50 +08:00
struct btrfs_delayed_ref_root * delayed_refs ,
struct btrfs_qgroup_extent_record * record )
2015-04-16 14:34:17 +08:00
{
struct rb_node * * p = & delayed_refs - > dirty_extent_root . rb_node ;
struct rb_node * parent_node = NULL ;
struct btrfs_qgroup_extent_record * entry ;
u64 bytenr = record - > bytenr ;
2015-11-05 14:38:00 -08:00
assert_spin_locked ( & delayed_refs - > lock ) ;
2016-10-18 09:31:27 +08:00
trace_btrfs_qgroup_trace_extent ( fs_info , record ) ;
2015-11-05 14:38:00 -08:00
2015-04-16 14:34:17 +08:00
while ( * p ) {
parent_node = * p ;
entry = rb_entry ( parent_node , struct btrfs_qgroup_extent_record ,
node ) ;
if ( bytenr < entry - > bytenr )
p = & ( * p ) - > rb_left ;
else if ( bytenr > entry - > bytenr )
p = & ( * p ) - > rb_right ;
else
btrfs: qgroup: Refactor btrfs_qgroup_insert_dirty_extent()
Refactor btrfs_qgroup_insert_dirty_extent() function, to two functions:
1. btrfs_qgroup_insert_dirty_extent_nolock()
Almost the same with original code.
For delayed_ref usage, which has delayed refs locked.
Change the return value type to int, since caller never needs the
pointer, but only needs to know if they need to free the allocated
memory.
2. btrfs_qgroup_insert_dirty_extent()
The more encapsulated version.
Will do the delayed_refs lock, memory allocation, quota enabled check
and other things.
The original design is to keep exported functions to minimal, but since
more btrfs hacks exposed, like replacing path in balance, we need to
record dirty extents manually, so we have to add such functions.
Also, add comment for both functions, to info developers how to keep
qgroup correct when doing hacks.
Cc: Mark Fasheh <mfasheh@suse.de>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-and-Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
2016-08-15 10:36:50 +08:00
return 1 ;
2015-04-16 14:34:17 +08:00
}
rb_link_node ( & record - > node , parent_node , p ) ;
rb_insert_color ( & record - > node , & delayed_refs - > dirty_extent_root ) ;
btrfs: qgroup: Refactor btrfs_qgroup_insert_dirty_extent()
Refactor btrfs_qgroup_insert_dirty_extent() function, to two functions:
1. btrfs_qgroup_insert_dirty_extent_nolock()
Almost the same with original code.
For delayed_ref usage, which has delayed refs locked.
Change the return value type to int, since caller never needs the
pointer, but only needs to know if they need to free the allocated
memory.
2. btrfs_qgroup_insert_dirty_extent()
The more encapsulated version.
Will do the delayed_refs lock, memory allocation, quota enabled check
and other things.
The original design is to keep exported functions to minimal, but since
more btrfs hacks exposed, like replacing path in balance, we need to
record dirty extents manually, so we have to add such functions.
Also, add comment for both functions, to info developers how to keep
qgroup correct when doing hacks.
Cc: Mark Fasheh <mfasheh@suse.de>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-and-Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
2016-08-15 10:36:50 +08:00
return 0 ;
}
2017-02-15 10:43:03 +08:00
int btrfs_qgroup_trace_extent_post ( struct btrfs_fs_info * fs_info ,
struct btrfs_qgroup_extent_record * qrecord )
{
struct ulist * old_root ;
u64 bytenr = qrecord - > bytenr ;
int ret ;
ret = btrfs_find_all_roots ( NULL , fs_info , bytenr , 0 , & old_root ) ;
if ( ret < 0 )
return ret ;
/*
* Here we don ' t need to get the lock of
* trans - > transaction - > delayed_refs , since inserted qrecord won ' t
* be deleted , only qrecord - > node may be modified ( new qrecord insert )
*
* So modifying qrecord - > old_roots is safe here
*/
qrecord - > old_roots = old_root ;
return 0 ;
}
2016-10-18 09:31:27 +08:00
/*
 * Record the extent [@bytenr, @bytenr + @num_bytes) as dirty for qgroup
 * accounting in the delayed refs of the transaction behind @trans.
 *
 * This is the encapsulated variant: it checks that quotas are enabled,
 * allocates the record with @gfp_flag, takes delayed_refs->lock for the
 * insertion and then fills in the old roots of a newly inserted record.
 *
 * Returns 0 when nothing had to be done (quotas disabled, zero @bytenr or
 * zero @num_bytes), when the extent was already recorded, or on success.
 * Returns -EINVAL for a NULL @trans, -ENOMEM on allocation failure, or the
 * error from btrfs_qgroup_trace_extent_post().
 */
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
		gfp_t gfp_flag)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record;
	int dup;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;
	if (bytenr == 0 || num_bytes == 0)
		return 0;
	if (WARN_ON(trans == NULL))
		return -EINVAL;

	record = kmalloc(sizeof(*record), gfp_flag);
	if (!record)
		return -ENOMEM;

	record->bytenr = bytenr;
	record->num_bytes = num_bytes;
	record->old_roots = NULL;

	delayed_refs = &trans->transaction->delayed_refs;

	spin_lock(&delayed_refs->lock);
	dup = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
	spin_unlock(&delayed_refs->lock);

	if (dup > 0) {
		/* Already traced: our record was not inserted, drop it. */
		kfree(record);
		return 0;
	}

	return btrfs_qgroup_trace_extent_post(fs_info, record);
}
2016-10-18 09:31:28 +08:00
int btrfs_qgroup_trace_leaf_items ( struct btrfs_trans_handle * trans ,
2016-06-22 18:54:24 -04:00
struct btrfs_fs_info * fs_info ,
2016-10-18 09:31:28 +08:00
struct extent_buffer * eb )
{
int nr = btrfs_header_nritems ( eb ) ;
int i , extent_type , ret ;
struct btrfs_key key ;
struct btrfs_file_extent_item * fi ;
u64 bytenr , num_bytes ;
/* We can be called directly from walk_up_proc() */
2016-06-22 18:54:23 -04:00
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) )
2016-10-18 09:31:28 +08:00
return 0 ;
for ( i = 0 ; i < nr ; i + + ) {
btrfs_item_key_to_cpu ( eb , & key , i ) ;
if ( key . type ! = BTRFS_EXTENT_DATA_KEY )
continue ;
fi = btrfs_item_ptr ( eb , i , struct btrfs_file_extent_item ) ;
/* filter out non qgroup-accountable extents */
extent_type = btrfs_file_extent_type ( eb , fi ) ;
if ( extent_type = = BTRFS_FILE_EXTENT_INLINE )
continue ;
bytenr = btrfs_file_extent_disk_bytenr ( eb , fi ) ;
if ( ! bytenr )
continue ;
num_bytes = btrfs_file_extent_disk_num_bytes ( eb , fi ) ;
2016-06-22 18:54:23 -04:00
ret = btrfs_qgroup_trace_extent ( trans , fs_info , bytenr ,
num_bytes , GFP_NOFS ) ;
2016-10-18 09:31:28 +08:00
if ( ret )
return ret ;
}
2017-06-20 08:15:26 -04:00
cond_resched ( ) ;
2016-10-18 09:31:28 +08:00
return 0 ;
}
/*
* Walk up the tree from the bottom , freeing leaves and any interior
* nodes which have had all slots visited . If a node ( leaf or
* interior ) is freed , the node above it will have it ' s slot
* incremented . The root node will never be freed .
*
* At the end of this function , we should have a path which has all
* slots incremented to the next position for a search . If we need to
* read a new node it will be NULL and the node above it will have the
* correct slot selected for a later read .
*
* If we increment the root nodes slot counter past the number of
* elements , 1 is returned to signal completion of the search .
*/
2017-02-10 20:30:23 +01:00
static int adjust_slots_upwards ( struct btrfs_path * path , int root_level )
2016-10-18 09:31:28 +08:00
{
int level = 0 ;
int nr , slot ;
struct extent_buffer * eb ;
if ( root_level = = 0 )
return 1 ;
while ( level < = root_level ) {
eb = path - > nodes [ level ] ;
nr = btrfs_header_nritems ( eb ) ;
path - > slots [ level ] + + ;
slot = path - > slots [ level ] ;
if ( slot > = nr | | level = = 0 ) {
/*
* Don ' t free the root - we will detect this
* condition after our loop and return a
* positive value for caller to stop walking the tree .
*/
if ( level ! = root_level ) {
btrfs_tree_unlock_rw ( eb , path - > locks [ level ] ) ;
path - > locks [ level ] = 0 ;
free_extent_buffer ( eb ) ;
path - > nodes [ level ] = NULL ;
path - > slots [ level ] = 0 ;
}
} else {
/*
* We have a valid slot to walk back down
* from . Stop here so caller can process these
* new nodes .
*/
break ;
}
level + + ;
}
eb = path - > nodes [ root_level ] ;
if ( path - > slots [ root_level ] > = btrfs_header_nritems ( eb ) )
return 1 ;
return 0 ;
}
/*
 * Trace every tree block and every file extent below @root_eb for qgroup
 * accounting.
 *
 * @root_eb is read first (using @root_gen) if it is not up to date.  A
 * @root_level of 0 means @root_eb is itself a leaf and only its items are
 * traced.  Otherwise the subtree is walked leaf by leaf: each tree block
 * that has to be read is traced as a nodesize extent, and each leaf has
 * its file extent items traced.
 *
 * Returns 0 on success or when quotas are not enabled, -ENOMEM if the
 * path cannot be allocated, -EIO if a child block is not up to date after
 * reading, or the error of the failing helper.
 */
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_buffer *root_eb,
			       u64 root_gen, int root_level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path = NULL;
	int level;
	int ret = 0;

	BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
	BUG_ON(root_eb == NULL);

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	if (!extent_buffer_uptodate(root_eb)) {
		ret = btrfs_read_buffer(root_eb, root_gen);
		if (ret)
			goto out;
	}

	if (root_level == 0) {
		ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Seed the path with the subtree root.  The extra reference is the
	 * one owned by the path.
	 */
	extent_buffer_get(root_eb);
	path->nodes[root_level] = root_eb;
	path->slots[root_level] = 0;
	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */

	/*
	 * Each pass walks from the subtree root down to a leaf, reading in
	 * the blocks that are missing from the path, then accounts the leaf
	 * and moves the slots on to the next position.  The walk is over
	 * once adjust_slots_upwards() reports the root as exhausted.
	 */
	do {
		for (level = root_level; level >= 0; level--) {
			struct extent_buffer *parent;
			struct extent_buffer *child;
			u64 child_bytenr;
			u64 child_gen;
			int parent_slot;

			if (path->nodes[level])
				continue;

			/*
			 * We need to get child blockptr/gen from parent
			 * before we can read it.
			 */
			parent = path->nodes[level + 1];
			parent_slot = path->slots[level + 1];
			child_bytenr = btrfs_node_blockptr(parent, parent_slot);
			child_gen = btrfs_node_ptr_generation(parent,
							      parent_slot);

			child = read_tree_block(fs_info, child_bytenr,
						child_gen);
			if (IS_ERR(child)) {
				ret = PTR_ERR(child);
				goto out;
			}
			if (!extent_buffer_uptodate(child)) {
				free_extent_buffer(child);
				ret = -EIO;
				goto out;
			}

			path->nodes[level] = child;
			path->slots[level] = 0;

			btrfs_tree_read_lock(child);
			btrfs_set_lock_blocking_rw(child, BTRFS_READ_LOCK);
			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

			/* Metadata is accounted every time we read a block */
			ret = btrfs_qgroup_trace_extent(trans, fs_info,
							child_bytenr,
							fs_info->nodesize,
							GFP_NOFS);
			if (ret)
				goto out;
		}

		/* The whole path down to level 0 is populated now */
		ret = btrfs_qgroup_trace_leaf_items(trans, fs_info,
						    path->nodes[0]);
		if (ret)
			goto out;

		/* Nonzero return here means we completed our search */
	} while (!adjust_slots_upwards(path, root_level));

	ret = 0;
out:
	btrfs_free_path(path);

	return ret;
}
2015-04-12 16:52:34 +08:00
#define UPDATE_NEW	0
#define UPDATE_OLD	1
/*
 * Walk all of the roots that point to the bytenr and adjust their refcnts.
 *
 * For every root in @roots that has a qgroup, that qgroup and all qgroups
 * reachable through its ->groups lists get their old refcnt (@update_old
 * is UPDATE_OLD) or new refcnt (UPDATE_NEW) bumped by one for @seq.  Every
 * qgroup touched is also collected in @qgroups.  @tmp is scratch space and
 * is reinitialized for each root.
 *
 * Returns 0 on success or a negative value from ulist_add().
 */
static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
				struct ulist *roots, struct ulist *tmp,
				struct ulist *qgroups, u64 seq, int update_old)
{
	struct ulist_iterator root_iter;
	struct ulist_node *root_node;

	if (!roots)
		return 0;

	ULIST_ITER_INIT(&root_iter);
	while ((root_node = ulist_next(roots, &root_iter))) {
		struct ulist_iterator walk_iter;
		struct ulist_node *walk_node;
		struct btrfs_qgroup *qg;
		int ret;

		qg = find_qgroup_rb(fs_info, root_node->val);
		if (!qg)
			continue;

		/* Start a fresh walk from this root's own qgroup */
		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;

		/* @tmp grows while we iterate it */
		ULIST_ITER_INIT(&walk_iter);
		while ((walk_node = ulist_next(tmp, &walk_iter))) {
			struct btrfs_qgroup_list *glist;

			qg = unode_aux_to_qgroup(walk_node);
			if (update_old)
				btrfs_qgroup_update_old_refcnt(qg, seq, 1);
			else
				btrfs_qgroup_update_new_refcnt(qg, seq, 1);

			list_for_each_entry(glist, &qg->groups, next_group) {
				struct btrfs_qgroup *parent = glist->group;

				ret = ulist_add(qgroups, parent->qgroupid,
						qgroup_to_aux(parent),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, parent->qgroupid,
						qgroup_to_aux(parent),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
2015-04-12 16:59:57 +08:00
/*
 * Update qgroup rfer/excl counters.
 *
 * Rfer update is easy: a qgroup that did not reference the extent before
 * but does now gains @num_bytes, and one that did before but does not now
 * loses @num_bytes.
 *
 * Excl update is tricky, the update is split into 2 parts.
 *
 * Part 1: possible exclusive <-> sharing detection:
 *
 *	     |   A   |  !A   |
 *	-----+-------+-------+
 *	  B  |   *   |   -   |
 *	-----+-------+-------+
 *	 !B  |   +   |  **   |
 *	-----+-------+-------+
 *
 * Conditions:
 *	A:  cur_old_roots <  nr_old_roots	(not exclusive before)
 *	!A: cur_old_roots == nr_old_roots	(possible exclusive before)
 *	B:  cur_new_roots <  nr_new_roots	(not exclusive now)
 *	!B: cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 *	+:  Possible sharing -> exclusive
 *	-:  Possible exclusive -> sharing
 *	*:  Definitely not changed
 *	**: Possibly unchanged
 *
 * For the !A and !B conditions, the exception is the
 * cur_old/new_roots == 0 case.
 *
 * To make the logic clear, we first use conditions A and B to split the
 * combinations into the 4 results above.
 *
 * Part 2: for results "+" and "-", check the old/new_roots == 0 case, as
 * in them only one variable may be 0.
 *
 * Lastly, check result "**": since both variables may be 0 there, split
 * it again (2x2).
 */
static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
				  struct ulist *qgroups,
				  u64 nr_old_roots,
				  u64 nr_new_roots,
				  u64 num_bytes, u64 seq)
{
	struct ulist_iterator iter;
	struct ulist_node *node;

	ULIST_ITER_INIT(&iter);
	while ((node = ulist_next(qgroups, &iter))) {
		struct btrfs_qgroup *qg = unode_aux_to_qgroup(node);
		u64 old_cnt = btrfs_qgroup_get_old_refcnt(qg, seq);
		u64 new_cnt = btrfs_qgroup_get_new_refcnt(qg, seq);
		bool dirty = false;

		trace_qgroup_update_counters(fs_info, qg->qgroupid,
					     old_cnt, new_cnt);

		/* Rfer update part */
		if (old_cnt == 0 && new_cnt > 0) {
			qg->rfer += num_bytes;
			qg->rfer_cmpr += num_bytes;
			dirty = true;
		} else if (old_cnt > 0 && new_cnt == 0) {
			qg->rfer -= num_bytes;
			qg->rfer_cmpr -= num_bytes;
			dirty = true;
		}

		/* Excl update part */
		if (old_cnt == nr_old_roots && new_cnt < nr_new_roots) {
			/* Exclusive/none -> shared: only "exclusive" counts */
			if (old_cnt != 0) {
				qg->excl -= num_bytes;
				qg->excl_cmpr -= num_bytes;
				dirty = true;
			}
		} else if (old_cnt < nr_old_roots && new_cnt == nr_new_roots) {
			/* Shared -> exclusive/none: only "exclusive" counts */
			if (new_cnt != 0) {
				qg->excl += num_bytes;
				qg->excl_cmpr += num_bytes;
				dirty = true;
			}
		} else if (old_cnt == nr_old_roots && new_cnt == nr_new_roots) {
			/* Exclusive/none -> exclusive/none */
			if (old_cnt == 0 && new_cnt != 0) {
				/* None -> exclusive */
				qg->excl += num_bytes;
				qg->excl_cmpr += num_bytes;
				dirty = true;
			} else if (old_cnt != 0 && new_cnt == 0) {
				/* Exclusive -> none */
				qg->excl -= num_bytes;
				qg->excl_cmpr -= num_bytes;
				dirty = true;
			}
			/* None -> none and exclusive -> exclusive: no change */
		}

		if (dirty)
			qgroup_dirty(fs_info, qg);
	}

	return 0;
}
2017-02-27 15:10:34 +08:00
/*
* Check if the @ roots potentially is a list of fs tree roots
*
* Return 0 for definitely not a fs / subvol tree roots ulist
* Return 1 for possible fs / subvol tree roots in the list ( considering an empty
* one as well )
*/
static int maybe_fs_roots ( struct ulist * roots )
{
struct ulist_node * unode ;
struct ulist_iterator uiter ;
/* Empty one, still possible for fs roots */
if ( ! roots | | roots - > nnodes = = 0 )
return 1 ;
ULIST_ITER_INIT ( & uiter ) ;
unode = ulist_next ( roots , & uiter ) ;
if ( ! unode )
return 1 ;
/*
* If it contains fs tree roots , then it must belong to fs / subvol
* trees .
* If it contains a non - fs tree , it won ' t be shared with fs / subvol trees .
*/
return is_fstree ( unode - > val ) ;
}
2015-04-16 17:18:36 +08:00
int
2015-04-16 15:37:33 +08:00
btrfs_qgroup_account_extent ( struct btrfs_trans_handle * trans ,
struct btrfs_fs_info * fs_info ,
u64 bytenr , u64 num_bytes ,
struct ulist * old_roots , struct ulist * new_roots )
{
struct ulist * qgroups = NULL ;
struct ulist * tmp = NULL ;
u64 seq ;
u64 nr_new_roots = 0 ;
u64 nr_old_roots = 0 ;
int ret = 0 ;
2017-02-13 14:05:24 +01:00
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) )
return 0 ;
2017-02-27 15:10:34 +08:00
if ( new_roots ) {
if ( ! maybe_fs_roots ( new_roots ) )
goto out_free ;
2015-04-16 15:37:33 +08:00
nr_new_roots = new_roots - > nnodes ;
2017-02-27 15:10:34 +08:00
}
if ( old_roots ) {
if ( ! maybe_fs_roots ( old_roots ) )
goto out_free ;
2015-04-16 15:37:33 +08:00
nr_old_roots = old_roots - > nnodes ;
2017-02-27 15:10:34 +08:00
}
/* Quick exit, either not fs tree roots, or won't affect any qgroup */
if ( nr_old_roots = = 0 & & nr_new_roots = = 0 )
goto out_free ;
2015-04-16 15:37:33 +08:00
BUG_ON ( ! fs_info - > quota_root ) ;
2016-06-09 17:27:55 -04:00
trace_btrfs_qgroup_account_extent ( fs_info , bytenr , num_bytes ,
nr_old_roots , nr_new_roots ) ;
2016-03-29 17:19:55 -07:00
2015-04-16 15:37:33 +08:00
qgroups = ulist_alloc ( GFP_NOFS ) ;
if ( ! qgroups ) {
ret = - ENOMEM ;
goto out_free ;
}
tmp = ulist_alloc ( GFP_NOFS ) ;
if ( ! tmp ) {
ret = - ENOMEM ;
goto out_free ;
}
mutex_lock ( & fs_info - > qgroup_rescan_lock ) ;
if ( fs_info - > qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN ) {
if ( fs_info - > qgroup_rescan_progress . objectid < = bytenr ) {
mutex_unlock ( & fs_info - > qgroup_rescan_lock ) ;
ret = 0 ;
goto out_free ;
}
}
mutex_unlock ( & fs_info - > qgroup_rescan_lock ) ;
spin_lock ( & fs_info - > qgroup_lock ) ;
seq = fs_info - > qgroup_seq ;
/* Update old refcnts using old_roots */
ret = qgroup_update_refcnt ( fs_info , old_roots , tmp , qgroups , seq ,
UPDATE_OLD ) ;
if ( ret < 0 )
goto out ;
/* Update new refcnts using new_roots */
ret = qgroup_update_refcnt ( fs_info , new_roots , tmp , qgroups , seq ,
UPDATE_NEW ) ;
if ( ret < 0 )
goto out ;
qgroup_update_counters ( fs_info , qgroups , nr_old_roots , nr_new_roots ,
num_bytes , seq ) ;
/*
* Bump qgroup_seq to avoid seq overlap
*/
fs_info - > qgroup_seq + = max ( nr_old_roots , nr_new_roots ) + 1 ;
out :
spin_unlock ( & fs_info - > qgroup_lock ) ;
out_free :
ulist_free ( tmp ) ;
ulist_free ( qgroups ) ;
ulist_free ( old_roots ) ;
ulist_free ( new_roots ) ;
return ret ;
}
int btrfs_qgroup_account_extents ( struct btrfs_trans_handle * trans ,
struct btrfs_fs_info * fs_info )
{
struct btrfs_qgroup_extent_record * record ;
struct btrfs_delayed_ref_root * delayed_refs ;
struct ulist * new_roots = NULL ;
struct rb_node * node ;
2015-04-20 09:53:50 +08:00
u64 qgroup_to_skip ;
2015-04-16 15:37:33 +08:00
int ret = 0 ;
delayed_refs = & trans - > transaction - > delayed_refs ;
2015-04-20 09:53:50 +08:00
qgroup_to_skip = delayed_refs - > qgroup_to_skip ;
2015-04-16 15:37:33 +08:00
while ( ( node = rb_first ( & delayed_refs - > dirty_extent_root ) ) ) {
record = rb_entry ( node , struct btrfs_qgroup_extent_record ,
node ) ;
2016-06-09 17:27:55 -04:00
trace_btrfs_qgroup_account_extents ( fs_info , record ) ;
2016-03-29 17:19:55 -07:00
2015-04-16 15:37:33 +08:00
if ( ! ret ) {
2017-02-27 15:10:35 +08:00
/*
* Old roots should be searched when inserting qgroup
* extent record
*/
if ( WARN_ON ( ! record - > old_roots ) ) {
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots ( NULL , fs_info ,
record - > bytenr , 0 ,
& record - > old_roots ) ;
if ( ret < 0 )
goto cleanup ;
}
2015-04-16 15:37:33 +08:00
/*
2017-03-16 10:04:34 -06:00
* Use SEQ_LAST as time_seq to do special search , which
2015-04-16 15:37:33 +08:00
* doesn ' t lock tree or delayed_refs and search current
* root . It ' s safe inside commit_transaction ( ) .
*/
ret = btrfs_find_all_roots ( trans , fs_info ,
2017-03-16 10:04:34 -06:00
record - > bytenr , SEQ_LAST , & new_roots ) ;
2015-04-16 15:37:33 +08:00
if ( ret < 0 )
goto cleanup ;
2017-02-27 15:10:35 +08:00
if ( qgroup_to_skip ) {
2015-04-20 09:53:50 +08:00
ulist_del ( new_roots , qgroup_to_skip , 0 ) ;
2017-02-27 15:10:35 +08:00
ulist_del ( record - > old_roots , qgroup_to_skip ,
0 ) ;
}
2015-04-16 15:37:33 +08:00
ret = btrfs_qgroup_account_extent ( trans , fs_info ,
record - > bytenr , record - > num_bytes ,
record - > old_roots , new_roots ) ;
record - > old_roots = NULL ;
new_roots = NULL ;
}
cleanup :
ulist_free ( record - > old_roots ) ;
ulist_free ( new_roots ) ;
new_roots = NULL ;
rb_erase ( node , & delayed_refs - > dirty_extent_root ) ;
kfree ( record ) ;
}
return ret ;
}
2012-06-28 18:03:02 +02:00
/*
* called from commit_transaction . Writes all changed qgroups to disk .
*/
int btrfs_run_qgroups ( struct btrfs_trans_handle * trans ,
struct btrfs_fs_info * fs_info )
{
struct btrfs_root * quota_root = fs_info - > quota_root ;
int ret = 0 ;
2013-04-25 16:04:52 +00:00
int start_rescan_worker = 0 ;
2012-06-28 18:03:02 +02:00
if ( ! quota_root )
goto out ;
2016-09-02 15:40:02 -04:00
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) & &
test_bit ( BTRFS_FS_QUOTA_ENABLING , & fs_info - > flags ) )
2013-04-25 16:04:52 +00:00
start_rescan_worker = 1 ;
2016-09-02 15:40:02 -04:00
if ( test_and_clear_bit ( BTRFS_FS_QUOTA_ENABLING , & fs_info - > flags ) )
set_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) ;
2012-06-28 18:03:02 +02:00
spin_lock ( & fs_info - > qgroup_lock ) ;
while ( ! list_empty ( & fs_info - > dirty_qgroups ) ) {
struct btrfs_qgroup * qgroup ;
qgroup = list_first_entry ( & fs_info - > dirty_qgroups ,
struct btrfs_qgroup , dirty ) ;
list_del_init ( & qgroup - > dirty ) ;
spin_unlock ( & fs_info - > qgroup_lock ) ;
ret = update_qgroup_info_item ( trans , quota_root , qgroup ) ;
2014-11-20 21:04:56 -05:00
if ( ret )
fs_info - > qgroup_flags | =
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
ret = update_qgroup_limit_item ( trans , quota_root , qgroup ) ;
2012-06-28 18:03:02 +02:00
if ( ret )
fs_info - > qgroup_flags | =
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
spin_lock ( & fs_info - > qgroup_lock ) ;
}
2016-09-02 15:40:02 -04:00
if ( test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) )
2012-06-28 18:03:02 +02:00
fs_info - > qgroup_flags | = BTRFS_QGROUP_STATUS_FLAG_ON ;
else
fs_info - > qgroup_flags & = ~ BTRFS_QGROUP_STATUS_FLAG_ON ;
spin_unlock ( & fs_info - > qgroup_lock ) ;
ret = update_qgroup_status_item ( trans , fs_info , quota_root ) ;
if ( ret )
fs_info - > qgroup_flags | = BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
2013-04-25 16:04:52 +00:00
if ( ! ret & & start_rescan_worker ) {
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
ret = qgroup_rescan_init ( fs_info , 0 , 1 ) ;
if ( ! ret ) {
qgroup_rescan_zero_tracking ( fs_info ) ;
2014-02-28 10:46:16 +08:00
btrfs_queue_work ( fs_info - > qgroup_rescan_workers ,
& fs_info - > qgroup_rescan_work ) ;
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
}
2013-04-25 16:04:52 +00:00
ret = 0 ;
}
2012-06-28 18:03:02 +02:00
out :
return ret ;
}
/*
 * Copy the accounting information between qgroups.  This is necessary
 * when a snapshot or a subvolume is created.  Throwing an error will
 * cause a transaction abort so we take extra care here to only error
 * when a readonly fs is a reasonable outcome.
 *
 * @srcid:    qgroup id of the snapshot source, or 0 for a new subvolume
 * @objectid: id of the newly created subvolume/snapshot
 * @inherit:  optional inherit request.  It is followed in memory by an
 *            array of u64 qgroup ids: num_qgroups parent ids, then
 *            num_ref_copies (src, dst) pairs, then num_excl_copies
 *            (src, dst) pairs.  Ids that do not name an existing qgroup
 *            of a higher level than @objectid are zeroed in place and
 *            ignored.
 *
 * Returns 0 on success or when quotas are not enabled, -EINVAL if there
 * is no quota root or a copy pair names an unknown qgroup, or the error
 * of the failing helper.
 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	u64 i;
	u64 *i_qgroups;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);

			/*
			 * Zero out invalid groups so we can ignore
			 * them later.
			 */
			if (!srcgroup ||
			    ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
				*i_qgroups = 0ULL;

			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		level_size = fs_info->nodesize;
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
			if (*i_qgroups == 0)
				continue;
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret && ret != -EEXIST)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret && ret != -EEXIST)
				goto out;
		}
		ret = 0;
	}

	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		dstgroup->lim_flags = inherit->lim.flags;
		dstgroup->max_rfer = inherit->lim.max_rfer;
		dstgroup->max_excl = inherit->lim.max_excl;
		dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
		dstgroup->rsv_excl = inherit->lim.rsv_excl;

		/*
		 * Do not write the limit item here: we hold the qgroup_lock
		 * spinlock, and btrfs_run_qgroups() drops that lock around
		 * update_qgroup_limit_item().  Mark the qgroup dirty instead,
		 * so btrfs_run_qgroups() writes the limit item and flags the
		 * state as inconsistent if that fails.
		 */
		qgroup_dirty(fs_info, dstgroup);
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;

		/*
		 * We call inherit after we clone the root in order to make sure
		 * our counts don't go crazy, so at this point the only
		 * difference between the two roots should be the root node.
		 */
		dstgroup->rfer = srcgroup->rfer;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
		dstgroup->excl = level_size;
		dstgroup->excl_cmpr = level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;

		/* inherit the limit info */
		dstgroup->lim_flags = srcgroup->lim_flags;
		dstgroup->max_rfer = srcgroup->max_rfer;
		dstgroup->max_excl = srcgroup->max_excl;
		dstgroup->rsv_rfer = srcgroup->rsv_rfer;
		dstgroup->rsv_excl = srcgroup->rsv_excl;

		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	/* In-memory relations to the (still valid) parent qgroups */
	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		if (*i_qgroups) {
			ret = add_relation_rb(fs_info, objectid, *i_qgroups);
			if (ret)
				goto unlock;
		}
		++i_qgroups;
	}

	/* (src, dst) pairs whose referenced counters are copied */
	for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
	}

	/* (src, dst) pairs whose exclusive counters are copied */
	for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
2017-01-25 09:50:33 -05:00
/*
 * Check whether reserving @num_bytes more would keep @qg within its
 * limits.
 *
 * Only the limits enabled in qg->lim_flags are checked: the referenced
 * limit against reserved + rfer + @num_bytes, the exclusive limit against
 * reserved + excl + @num_bytes.
 *
 * Returns true if no enabled limit would be exceeded, false otherwise.
 */
static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
{
	if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
		if (qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
			return false;
	}

	if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
		if (qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
			return false;
	}

	return true;
}
/*
 * Reserve @num_bytes in the qgroup of @root and in every qgroup reachable
 * from it through the ->groups lists.
 *
 * With @enforce set, the limits of all those qgroups are checked first.
 * If a limit would be exceeded and that qgroup holds reservations, delalloc
 * is flushed and the transaction committed once, and then the whole check
 * is retried; if it still fails, -EDQUOT is returned and nothing is
 * reserved.  @enforce is ignored when BTRFS_FS_QUOTA_OVERRIDE is set and
 * the caller has CAP_SYS_RESOURCE.
 *
 * Returns 0 on success, and also when @root is not an fs tree, @num_bytes
 * is 0, there is no quota root, or @root has no qgroup.  Returns -EDQUOT
 * as described above, or the error of the failing helper.
 */
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	struct btrfs_qgroup *qgroup;
	struct ulist_iterator uiter;
	struct ulist_node *unode;
	bool retried = false;
	int ret = 0;

	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
	    capable(CAP_SYS_RESOURCE))
		enforce = false;

retry:
	spin_lock(&fs_info->qgroup_lock);
	if (!fs_info->quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * Step one: collect all affected qgroups in fs_info->qgroup_ulist
	 * and check whether any limit would be exceeded.
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg = unode_aux_to_qgroup(unode);
		struct btrfs_qgroup_list *glist;

		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
			/*
			 * Commit the tree and retry, since we may have
			 * deletions which would free up space.
			 */
			if (!retried && qg->reserved > 0) {
				struct btrfs_trans_handle *trans;

				/* Everything below runs unlocked */
				spin_unlock(&fs_info->qgroup_lock);

				ret = btrfs_start_delalloc_inodes(root, 0);
				if (ret)
					return ret;
				btrfs_wait_ordered_extents(root, U64_MAX, 0,
							   (u64)-1);

				trans = btrfs_join_transaction(root);
				if (IS_ERR(trans))
					return PTR_ERR(trans);

				ret = btrfs_commit_transaction(trans);
				if (ret)
					return ret;

				retried = true;
				goto retry;
			}

			ret = -EDQUOT;
			goto out;
		}

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;

	/*
	 * Step two: no limit exceeded, record the reservation in all the
	 * qgroups collected above.
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, num_bytes);
		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
2015-09-08 17:08:37 +08:00
/*
 * Give back @num_bytes of reservation from the qgroup of root @ref_root
 * and from every qgroup reachable from it through the ->groups lists.
 *
 * A qgroup holding less than @num_bytes is left untouched and reported
 * through report_reserved_underflow().  Nothing happens if @ref_root is
 * not an fs tree, @num_bytes is 0, there is no quota root, or the root
 * has no qgroup.
 */
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes)
{
	struct ulist_iterator iter;
	struct ulist_node *node;
	struct btrfs_qgroup *start;
	int ret = 0;

	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	if (!fs_info->quota_root)
		goto out;

	start = find_qgroup_rb(fs_info, ref_root);
	if (!start)
		goto out;

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, start->qgroupid,
			(uintptr_t)start, GFP_ATOMIC);
	if (ret < 0)
		goto out;

	/* The list grows with the parent qgroups while we walk it */
	ULIST_ITER_INIT(&iter);
	while ((node = ulist_next(fs_info->qgroup_ulist, &iter))) {
		struct btrfs_qgroup *qg = unode_aux_to_qgroup(node);
		struct btrfs_qgroup_list *glist;

		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);

		if (qg->reserved < num_bytes)
			report_reserved_underflow(fs_info, qg, num_bytes);
		else
			qg->reserved -= num_bytes;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}
2013-04-25 16:04:51 +00:00
/*
* returns < 0 on error , 0 when more leafs are to be scanned .
2015-02-27 16:24:24 +08:00
* returns 1 when done .
2013-04-25 16:04:51 +00:00
*/
static int
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
qgroup_rescan_leaf ( struct btrfs_fs_info * fs_info , struct btrfs_path * path ,
2015-10-26 09:19:43 +08:00
struct btrfs_trans_handle * trans )
2013-04-25 16:04:51 +00:00
{
struct btrfs_key found ;
2015-10-26 09:19:43 +08:00
struct extent_buffer * scratch_leaf = NULL ;
2013-04-25 16:04:51 +00:00
struct ulist * roots = NULL ;
2015-02-25 15:47:32 +01:00
struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT ( tree_mod_seq_elem ) ;
2014-05-13 17:30:47 -07:00
u64 num_bytes ;
2013-04-25 16:04:51 +00:00
int slot ;
int ret ;
mutex_lock ( & fs_info - > qgroup_rescan_lock ) ;
ret = btrfs_search_slot_for_read ( fs_info - > extent_root ,
& fs_info - > qgroup_rescan_progress ,
path , 1 , 0 ) ;
2016-09-20 10:05:02 -04:00
btrfs_debug ( fs_info ,
" current progress key (%llu %u %llu), search_slot ret %d " ,
fs_info - > qgroup_rescan_progress . objectid ,
fs_info - > qgroup_rescan_progress . type ,
fs_info - > qgroup_rescan_progress . offset , ret ) ;
2013-04-25 16:04:51 +00:00
if ( ret ) {
/*
* The rescan is about to end , we will not be scanning any
* further blocks . We cannot unset the RESCAN flag here , because
* we want to commit the transaction if everything went well .
* To make the live accounting work in this phase , we set our
* scan progress pointer such that every real extent objectid
* will be smaller .
*/
fs_info - > qgroup_rescan_progress . objectid = ( u64 ) - 1 ;
btrfs_release_path ( path ) ;
mutex_unlock ( & fs_info - > qgroup_rescan_lock ) ;
return ret ;
}
btrfs_item_key_to_cpu ( path - > nodes [ 0 ] , & found ,
btrfs_header_nritems ( path - > nodes [ 0 ] ) - 1 ) ;
fs_info - > qgroup_rescan_progress . objectid = found . objectid + 1 ;
btrfs_get_tree_mod_seq ( fs_info , & tree_mod_seq_elem ) ;
2015-10-26 09:19:43 +08:00
scratch_leaf = btrfs_clone_extent_buffer ( path - > nodes [ 0 ] ) ;
if ( ! scratch_leaf ) {
ret = - ENOMEM ;
mutex_unlock ( & fs_info - > qgroup_rescan_lock ) ;
goto out ;
}
extent_buffer_get ( scratch_leaf ) ;
btrfs_tree_read_lock ( scratch_leaf ) ;
btrfs_set_lock_blocking_rw ( scratch_leaf , BTRFS_READ_LOCK ) ;
2013-04-25 16:04:51 +00:00
slot = path - > slots [ 0 ] ;
btrfs_release_path ( path ) ;
mutex_unlock ( & fs_info - > qgroup_rescan_lock ) ;
for ( ; slot < btrfs_header_nritems ( scratch_leaf ) ; + + slot ) {
btrfs_item_key_to_cpu ( scratch_leaf , & found , slot ) ;
2014-01-23 16:45:10 -05:00
if ( found . type ! = BTRFS_EXTENT_ITEM_KEY & &
found . type ! = BTRFS_METADATA_ITEM_KEY )
2013-04-25 16:04:51 +00:00
continue ;
2014-01-23 16:45:10 -05:00
if ( found . type = = BTRFS_METADATA_ITEM_KEY )
2016-06-15 09:22:56 -04:00
num_bytes = fs_info - > nodesize ;
2014-01-23 16:45:10 -05:00
else
num_bytes = found . offset ;
2014-05-13 17:30:47 -07:00
ret = btrfs_find_all_roots ( NULL , fs_info , found . objectid , 0 ,
& roots ) ;
2013-04-25 16:04:51 +00:00
if ( ret < 0 )
goto out ;
2015-04-13 11:02:16 +08:00
/* For rescan, just pass old_roots as NULL */
ret = btrfs_qgroup_account_extent ( trans , fs_info ,
found . objectid , num_bytes , NULL , roots ) ;
if ( ret < 0 )
2014-05-13 17:30:47 -07:00
goto out ;
2013-04-25 16:04:51 +00:00
}
out :
2015-10-26 09:19:43 +08:00
if ( scratch_leaf ) {
btrfs_tree_read_unlock_blocking ( scratch_leaf ) ;
free_extent_buffer ( scratch_leaf ) ;
}
2013-04-25 16:04:51 +00:00
btrfs_put_tree_mod_seq ( fs_info , & tree_mod_seq_elem ) ;
return ret ;
}
2014-02-28 10:46:19 +08:00
static void btrfs_qgroup_rescan_worker ( struct btrfs_work * work )
2013-04-25 16:04:51 +00:00
{
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
struct btrfs_fs_info * fs_info = container_of ( work , struct btrfs_fs_info ,
qgroup_rescan_work ) ;
2013-04-25 16:04:51 +00:00
struct btrfs_path * path ;
struct btrfs_trans_handle * trans = NULL ;
int err = - ENOMEM ;
2015-02-27 16:24:25 +08:00
int ret = 0 ;
2013-04-25 16:04:51 +00:00
path = btrfs_alloc_path ( ) ;
if ( ! path )
goto out ;
err = 0 ;
2015-11-04 15:56:16 -08:00
while ( ! err & & ! btrfs_fs_closing ( fs_info ) ) {
2013-04-25 16:04:51 +00:00
trans = btrfs_start_transaction ( fs_info - > fs_root , 0 ) ;
if ( IS_ERR ( trans ) ) {
err = PTR_ERR ( trans ) ;
break ;
}
2016-09-02 15:40:02 -04:00
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) ) {
2013-04-25 16:04:51 +00:00
err = - EINTR ;
} else {
2015-10-26 09:19:43 +08:00
err = qgroup_rescan_leaf ( fs_info , path , trans ) ;
2013-04-25 16:04:51 +00:00
}
if ( err > 0 )
2016-09-09 21:39:03 -04:00
btrfs_commit_transaction ( trans ) ;
2013-04-25 16:04:51 +00:00
else
2016-09-09 21:39:03 -04:00
btrfs_end_transaction ( trans ) ;
2013-04-25 16:04:51 +00:00
}
out :
btrfs_free_path ( path ) ;
mutex_lock ( & fs_info - > qgroup_rescan_lock ) ;
2015-11-04 15:56:16 -08:00
if ( ! btrfs_fs_closing ( fs_info ) )
fs_info - > qgroup_flags & = ~ BTRFS_QGROUP_STATUS_FLAG_RESCAN ;
2013-04-25 16:04:51 +00:00
2015-02-27 16:24:24 +08:00
if ( err > 0 & &
2013-04-25 16:04:51 +00:00
fs_info - > qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ) {
fs_info - > qgroup_flags & = ~ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
} else if ( err < 0 ) {
fs_info - > qgroup_flags | = BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT ;
}
mutex_unlock ( & fs_info - > qgroup_rescan_lock ) ;
2015-02-27 16:24:25 +08:00
/*
2016-05-19 21:18:45 -04:00
* only update status , since the previous part has already updated the
2015-02-27 16:24:25 +08:00
* qgroup info .
*/
trans = btrfs_start_transaction ( fs_info - > quota_root , 1 ) ;
if ( IS_ERR ( trans ) ) {
err = PTR_ERR ( trans ) ;
btrfs_err ( fs_info ,
2017-07-13 15:32:18 +02:00
" fail to start transaction for status update: %d " ,
2015-02-27 16:24:25 +08:00
err ) ;
goto done ;
}
ret = update_qgroup_status_item ( trans , fs_info , fs_info - > quota_root ) ;
if ( ret < 0 ) {
err = ret ;
2016-09-20 10:05:02 -04:00
btrfs_err ( fs_info , " fail to update qgroup status: %d " , err ) ;
2015-02-27 16:24:25 +08:00
}
2016-09-09 21:39:03 -04:00
btrfs_end_transaction ( trans ) ;
2015-02-27 16:24:25 +08:00
2015-11-04 15:56:16 -08:00
if ( btrfs_fs_closing ( fs_info ) ) {
btrfs_info ( fs_info , " qgroup scan paused " ) ;
} else if ( err > = 0 ) {
2013-12-20 11:37:06 -05:00
btrfs_info ( fs_info , " qgroup scan completed%s " ,
2015-02-27 16:24:24 +08:00
err > 0 ? " (inconsistency flag cleared) " : " " ) ;
2013-04-25 16:04:51 +00:00
} else {
2013-12-20 11:37:06 -05:00
btrfs_err ( fs_info , " qgroup scan failed with %d " , err ) ;
2013-04-25 16:04:51 +00:00
}
2013-05-06 19:14:17 +00:00
2015-02-27 16:24:25 +08:00
done :
2016-08-15 12:10:33 -04:00
mutex_lock ( & fs_info - > qgroup_rescan_lock ) ;
fs_info - > qgroup_rescan_running = false ;
mutex_unlock ( & fs_info - > qgroup_rescan_lock ) ;
2013-05-06 19:14:17 +00:00
complete_all ( & fs_info - > qgroup_rescan_completion ) ;
2013-04-25 16:04:51 +00:00
}
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restructures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronization problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
/*
 * Prepare the rescan state kept in @fs_info.
 *
 * With @init_flags zero, the RESCAN and ON status flags must both be set
 * already, otherwise -EINVAL is returned.  With @init_flags non-zero, a
 * rescan must not be flagged yet (-EINPROGRESS) and quota must be on
 * (-EINVAL); the RESCAN flag is then set here.
 *
 * On success the progress key is reset to start at @progress_objectid, the
 * completion is re-initialised, the rescan is marked as running and the work
 * item is set up (but not queued).  Every failure is logged.
 *
 * Returns 0 on success or a negative errno.
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	if (!init_flags &&
	    !((fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) &&
	      (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto fail;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret)
			goto fail_unlock;

		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
	       sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
	init_completion(&fs_info->qgroup_rescan_completion);
	fs_info->qgroup_rescan_running = true;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	btrfs_init_work(&fs_info->qgroup_rescan_work,
			btrfs_qgroup_rescan_helper,
			btrfs_qgroup_rescan_worker, NULL, NULL);

	return 0;

fail_unlock:
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);
fail:
	btrfs_info(fs_info, " qgroup_rescan_init failed with %d ", ret);
	return ret;
}
static void
qgroup_rescan_zero_tracking ( struct btrfs_fs_info * fs_info )
{
struct rb_node * n ;
struct btrfs_qgroup * qgroup ;
spin_lock ( & fs_info - > qgroup_lock ) ;
2013-04-25 16:04:51 +00:00
/* clear all current qgroup tracking information */
for ( n = rb_first ( & fs_info - > qgroup_tree ) ; n ; n = rb_next ( n ) ) {
qgroup = rb_entry ( n , struct btrfs_qgroup , node ) ;
qgroup - > rfer = 0 ;
qgroup - > rfer_cmpr = 0 ;
qgroup - > excl = 0 ;
qgroup - > excl_cmpr = 0 ;
}
spin_unlock ( & fs_info - > qgroup_lock ) ;
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
}
2013-04-25 16:04:51 +00:00
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restructures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronization problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
int
btrfs_qgroup_rescan ( struct btrfs_fs_info * fs_info )
{
int ret = 0 ;
struct btrfs_trans_handle * trans ;
ret = qgroup_rescan_init ( fs_info , 0 , 1 ) ;
if ( ret )
return ret ;
/*
* We have set the rescan_progress to 0 , which means no more
* delayed refs will be accounted by btrfs_qgroup_account_ref .
* However , btrfs_qgroup_account_ref may be right after its call
* to btrfs_find_all_roots , in which case it would still do the
* accounting .
* To solve this , we ' re committing the transaction , which will
* ensure we run all delayed refs and only after that , we are
* going to clear all tracking information for a clean start .
*/
trans = btrfs_join_transaction ( fs_info - > fs_root ) ;
if ( IS_ERR ( trans ) ) {
fs_info - > qgroup_flags & = ~ BTRFS_QGROUP_STATUS_FLAG_RESCAN ;
return PTR_ERR ( trans ) ;
}
2016-09-09 21:39:03 -04:00
ret = btrfs_commit_transaction ( trans ) ;
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
if ( ret ) {
fs_info - > qgroup_flags & = ~ BTRFS_QGROUP_STATUS_FLAG_RESCAN ;
return ret ;
}
qgroup_rescan_zero_tracking ( fs_info ) ;
2014-02-28 10:46:16 +08:00
btrfs_queue_work ( fs_info - > qgroup_rescan_workers ,
& fs_info - > qgroup_rescan_work ) ;
2013-04-25 16:04:51 +00:00
return 0 ;
}
2013-05-06 19:14:17 +00:00
2016-08-08 22:08:06 -04:00
/*
 * Wait until a running qgroup rescan has completed.
 *
 * fs_info->qgroup_rescan_running is sampled with both qgroup_rescan_lock
 * and qgroup_lock held.  If it is not set there is nothing to wait for and
 * 0 is returned right away; otherwise the caller blocks on
 * fs_info->qgroup_rescan_completion.
 *
 * @interruptible selects wait_for_completion_interruptible(), whose return
 * value is passed back to the caller; the uninterruptible wait always
 * returns 0.
 */
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible)
{
	int rescan_active;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	rescan_active = fs_info->qgroup_rescan_running;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (!rescan_active)
		return 0;

	if (!interruptible) {
		wait_for_completion(&fs_info->qgroup_rescan_completion);
		return 0;
	}

	return wait_for_completion_interruptible(
				&fs_info->qgroup_rescan_completion);
}
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restructures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronization problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
/*
* this is only called from open_ctree where we ' re still single threaded , thus
* locking is omitted here .
*/
void
btrfs_qgroup_rescan_resume ( struct btrfs_fs_info * fs_info )
{
if ( fs_info - > qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN )
2014-02-28 10:46:16 +08:00
btrfs_queue_work ( fs_info - > qgroup_rescan_workers ,
& fs_info - > qgroup_rescan_work ) ;
Btrfs: fix qgroup rescan resume on mount
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restuctures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.
First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronizations problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.
Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable
Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]
qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.
We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.
As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-05-28 15:47:24 +00:00
}
2015-10-12 16:05:40 +08:00
/*
* Reserve qgroup space for range [ start , start + len ) .
*
* This function will either reserve space from related qgroups or doing
* nothing if the range is already reserved .
*
* Return 0 for successful reserve
* Return < 0 for error ( including - EQUOT )
*
* NOTE : this function may sleep for memory allocation .
2017-02-27 15:10:38 +08:00
* if btrfs_qgroup_reserve_data ( ) is called multiple times with
* same @ reserved , caller must ensure when error happens it ' s OK
* to free * ALL * reserved space .
2015-10-12 16:05:40 +08:00
*/
2017-02-27 15:10:38 +08:00
int btrfs_qgroup_reserve_data ( struct inode * inode ,
struct extent_changeset * * reserved_ret , u64 start ,
u64 len )
2015-10-12 16:05:40 +08:00
{
struct btrfs_root * root = BTRFS_I ( inode ) - > root ;
struct ulist_node * unode ;
struct ulist_iterator uiter ;
2017-02-27 15:10:38 +08:00
struct extent_changeset * reserved ;
u64 orig_reserved ;
u64 to_reserve ;
2015-10-12 16:05:40 +08:00
int ret ;
2016-09-02 15:40:02 -04:00
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & root - > fs_info - > flags ) | |
! is_fstree ( root - > objectid ) | | len = = 0 )
2015-10-12 16:05:40 +08:00
return 0 ;
2017-02-27 15:10:38 +08:00
/* @reserved parameter is mandatory for qgroup */
if ( WARN_ON ( ! reserved_ret ) )
return - EINVAL ;
if ( ! * reserved_ret ) {
* reserved_ret = extent_changeset_alloc ( ) ;
if ( ! * reserved_ret )
return - ENOMEM ;
}
reserved = * reserved_ret ;
/* Record already reserved space */
orig_reserved = reserved - > bytes_changed ;
2015-10-12 16:05:40 +08:00
ret = set_record_extent_bits ( & BTRFS_I ( inode ) - > io_tree , start ,
2017-02-27 15:10:38 +08:00
start + len - 1 , EXTENT_QGROUP_RESERVED , reserved ) ;
/* Newly reserved space */
to_reserve = reserved - > bytes_changed - orig_reserved ;
2015-09-28 16:57:53 +08:00
trace_btrfs_qgroup_reserve_data ( inode , start , len ,
2017-02-27 15:10:38 +08:00
to_reserve , QGROUP_RESERVE ) ;
2015-10-12 16:05:40 +08:00
if ( ret < 0 )
goto cleanup ;
2017-02-27 15:10:38 +08:00
ret = qgroup_reserve ( root , to_reserve , true ) ;
2015-10-12 16:05:40 +08:00
if ( ret < 0 )
goto cleanup ;
return ret ;
cleanup :
2017-02-27 15:10:38 +08:00
/* cleanup *ALL* already reserved ranges */
2015-10-12 16:05:40 +08:00
ULIST_ITER_INIT ( & uiter ) ;
2017-02-27 15:10:38 +08:00
while ( ( unode = ulist_next ( & reserved - > range_changed , & uiter ) ) )
2015-10-12 16:05:40 +08:00
clear_extent_bit ( & BTRFS_I ( inode ) - > io_tree , unode - > val ,
unode - > aux , EXTENT_QGROUP_RESERVED , 0 , 0 , NULL ,
GFP_NOFS ) ;
2017-02-27 15:10:38 +08:00
extent_changeset_release ( reserved ) ;
2015-10-12 16:05:40 +08:00
return ret ;
}
2015-10-12 16:28:06 +08:00
btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
[BUG]
For the following case, btrfs can underflow qgroup reserved space
at an error path:
(Page size 4K, function name without "btrfs_" prefix)
Task A | Task B
----------------------------------------------------------------------
Buffered_write [0, 2K) |
|- check_data_free_space() |
| |- qgroup_reserve_data() |
| Range aligned to page |
| range [0, 4K) <<< |
| 4K bytes reserved <<< |
|- copy pages to page cache |
| Buffered_write [2K, 4K)
| |- check_data_free_space()
| | |- qgroup_reserved_data()
| | Range aligned to page
| | range [0, 4K)
| | Already reserved by A <<<
| | 0 bytes reserved <<<
| |- delalloc_reserve_metadata()
| | And it *FAILED* (Maybe EQUOTA)
| |- free_reserved_data_space()
|- qgroup_free_data()
Range aligned to page range
[0, 4K)
Freeing 4K
(Special thanks to Chandan for the detailed report and analysis)
[CAUSE]
Above Task B is freeing reserved data range [0, 4K) which is actually
reserved by Task A.
And at writeback time, page dirty by Task A will go through writeback
routine, which will free 4K reserved data space at file extent insert
time, causing the qgroup underflow.
[FIX]
For btrfs_qgroup_free_data(), add @reserved parameter to only free
data ranges reserved by previous btrfs_qgroup_reserve_data().
So in above case, Task B will try to free 0 byte, so no underflow.
Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2017-02-27 15:10:39 +08:00
/*
 * Free the ranges recorded in @reserved, normally in an error path.
 *
 * Only the parts of the recorded ranges that intersect the sector-aligned
 * window around [@start, @start + @len) are considered.  For each such part
 * EXTENT_QGROUP_RESERVED is cleared from the inode's io_tree (the tree the
 * bit was set on by btrfs_qgroup_reserve_data()), and the number of bytes
 * that actually had the bit set is given back to the qgroups of the root.
 *
 * Return the number of bytes freed on success.
 * Return <0 if clearing the extent bits fails; in that case nothing is
 * returned to the qgroups.
 */
static int qgroup_free_reserved_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset changeset;
	u64 end;
	int freed = 0;
	int ret;

	extent_changeset_init(&changeset);

	/*
	 * Widen the window to sector boundaries.  @end is the exclusive end
	 * offset; @len is recomputed from it so that start + len really is
	 * the end of the window and not something beyond it.
	 */
	end = round_up(start + len, root->fs_info->sectorsize);
	start = round_down(start, root->fs_info->sectorsize);
	len = end - start;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
		u64 range_start = unode->val;
		/* unode->aux is the inclusive end */
		u64 range_len = unode->aux - range_start + 1;
		u64 free_start;
		u64 free_len;

		/* Start every iteration with an empty scratch changeset */
		extent_changeset_release(&changeset);

		/* Only free range in range [start, start + len) */
		if (range_start >= start + len ||
		    range_start + range_len <= start)
			continue;
		free_start = max(range_start, start);
		free_len = min(start + len, range_start + range_len) -
			   free_start;
		/*
		 * TODO: To also modify reserved->ranges_reserved to reflect
		 * the modification.
		 *
		 * However as long as we free qgroup reserved according to
		 * EXTENT_QGROUP_RESERVED, we won't double free.
		 * So not need to rush.
		 */
		ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
				free_start, free_start + free_len - 1,
				EXTENT_QGROUP_RESERVED, &changeset);
		if (ret < 0)
			goto out;
		freed += changeset.bytes_changed;
	}
	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
	ret = freed;
out:
	extent_changeset_release(&changeset);
	return ret;
}
/*
 * Common helper behind btrfs_qgroup_free_data() and
 * btrfs_qgroup_release_data().
 *
 * Clears EXTENT_QGROUP_RESERVED for [start, start + len) in the inode's
 * io_tree.  When @free is non-zero the cleared bytes are also given back to
 * the qgroups of the inode's root; when it is zero only the io_tree is
 * touched.
 *
 * If @free is set and @reserved is given, the work is delegated to
 * qgroup_free_reserved_data().  Passing @reserved without @free is not
 * expected and triggers a warning.
 *
 * Return the number of bytes whose bit was cleared, or <0 on error.
 */
static int __btrfs_qgroup_release_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len,
			int free)
{
	struct extent_changeset cleared;
	int ret;

	/* A pure release must not come with a changeset */
	WARN_ON(!free && reserved);
	if (free && reserved)
		return qgroup_free_reserved_data(inode, reserved, start, len);

	extent_changeset_init(&cleared);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
				       start + len - 1, EXTENT_QGROUP_RESERVED,
				       &cleared);
	if (ret >= 0) {
		trace_btrfs_qgroup_release_data(inode, start, len,
					cleared.bytes_changed,
					free ? QGROUP_FREE : QGROUP_RELEASE);
		if (free)
			btrfs_qgroup_free_refroot(
					BTRFS_I(inode)->root->fs_info,
					BTRFS_I(inode)->root->objectid,
					cleared.bytes_changed);
		ret = cleared.bytes_changed;
	}

	extent_changeset_release(&cleared);
	return ret;
}
/*
* Free a reserved space range from io_tree and related qgroups
*
* Should be called when a range of pages get invalidated before reaching disk .
* Or for error cleanup case .
btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
[BUG]
For the following case, btrfs can underflow qgroup reserved space
at an error path:
(Page size 4K, function name without "btrfs_" prefix)
Task A | Task B
----------------------------------------------------------------------
Buffered_write [0, 2K) |
|- check_data_free_space() |
| |- qgroup_reserve_data() |
| Range aligned to page |
| range [0, 4K) <<< |
| 4K bytes reserved <<< |
|- copy pages to page cache |
| Buffered_write [2K, 4K)
| |- check_data_free_space()
| | |- qgroup_reserved_data()
| | Range alinged to page
| | range [0, 4K)
| | Already reserved by A <<<
| | 0 bytes reserved <<<
| |- delalloc_reserve_metadata()
| | And it *FAILED* (Maybe EQUOTA)
| |- free_reserved_data_space()
|- qgroup_free_data()
Range aligned to page range
[0, 4K)
Freeing 4K
(Special thanks to Chandan for the detailed report and analyse)
[CAUSE]
Above Task B is freeing reserved data range [0, 4K) which is actually
reserved by Task A.
And at writeback time, page dirty by Task A will go through writeback
routine, which will free 4K reserved data space at file extent insert
time, causing the qgroup underflow.
[FIX]
For btrfs_qgroup_free_data(), add @reserved parameter to only free
data ranges reserved by previous btrfs_qgroup_reserve_data().
So in above case, Task B will try to free 0 byte, so no underflow.
Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2017-02-27 15:10:39 +08:00
* if @ reserved is given , only reserved range in [ @ start , @ start + @ len ) will
* be freed .
2015-10-12 16:28:06 +08:00
*
* For data written to disk , use btrfs_qgroup_release_data ( ) .
*
* NOTE : This function may sleep for memory allocation .
*/
btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
[BUG]
For the following case, btrfs can underflow qgroup reserved space
at an error path:
(Page size 4K, function name without "btrfs_" prefix)
Task A | Task B
----------------------------------------------------------------------
Buffered_write [0, 2K) |
|- check_data_free_space() |
| |- qgroup_reserve_data() |
| Range aligned to page |
| range [0, 4K) <<< |
| 4K bytes reserved <<< |
|- copy pages to page cache |
| Buffered_write [2K, 4K)
| |- check_data_free_space()
| | |- qgroup_reserved_data()
| | Range alinged to page
| | range [0, 4K)
| | Already reserved by A <<<
| | 0 bytes reserved <<<
| |- delalloc_reserve_metadata()
| | And it *FAILED* (Maybe EQUOTA)
| |- free_reserved_data_space()
|- qgroup_free_data()
Range aligned to page range
[0, 4K)
Freeing 4K
(Special thanks to Chandan for the detailed report and analyse)
[CAUSE]
Above Task B is freeing reserved data range [0, 4K) which is actually
reserved by Task A.
And at writeback time, page dirty by Task A will go through writeback
routine, which will free 4K reserved data space at file extent insert
time, causing the qgroup underflow.
[FIX]
For btrfs_qgroup_free_data(), add @reserved parameter to only free
data ranges reserved by previous btrfs_qgroup_reserve_data().
So in above case, Task B will try to free 0 byte, so no underflow.
Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2017-02-27 15:10:39 +08:00
int btrfs_qgroup_free_data ( struct inode * inode ,
struct extent_changeset * reserved , u64 start , u64 len )
2015-10-12 16:28:06 +08:00
{
btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
[BUG]
For the following case, btrfs can underflow qgroup reserved space
at an error path:
(Page size 4K, function name without "btrfs_" prefix)
Task A | Task B
----------------------------------------------------------------------
Buffered_write [0, 2K) |
|- check_data_free_space() |
| |- qgroup_reserve_data() |
| Range aligned to page |
| range [0, 4K) <<< |
| 4K bytes reserved <<< |
|- copy pages to page cache |
| Buffered_write [2K, 4K)
| |- check_data_free_space()
| | |- qgroup_reserved_data()
| | Range alinged to page
| | range [0, 4K)
| | Already reserved by A <<<
| | 0 bytes reserved <<<
| |- delalloc_reserve_metadata()
| | And it *FAILED* (Maybe EQUOTA)
| |- free_reserved_data_space()
|- qgroup_free_data()
Range aligned to page range
[0, 4K)
Freeing 4K
(Special thanks to Chandan for the detailed report and analyse)
[CAUSE]
Above Task B is freeing reserved data range [0, 4K) which is actually
reserved by Task A.
And at writeback time, page dirty by Task A will go through writeback
routine, which will free 4K reserved data space at file extent insert
time, causing the qgroup underflow.
[FIX]
For btrfs_qgroup_free_data(), add @reserved parameter to only free
data ranges reserved by previous btrfs_qgroup_reserve_data().
So in above case, Task B will try to free 0 byte, so no underflow.
Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2017-02-27 15:10:39 +08:00
return __btrfs_qgroup_release_data ( inode , reserved , start , len , 1 ) ;
2015-10-12 16:28:06 +08:00
}
/*
 * Release a reserved space range from io_tree only.
 *
 * To be used once a range of pages has been written to disk and the
 * matching FILE_EXTENT has been inserted into its root.
 *
 * The new qgroup accounting framework updates the qgroup numbers only at
 * commit_transaction() time, so the reserved space must not be handed back
 * to the related qgroups here.
 *
 * The range still has to be dropped from io_tree, so that later writes to
 * it can be COWed.
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
	/* Zero: only clear the io_tree bit, keep the qgroup numbers */
	const int free_rsv = 0;

	return __btrfs_qgroup_release_data(inode, NULL, start, len, free_rsv);
}
2015-09-08 17:08:38 +08:00
2017-01-25 09:50:33 -05:00
/*
 * Reserve @num_bytes of qgroup metadata space for @root.
 *
 * Returns 0 without doing anything when quota is disabled, @root is not an
 * fs tree, or @num_bytes is 0.  @num_bytes must be a multiple of the
 * filesystem nodesize (BUG otherwise).
 *
 * The reservation is made through qgroup_reserve(), with @enforce passed
 * along unchanged.  On success the amount is added to root->qgroup_meta_rsv.
 *
 * Return the result of qgroup_reserve(); <0 on failure.
 */
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
			      bool enforce)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;
	if (!is_fstree(root->objectid))
		return 0;
	if (num_bytes == 0)
		return 0;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	trace_qgroup_meta_reserve(root, (s64)num_bytes);

	ret = qgroup_reserve(root, num_bytes, enforce);
	if (ret >= 0)
		atomic64_add(num_bytes, &root->qgroup_meta_rsv);

	return ret;
}
void btrfs_qgroup_free_meta_all ( struct btrfs_root * root )
{
2016-06-22 18:54:23 -04:00
struct btrfs_fs_info * fs_info = root - > fs_info ;
2017-03-14 05:25:09 -05:00
u64 reserved ;
2015-09-08 17:08:38 +08:00
2016-06-22 18:54:23 -04:00
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) | |
2016-09-02 15:40:02 -04:00
! is_fstree ( root - > objectid ) )
2015-09-08 17:08:38 +08:00
return ;
2017-03-14 05:25:09 -05:00
reserved = atomic64_xchg ( & root - > qgroup_meta_rsv , 0 ) ;
2015-09-08 17:08:38 +08:00
if ( reserved = = 0 )
return ;
2017-03-13 15:52:08 +08:00
trace_qgroup_meta_reserve ( root , - ( s64 ) reserved ) ;
2017-02-13 14:24:35 +01:00
btrfs_qgroup_free_refroot ( fs_info , root - > objectid , reserved ) ;
2015-09-08 17:08:38 +08:00
}
void btrfs_qgroup_free_meta ( struct btrfs_root * root , int num_bytes )
{
2016-06-22 18:54:23 -04:00
struct btrfs_fs_info * fs_info = root - > fs_info ;
if ( ! test_bit ( BTRFS_FS_QUOTA_ENABLED , & fs_info - > flags ) | |
2016-09-02 15:40:02 -04:00
! is_fstree ( root - > objectid ) )
2015-09-08 17:08:38 +08:00
return ;
2016-06-22 18:54:23 -04:00
BUG_ON ( num_bytes ! = round_down ( num_bytes , fs_info - > nodesize ) ) ;
2017-03-14 05:25:09 -05:00
WARN_ON ( atomic64_read ( & root - > qgroup_meta_rsv ) < num_bytes ) ;
atomic64_sub ( num_bytes , & root - > qgroup_meta_rsv ) ;
2017-03-13 15:52:08 +08:00
trace_qgroup_meta_reserve ( root , - ( s64 ) num_bytes ) ;
2017-02-13 14:24:35 +01:00
btrfs_qgroup_free_refroot ( fs_info , root - > objectid , num_bytes ) ;
2015-09-08 17:08:38 +08:00
}
2015-10-13 09:53:10 +08:00
/*
2016-05-19 21:18:45 -04:00
* Check qgroup reserved space leaking , normally at destroy inode
2015-10-13 09:53:10 +08:00
* time
*/
void btrfs_qgroup_check_reserved_leak ( struct inode * inode )
{
struct extent_changeset changeset ;
struct ulist_node * unode ;
struct ulist_iterator iter ;
int ret ;
2017-02-27 15:10:38 +08:00
extent_changeset_init ( & changeset ) ;
2015-10-13 09:53:10 +08:00
ret = clear_record_extent_bits ( & BTRFS_I ( inode ) - > io_tree , 0 , ( u64 ) - 1 ,
2016-04-26 23:54:39 +02:00
EXTENT_QGROUP_RESERVED , & changeset ) ;
2015-10-13 09:53:10 +08:00
WARN_ON ( ret < 0 ) ;
if ( WARN_ON ( changeset . bytes_changed ) ) {
ULIST_ITER_INIT ( & iter ) ;
2017-02-13 13:42:29 +01:00
while ( ( unode = ulist_next ( & changeset . range_changed , & iter ) ) ) {
2015-10-13 09:53:10 +08:00
btrfs_warn ( BTRFS_I ( inode ) - > root - > fs_info ,
" leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu " ,
inode - > i_ino , unode - > val , unode - > aux ) ;
}
2017-02-13 14:24:35 +01:00
btrfs_qgroup_free_refroot ( BTRFS_I ( inode ) - > root - > fs_info ,
BTRFS_I ( inode ) - > root - > objectid ,
changeset . bytes_changed ) ;
2015-10-13 09:53:10 +08:00
}
2017-02-27 15:10:38 +08:00
extent_changeset_release ( & changeset ) ;
2015-10-13 09:53:10 +08:00
}