2011-11-01 00:19:11 +04:00
/*
* Copyright ( C ) 2011 Red Hat , Inc .
*
* This file is released under the GPL .
*/
# include "dm-btree.h"
# include "dm-btree-internal.h"
# include "dm-transaction-manager.h"
2011-09-29 02:29:32 +04:00
# include <linux/export.h>
2021-12-10 16:36:06 +03:00
# include <linux/device-mapper.h>
# define DM_MSG_PREFIX "btree"
2011-11-01 00:19:11 +04:00
/*
* Removing an entry from a btree
* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
*
* A very important constraint for our btree is that no node, except the
* root, may have fewer than a certain number of entries.
* (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
*
* Ensuring this is complicated by the way we want to only ever hold the
* locks on 2 nodes concurrently , and only change nodes in a top to bottom
* fashion .
*
* Each node may have a left or right sibling.  When descending the spine,
* if a node contains only MIN_ENTRIES then we try and increase this to at
* least MIN_ENTRIES + 1.  We do this in the following ways:
*
* [A] No siblings => this can only happen if the node is the root, in which
*     case we copy the child's contents over the root.
*
* [ B ] No left sibling
* = = > rebalance ( node , right sibling )
*
* [ C ] No right sibling
* = = > rebalance ( left sibling , node )
*
* [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
*     ==> delete node adding its contents to left and right
*
* [ E ] Both siblings , total_entries ( left , node , right ) > DEL_THRESHOLD
* = = > rebalance ( left , node , right )
*
* After these operations it's possible that our original node no
* longer contains the desired sub tree.  For this reason this rebalancing
* is performed on the children of the current node.  This also avoids
* having a special case for the root.
*
* Once this rebalancing has occurred we can then step into the child node
* for internal nodes . Or delete the entry for leaf nodes .
*/
/*
* Some little utilities for moving node data around .
*/
2012-12-22 00:23:30 +04:00
static void node_shift ( struct btree_node * n , int shift )
2011-11-01 00:19:11 +04:00
{
uint32_t nr_entries = le32_to_cpu ( n - > header . nr_entries ) ;
uint32_t value_size = le32_to_cpu ( n - > header . value_size ) ;
if ( shift < 0 ) {
shift = - shift ;
BUG_ON ( shift > nr_entries ) ;
2012-03-28 21:41:25 +04:00
BUG_ON ( ( void * ) key_ptr ( n , shift ) > = value_ptr ( n , shift ) ) ;
2011-11-01 00:19:11 +04:00
memmove ( key_ptr ( n , 0 ) ,
key_ptr ( n , shift ) ,
( nr_entries - shift ) * sizeof ( __le64 ) ) ;
2012-03-28 21:41:25 +04:00
memmove ( value_ptr ( n , 0 ) ,
value_ptr ( n , shift ) ,
2011-11-01 00:19:11 +04:00
( nr_entries - shift ) * value_size ) ;
} else {
BUG_ON ( nr_entries + shift > le32_to_cpu ( n - > header . max_entries ) ) ;
memmove ( key_ptr ( n , shift ) ,
key_ptr ( n , 0 ) ,
nr_entries * sizeof ( __le64 ) ) ;
2012-03-28 21:41:25 +04:00
memmove ( value_ptr ( n , shift ) ,
value_ptr ( n , 0 ) ,
2011-11-01 00:19:11 +04:00
nr_entries * value_size ) ;
}
}
2021-12-10 16:36:06 +03:00
static int node_copy ( struct btree_node * left , struct btree_node * right , int shift )
2011-11-01 00:19:11 +04:00
{
uint32_t nr_left = le32_to_cpu ( left - > header . nr_entries ) ;
uint32_t value_size = le32_to_cpu ( left - > header . value_size ) ;
2021-12-10 16:36:06 +03:00
if ( value_size ! = le32_to_cpu ( right - > header . value_size ) ) {
DMERR ( " mismatched value size " ) ;
return - EILSEQ ;
}
2011-11-01 00:19:11 +04:00
if ( shift < 0 ) {
shift = - shift ;
2021-12-10 16:36:06 +03:00
if ( nr_left + shift > le32_to_cpu ( left - > header . max_entries ) ) {
DMERR ( " bad shift " ) ;
return - EINVAL ;
}
2011-11-01 00:19:11 +04:00
memcpy ( key_ptr ( left , nr_left ) ,
key_ptr ( right , 0 ) ,
shift * sizeof ( __le64 ) ) ;
2012-03-28 21:41:25 +04:00
memcpy ( value_ptr ( left , nr_left ) ,
value_ptr ( right , 0 ) ,
2011-11-01 00:19:11 +04:00
shift * value_size ) ;
} else {
2021-12-10 16:36:06 +03:00
if ( shift > le32_to_cpu ( right - > header . max_entries ) ) {
DMERR ( " bad shift " ) ;
return - EINVAL ;
}
2011-11-01 00:19:11 +04:00
memcpy ( key_ptr ( right , 0 ) ,
key_ptr ( left , nr_left - shift ) ,
shift * sizeof ( __le64 ) ) ;
2012-03-28 21:41:25 +04:00
memcpy ( value_ptr ( right , 0 ) ,
value_ptr ( left , nr_left - shift ) ,
2011-11-01 00:19:11 +04:00
shift * value_size ) ;
}
2021-12-10 16:36:06 +03:00
return 0 ;
2011-11-01 00:19:11 +04:00
}
/*
* Delete a specific entry from a leaf node .
*/
2012-12-22 00:23:30 +04:00
static void delete_at ( struct btree_node * n , unsigned index )
2011-11-01 00:19:11 +04:00
{
unsigned nr_entries = le32_to_cpu ( n - > header . nr_entries ) ;
unsigned nr_to_copy = nr_entries - ( index + 1 ) ;
uint32_t value_size = le32_to_cpu ( n - > header . value_size ) ;
BUG_ON ( index > = nr_entries ) ;
if ( nr_to_copy ) {
memmove ( key_ptr ( n , index ) ,
key_ptr ( n , index + 1 ) ,
nr_to_copy * sizeof ( __le64 ) ) ;
2012-03-28 21:41:25 +04:00
memmove ( value_ptr ( n , index ) ,
value_ptr ( n , index + 1 ) ,
2011-11-01 00:19:11 +04:00
nr_to_copy * value_size ) ;
}
n - > header . nr_entries = cpu_to_le32 ( nr_entries - 1 ) ;
}
2012-12-22 00:23:30 +04:00
static unsigned merge_threshold ( struct btree_node * n )
2011-11-01 00:19:11 +04:00
{
2012-03-28 21:41:23 +04:00
return le32_to_cpu ( n - > header . max_entries ) / 3 ;
2011-11-01 00:19:11 +04:00
}
/*
 * Bookkeeping for one child of the node being rebalanced; filled in by
 * init_child().
 */
struct child {
	unsigned index;			/* slot of this child in its parent */
	struct dm_block *block;		/* block obtained from dm_tm_shadow_block() */
	struct btree_node *n;		/* dm_block_data(block) */
};
2013-03-20 21:21:24 +04:00
static int init_child ( struct dm_btree_info * info , struct dm_btree_value_type * vt ,
struct btree_node * parent ,
2011-11-01 00:19:11 +04:00
unsigned index , struct child * result )
{
int r , inc ;
dm_block_t root ;
result - > index = index ;
root = value64 ( parent , index ) ;
r = dm_tm_shadow_block ( info - > tm , root , & btree_node_validator ,
& result - > block , & inc ) ;
if ( r )
return r ;
result - > n = dm_block_data ( result - > block ) ;
if ( inc )
2013-03-20 21:21:24 +04:00
inc_children ( info - > tm , result - > n , vt ) ;
2011-11-01 00:19:11 +04:00
2012-03-28 21:41:25 +04:00
* ( ( __le64 * ) value_ptr ( parent , index ) ) =
2011-11-01 00:19:11 +04:00
cpu_to_le64 ( dm_block_location ( result - > block ) ) ;
return 0 ;
}
2015-10-22 23:46:59 +03:00
static void exit_child ( struct dm_btree_info * info , struct child * c )
2011-11-01 00:19:11 +04:00
{
2015-10-22 23:46:59 +03:00
dm_tm_unlock ( info - > tm , c - > block ) ;
2011-11-01 00:19:11 +04:00
}
2021-12-10 16:36:06 +03:00
static int shift ( struct btree_node * left , struct btree_node * right , int count )
2011-11-01 00:19:11 +04:00
{
2021-12-10 16:36:06 +03:00
int r ;
2012-03-28 21:41:23 +04:00
uint32_t nr_left = le32_to_cpu ( left - > header . nr_entries ) ;
uint32_t nr_right = le32_to_cpu ( right - > header . nr_entries ) ;
uint32_t max_entries = le32_to_cpu ( left - > header . max_entries ) ;
uint32_t r_max_entries = le32_to_cpu ( right - > header . max_entries ) ;
2021-12-10 16:36:06 +03:00
if ( max_entries ! = r_max_entries ) {
DMERR ( " node max_entries mismatch " ) ;
return - EILSEQ ;
}
if ( nr_left - count > max_entries ) {
DMERR ( " node shift out of bounds " ) ;
return - EINVAL ;
}
if ( nr_right + count > max_entries ) {
DMERR ( " node shift out of bounds " ) ;
return - EINVAL ;
}
2012-03-28 21:41:23 +04:00
2011-11-01 00:19:11 +04:00
if ( ! count )
2021-12-10 16:36:06 +03:00
return 0 ;
2011-11-01 00:19:11 +04:00
if ( count > 0 ) {
node_shift ( right , count ) ;
2021-12-10 16:36:06 +03:00
r = node_copy ( left , right , count ) ;
if ( r )
return r ;
2011-11-01 00:19:11 +04:00
} else {
2021-12-10 16:36:06 +03:00
r = node_copy ( left , right , count ) ;
if ( r )
return r ;
2011-11-01 00:19:11 +04:00
node_shift ( right , count ) ;
}
2012-03-28 21:41:23 +04:00
left - > header . nr_entries = cpu_to_le32 ( nr_left - count ) ;
right - > header . nr_entries = cpu_to_le32 ( nr_right + count ) ;
2021-12-10 16:36:06 +03:00
return 0 ;
2011-11-01 00:19:11 +04:00
}
2021-12-10 16:36:06 +03:00
static int __rebalance2 ( struct dm_btree_info * info , struct btree_node * parent ,
struct child * l , struct child * r )
2011-11-01 00:19:11 +04:00
{
2021-12-10 16:36:06 +03:00
int ret ;
2012-12-22 00:23:30 +04:00
struct btree_node * left = l - > n ;
struct btree_node * right = r - > n ;
2011-11-01 00:19:11 +04:00
uint32_t nr_left = le32_to_cpu ( left - > header . nr_entries ) ;
uint32_t nr_right = le32_to_cpu ( right - > header . nr_entries ) ;
2019-12-03 14:42:58 +03:00
/*
* Ensure the number of entries in each child will be greater
* than or equal to ( max_entries / 3 + 1 ) , so no matter which
* child is used for removal , the number will still be not
* less than ( max_entries / 3 ) .
*/
unsigned int threshold = 2 * ( merge_threshold ( left ) + 1 ) ;
2011-11-01 00:19:11 +04:00
2012-03-28 21:41:23 +04:00
if ( nr_left + nr_right < threshold ) {
2011-11-01 00:19:11 +04:00
/*
* Merge
*/
node_copy ( left , right , - nr_right ) ;
left - > header . nr_entries = cpu_to_le32 ( nr_left + nr_right ) ;
delete_at ( parent , r - > index ) ;
/*
* We need to decrement the right block , but not it ' s
* children , since they ' re still referenced by left .
*/
dm_tm_dec ( info - > tm , dm_block_location ( r - > block ) ) ;
} else {
/*
* Rebalance .
*/
unsigned target_left = ( nr_left + nr_right ) / 2 ;
2021-12-10 16:36:06 +03:00
ret = shift ( left , right , nr_left - target_left ) ;
if ( ret )
return ret ;
2011-11-01 00:19:11 +04:00
* key_ptr ( parent , r - > index ) = right - > keys [ 0 ] ;
}
2021-12-10 16:36:06 +03:00
return 0 ;
2011-11-01 00:19:11 +04:00
}
static int rebalance2 ( struct shadow_spine * s , struct dm_btree_info * info ,
2013-03-20 21:21:24 +04:00
struct dm_btree_value_type * vt , unsigned left_index )
2011-11-01 00:19:11 +04:00
{
int r ;
2012-12-22 00:23:30 +04:00
struct btree_node * parent ;
2011-11-01 00:19:11 +04:00
struct child left , right ;
parent = dm_block_data ( shadow_current ( s ) ) ;
2013-03-20 21:21:24 +04:00
r = init_child ( info , vt , parent , left_index , & left ) ;
2011-11-01 00:19:11 +04:00
if ( r )
return r ;
2013-03-20 21:21:24 +04:00
r = init_child ( info , vt , parent , left_index + 1 , & right ) ;
2011-11-01 00:19:11 +04:00
if ( r ) {
exit_child ( info , & left ) ;
return r ;
}
2021-12-10 16:36:06 +03:00
r = __rebalance2 ( info , parent , & left , & right ) ;
2011-11-01 00:19:11 +04:00
2015-10-22 23:46:59 +03:00
exit_child ( info , & left ) ;
exit_child ( info , & right ) ;
2011-11-01 00:19:11 +04:00
2021-12-10 16:36:06 +03:00
return r ;
2011-11-01 00:19:11 +04:00
}
2012-03-28 21:41:23 +04:00
/*
* We dump as many entries from center as possible into left , then the rest
* in right , then rebalance2 . This wastes some cpu , but I want something
* simple atm .
*/
2021-12-10 16:36:06 +03:00
static int delete_center_node ( struct dm_btree_info * info , struct btree_node * parent ,
struct child * l , struct child * c , struct child * r ,
struct btree_node * left , struct btree_node * center , struct btree_node * right ,
uint32_t nr_left , uint32_t nr_center , uint32_t nr_right )
2012-03-28 21:41:23 +04:00
{
uint32_t max_entries = le32_to_cpu ( left - > header . max_entries ) ;
unsigned shift = min ( max_entries - nr_left , nr_center ) ;
2021-12-10 16:36:06 +03:00
if ( nr_left + shift > max_entries ) {
DMERR ( " node shift out of bounds " ) ;
return - EINVAL ;
}
2012-03-28 21:41:23 +04:00
node_copy ( left , center , - shift ) ;
left - > header . nr_entries = cpu_to_le32 ( nr_left + shift ) ;
if ( shift ! = nr_center ) {
shift = nr_center - shift ;
2021-12-10 16:36:06 +03:00
if ( ( nr_right + shift ) > max_entries ) {
DMERR ( " node shift out of bounds " ) ;
return - EINVAL ;
}
2012-03-28 21:41:23 +04:00
node_shift ( right , shift ) ;
node_copy ( center , right , shift ) ;
right - > header . nr_entries = cpu_to_le32 ( nr_right + shift ) ;
}
* key_ptr ( parent , r - > index ) = right - > keys [ 0 ] ;
delete_at ( parent , c - > index ) ;
r - > index - - ;
dm_tm_dec ( info - > tm , dm_block_location ( c - > block ) ) ;
2021-12-10 16:36:06 +03:00
return __rebalance2 ( info , parent , l , r ) ;
2012-03-28 21:41:23 +04:00
}
/*
* Redistributes entries among 3 sibling nodes .
*/
2021-12-10 16:36:06 +03:00
static int redistribute3 ( struct dm_btree_info * info , struct btree_node * parent ,
struct child * l , struct child * c , struct child * r ,
struct btree_node * left , struct btree_node * center , struct btree_node * right ,
uint32_t nr_left , uint32_t nr_center , uint32_t nr_right )
2012-03-28 21:41:23 +04:00
{
2021-12-10 16:36:06 +03:00
int s , ret ;
2012-03-28 21:41:23 +04:00
uint32_t max_entries = le32_to_cpu ( left - > header . max_entries ) ;
2015-10-21 20:36:49 +03:00
unsigned total = nr_left + nr_center + nr_right ;
unsigned target_right = total / 3 ;
unsigned remainder = ( target_right * 3 ) ! = total ;
unsigned target_left = target_right + remainder ;
BUG_ON ( target_left > max_entries ) ;
BUG_ON ( target_right > max_entries ) ;
2012-03-28 21:41:23 +04:00
if ( nr_left < nr_right ) {
2015-10-21 20:36:49 +03:00
s = nr_left - target_left ;
2012-03-28 21:41:23 +04:00
if ( s < 0 & & nr_center < - s ) {
/* not enough in central node */
2021-12-10 16:36:06 +03:00
ret = shift ( left , center , - nr_center ) ;
if ( ret )
return ret ;
2012-03-28 21:41:23 +04:00
2021-12-10 16:36:06 +03:00
s + = nr_center ;
ret = shift ( left , right , s ) ;
if ( ret )
return ret ;
2012-03-28 21:41:23 +04:00
2021-12-10 16:36:06 +03:00
nr_right + = s ;
} else {
ret = shift ( left , center , s ) ;
if ( ret )
return ret ;
}
ret = shift ( center , right , target_right - nr_right ) ;
if ( ret )
return ret ;
2012-03-28 21:41:23 +04:00
} else {
2015-10-21 20:36:49 +03:00
s = target_right - nr_right ;
2012-03-28 21:41:23 +04:00
if ( s > 0 & & nr_center < s ) {
/* not enough in central node */
2021-12-10 16:36:06 +03:00
ret = shift ( center , right , nr_center ) ;
if ( ret )
return ret ;
2015-06-26 17:25:48 +03:00
s - = nr_center ;
2021-12-10 16:36:06 +03:00
ret = shift ( left , right , s ) ;
if ( ret )
return ret ;
2012-03-28 21:41:23 +04:00
nr_left - = s ;
2021-12-10 16:36:06 +03:00
} else {
ret = shift ( center , right , s ) ;
if ( ret )
return ret ;
}
ret = shift ( left , center , nr_left - target_left ) ;
if ( ret )
return ret ;
2012-03-28 21:41:23 +04:00
}
* key_ptr ( parent , c - > index ) = center - > keys [ 0 ] ;
* key_ptr ( parent , r - > index ) = right - > keys [ 0 ] ;
2021-12-10 16:36:06 +03:00
return 0 ;
2012-03-28 21:41:23 +04:00
}
2021-12-10 16:36:06 +03:00
static int __rebalance3 ( struct dm_btree_info * info , struct btree_node * parent ,
struct child * l , struct child * c , struct child * r )
2011-11-01 00:19:11 +04:00
{
2012-12-22 00:23:30 +04:00
struct btree_node * left = l - > n ;
struct btree_node * center = c - > n ;
struct btree_node * right = r - > n ;
2011-11-01 00:19:11 +04:00
uint32_t nr_left = le32_to_cpu ( left - > header . nr_entries ) ;
uint32_t nr_center = le32_to_cpu ( center - > header . nr_entries ) ;
uint32_t nr_right = le32_to_cpu ( right - > header . nr_entries ) ;
2012-03-28 21:41:23 +04:00
unsigned threshold = merge_threshold ( left ) * 4 + 1 ;
2011-11-01 00:19:11 +04:00
2021-12-10 16:36:06 +03:00
if ( ( left - > header . max_entries ! = center - > header . max_entries ) | |
( center - > header . max_entries ! = right - > header . max_entries ) ) {
DMERR ( " bad btree metadata, max_entries differ " ) ;
return - EILSEQ ;
}
2011-11-01 00:19:11 +04:00
2021-12-10 16:36:06 +03:00
if ( ( nr_left + nr_center + nr_right ) < threshold ) {
return delete_center_node ( info , parent , l , c , r , left , center , right ,
nr_left , nr_center , nr_right ) ;
}
return redistribute3 ( info , parent , l , c , r , left , center , right ,
nr_left , nr_center , nr_right ) ;
2011-11-01 00:19:11 +04:00
}
static int rebalance3 ( struct shadow_spine * s , struct dm_btree_info * info ,
2013-03-20 21:21:24 +04:00
struct dm_btree_value_type * vt , unsigned left_index )
2011-11-01 00:19:11 +04:00
{
int r ;
2012-12-22 00:23:30 +04:00
struct btree_node * parent = dm_block_data ( shadow_current ( s ) ) ;
2011-11-01 00:19:11 +04:00
struct child left , center , right ;
/*
* FIXME : fill out an array ?
*/
2013-03-20 21:21:24 +04:00
r = init_child ( info , vt , parent , left_index , & left ) ;
2011-11-01 00:19:11 +04:00
if ( r )
return r ;
2013-03-20 21:21:24 +04:00
r = init_child ( info , vt , parent , left_index + 1 , & center ) ;
2011-11-01 00:19:11 +04:00
if ( r ) {
exit_child ( info , & left ) ;
return r ;
}
2013-03-20 21:21:24 +04:00
r = init_child ( info , vt , parent , left_index + 2 , & right ) ;
2011-11-01 00:19:11 +04:00
if ( r ) {
exit_child ( info , & left ) ;
exit_child ( info , & center ) ;
return r ;
}
2021-12-10 16:36:06 +03:00
r = __rebalance3 ( info , parent , & left , & center , & right ) ;
2011-11-01 00:19:11 +04:00
2015-10-22 23:46:59 +03:00
exit_child ( info , & left ) ;
exit_child ( info , & center ) ;
exit_child ( info , & right ) ;
2011-11-01 00:19:11 +04:00
2021-12-10 16:36:06 +03:00
return r ;
2011-11-01 00:19:11 +04:00
}
static int rebalance_children ( struct shadow_spine * s ,
2013-03-20 21:21:24 +04:00
struct dm_btree_info * info ,
struct dm_btree_value_type * vt , uint64_t key )
2011-11-01 00:19:11 +04:00
{
int i , r , has_left_sibling , has_right_sibling ;
2012-12-22 00:23:30 +04:00
struct btree_node * n ;
2011-11-01 00:19:11 +04:00
n = dm_block_data ( shadow_current ( s ) ) ;
if ( le32_to_cpu ( n - > header . nr_entries ) = = 1 ) {
struct dm_block * child ;
dm_block_t b = value64 ( n , 0 ) ;
r = dm_tm_read_lock ( info - > tm , b , & btree_node_validator , & child ) ;
if ( r )
return r ;
memcpy ( n , dm_block_data ( child ) ,
dm_bm_block_size ( dm_tm_get_bm ( info - > tm ) ) ) ;
dm_tm_dec ( info - > tm , dm_block_location ( child ) ) ;
2021-11-24 20:07:39 +03:00
dm_tm_unlock ( info - > tm , child ) ;
2011-11-01 00:19:11 +04:00
return 0 ;
}
i = lower_bound ( n , key ) ;
if ( i < 0 )
return - ENODATA ;
has_left_sibling = i > 0 ;
has_right_sibling = i < ( le32_to_cpu ( n - > header . nr_entries ) - 1 ) ;
if ( ! has_left_sibling )
2013-03-20 21:21:24 +04:00
r = rebalance2 ( s , info , vt , i ) ;
2011-11-01 00:19:11 +04:00
else if ( ! has_right_sibling )
2013-03-20 21:21:24 +04:00
r = rebalance2 ( s , info , vt , i - 1 ) ;
2011-11-01 00:19:11 +04:00
else
2013-03-20 21:21:24 +04:00
r = rebalance3 ( s , info , vt , i - 1 ) ;
2011-11-01 00:19:11 +04:00
return r ;
}
2012-12-22 00:23:30 +04:00
static int do_leaf ( struct btree_node * n , uint64_t key , unsigned * index )
2011-11-01 00:19:11 +04:00
{
int i = lower_bound ( n , key ) ;
if ( ( i < 0 ) | |
( i > = le32_to_cpu ( n - > header . nr_entries ) ) | |
( le64_to_cpu ( n - > keys [ i ] ) ! = key ) )
return - ENODATA ;
* index = i ;
return 0 ;
}
/*
 * Prepares for removal from one level of the hierarchy.  The caller must
 * call delete_at() to remove the entry at index.
 *
 * Walks down from @root, shadowing each node with shadow_step() and
 * rebalancing the children of every internal node on the way, until a leaf
 * is reached.  On entry *@index is the slot in the spine's current parent
 * (if any) that refers to @root; on success it is the slot of @key in the
 * leaf that is now the spine's current node.
 *
 * Returns 0 on success, -ENODATA if @key is not present, or the error from
 * shadow_step()/rebalance_children().
 */
static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, dm_block_t root,
		      uint64_t key, unsigned *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		/* rebalance_children() may have replaced the node's contents. */
		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		i = lower_bound(n, key);

		/*
		 * rebalance_children() returns -ENODATA for a missing slot
		 * only on its multi-entry path.  When it collapses a
		 * single-entry node it never looks at the key, so the key
		 * may still sort before every entry of the new contents.
		 * Indexing with a negative slot would read outside the node.
		 */
		if (i < 0) {
			r = -ENODATA;
			break;
		}

		root = value64(n, i);
	}

	return r;
}
/*
 * Remove the entry addressed by @keys (one key per btree level, @keys[0]
 * for the top level) from the tree rooted at @root.
 *
 * Upper levels are walked with a le64 value type; at the last level the
 * value type's dec callback, if set, is invoked on the value before the
 * entry is deleted.
 *
 * On success (return 0) *@new_root receives the spine's root; on failure
 * *@new_root is not written and the error is returned.
 */
int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, dm_block_t *new_root)
{
	unsigned level, last_level = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_vt;
	struct dm_btree_value_type *vt;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);

	for (level = 0; level < info->levels; level++) {
		vt = (level != last_level) ? &le64_vt : &info->value_type;

		r = remove_raw(&spine, info, vt, root, keys[level],
			       (unsigned *)&index);
		if (r < 0)
			break;

		n = dm_block_data(shadow_current(&spine));

		if (level == last_level) {
			BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));

			if (info->value_type.dec)
				info->value_type.dec(info->value_type.context,
						     value_ptr(n, index), 1);

			delete_at(n, index);
		} else {
			/* The value found is the root of the next level down. */
			root = value64(n, index);
		}
	}

	if (r == 0)
		*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_remove);
2015-04-13 11:41:44 +03:00
/*----------------------------------------------------------------*/
/*
 * Like remove_raw(), but does not require an exact match in the leaf.
 *
 * Walks down from @root, shadowing and rebalancing on the way.  When a
 * leaf is reached, *@index is set to lower_bound(leaf, @key) - which may
 * be negative - and 0 is returned; the caller decides what to do with that
 * slot.  On entry *@index is the slot in the spine's current parent (if
 * any) that refers to @root.
 *
 * Returns 0 on success, -ENODATA if an internal node has no slot for @key,
 * or the error from shadow_step()/rebalance_children().
 */
static int remove_nearest(struct shadow_spine *s, struct dm_btree_info *info,
			  struct dm_btree_value_type *vt, dm_block_t root,
			  uint64_t key, int *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
			*index = lower_bound(n, key);
			return 0;
		}

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		/* rebalance_children() may have replaced the node's contents. */
		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
			*index = lower_bound(n, key);
			return 0;
		}

		i = lower_bound(n, key);

		/*
		 * rebalance_children() returns -ENODATA for a missing slot
		 * only on its multi-entry path.  When it collapses a
		 * single-entry node it never looks at the key, so the slot
		 * can still be negative here; report that the same way
		 * instead of indexing outside the node.
		 */
		if (i < 0) {
			r = -ENODATA;
			break;
		}

		root = value64(n, i);
	}

	return r;
}
/*
 * Remove a single entry from the bottom level of the tree.
 *
 * The upper levels are resolved exactly with @keys[0..levels-2]; in the
 * bottom level the entry at lower_bound(@keys[last_level]) (clamped to
 * slot 0) is examined.  If its key k satisfies
 * @keys[last_level] <= k < @end_key, the value type's dec callback (if
 * set) is invoked, the entry is deleted and @keys[last_level] is advanced
 * to k + 1.
 *
 * Returns 0 if an entry was removed, -ENODATA if no entry in range was
 * found, or another error from the helpers.  @nr_removed is not used in
 * this function.
 *
 * NOTE(review): *@new_root is written on every path, including errors
 * raised before any node may have been shadowed - confirm shadow_root()
 * is meaningful in that case.
 */
static int remove_one(struct dm_btree_info *info, dm_block_t root,
		      uint64_t *keys, uint64_t end_key,
		      dm_block_t *new_root, unsigned *nr_removed)
{
	unsigned level, last_level = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_vt;
	uint64_t k;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);

	/* Exact descent through every level above the bottom one. */
	for (level = 0; level < last_level; level++) {
		r = remove_raw(&spine, info, &le64_vt,
			       root, keys[level], (unsigned *) &index);
		if (r < 0)
			goto out;

		n = dm_block_data(shadow_current(&spine));
		root = value64(n, index);
	}

	r = remove_nearest(&spine, info, &info->value_type,
			   root, keys[last_level], &index);
	if (r < 0)
		goto out;

	n = dm_block_data(shadow_current(&spine));

	/* A key before the first entry: start from the first entry. */
	if (index < 0)
		index = 0;

	if (index >= le32_to_cpu(n->header.nr_entries)) {
		r = -ENODATA;
		goto out;
	}

	k = le64_to_cpu(n->keys[index]);
	if (k < keys[last_level] || k >= end_key) {
		r = -ENODATA;
		goto out;
	}

	if (info->value_type.dec)
		info->value_type.dec(info->value_type.context,
				     value_ptr(n, index), 1);

	delete_at(n, index);
	keys[last_level] = k + 1ull;

out:
	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}
/*
 * Repeatedly call remove_one() until it fails, counting the successful
 * removals in *@nr_removed.  Each call starts from the root produced by
 * the previous one, and @first_key is advanced by remove_one() as entries
 * are removed.
 *
 * *@new_root receives the root left by the last call.  -ENODATA from
 * remove_one() is the normal end of the loop and is reported as 0; any
 * other error is returned unchanged.
 */
int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
			   uint64_t *first_key, uint64_t end_key,
			   dm_block_t *new_root, unsigned *nr_removed)
{
	int r;

	*nr_removed = 0;

	for (;;) {
		r = remove_one(info, root, first_key, end_key, &root, nr_removed);
		if (r)
			break;

		(*nr_removed)++;
	}

	*new_root = root;

	if (r == -ENODATA)
		return 0;

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_remove_leaves);