2017-01-11 17:10:19 +13:00
/*
ldb database library using mdb back end
Copyright ( C ) Jakub Hrozek 2014
Copyright ( C ) Catalyst . Net Ltd 2017
* * NOTE ! The following LGPL license applies to the ldb
* * library . This does NOT imply that all of Samba is released
* * under the LGPL
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 3 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , see < http : //www.gnu.org/licenses/>.
*/
# include "ldb_mdb.h"
2018-07-23 10:02:16 +12:00
# include "../ldb_key_value/ldb_kv.h"
2017-01-11 17:10:19 +13:00
# include "include/dlinklist.h"
/* URL scheme prefix that selects the LMDB backend, e.g. "mdb:///path/to/db" */
#define MDB_URL_PREFIX "mdb://"
/* Length of MDB_URL_PREFIX, not counting the terminating NUL */
#define MDB_URL_PREFIX_SIZE (sizeof(MDB_URL_PREFIX)-1)
2018-03-06 15:27:51 +13:00
# define LDB_MDB_MAX_KEY_LENGTH 511
2017-01-11 17:10:19 +13:00
# define GIGABYTE (1024*1024*1024)
int ldb_mdb_err_map ( int lmdb_err )
{
switch ( lmdb_err ) {
case MDB_SUCCESS :
return LDB_SUCCESS ;
case EIO :
return LDB_ERR_OPERATIONS_ERROR ;
2018-07-26 02:29:10 +02:00
# ifdef EBADE
2018-03-21 11:38:22 +13:00
case EBADE :
2018-07-26 02:29:10 +02:00
# endif
2017-01-11 17:10:19 +13:00
case MDB_INCOMPATIBLE :
case MDB_CORRUPTED :
case MDB_INVALID :
return LDB_ERR_UNAVAILABLE ;
case MDB_BAD_TXN :
case MDB_BAD_VALSIZE :
# ifdef MDB_BAD_DBI
case MDB_BAD_DBI :
# endif
case MDB_PANIC :
case EINVAL :
return LDB_ERR_PROTOCOL_ERROR ;
case MDB_MAP_FULL :
case MDB_DBS_FULL :
case MDB_READERS_FULL :
case MDB_TLS_FULL :
case MDB_TXN_FULL :
case EAGAIN :
return LDB_ERR_BUSY ;
case MDB_KEYEXIST :
return LDB_ERR_ENTRY_ALREADY_EXISTS ;
case MDB_NOTFOUND :
case ENOENT :
return LDB_ERR_NO_SUCH_OBJECT ;
case EACCES :
return LDB_ERR_INSUFFICIENT_ACCESS_RIGHTS ;
default :
break ;
}
return LDB_ERR_OTHER ;
}
/*
 * Record an LMDB failure in the ldb error string, tagged with the source
 * location of the call site, and evaluate to the mapped ldb error code.
 */
#define ldb_mdb_error(ldb, ecode) lmdb_error_at(ldb, ecode, __FILE__, __LINE__)

/*
 * Worker for ldb_mdb_error().
 *
 * Sets the ldb error string to the LMDB code, its description and the
 * file/line supplied, then returns ldb_mdb_err_map(ecode).
 */
static int lmdb_error_at(struct ldb_context *ldb,
			 int ecode,
			 const char *file,
			 int line)
{
	ldb_asprintf_errstring(ldb,
			       " (%d) - %s at %s:%d ",
			       ecode,
			       mdb_strerror(ecode),
			       file,
			       line);

	return ldb_mdb_err_map(ecode);
}
2018-07-20 11:53:21 +12:00
static bool lmdb_transaction_active ( struct ldb_kv_private * ldb_kv )
2017-01-11 17:10:19 +13:00
{
2018-07-20 11:53:21 +12:00
return ldb_kv - > lmdb_private - > txlist ! = NULL ;
2017-01-11 17:10:19 +13:00
}
static MDB_txn * lmdb_trans_get_tx ( struct lmdb_trans * ltx )
{
if ( ltx = = NULL ) {
return NULL ;
}
return ltx - > tx ;
}
static void trans_push ( struct lmdb_private * lmdb , struct lmdb_trans * ltx )
{
if ( lmdb - > txlist ) {
talloc_steal ( lmdb - > txlist , ltx ) ;
}
DLIST_ADD ( lmdb - > txlist , ltx ) ;
}
static void trans_finished ( struct lmdb_private * lmdb , struct lmdb_trans * ltx )
{
DLIST_REMOVE ( lmdb - > txlist , ltx ) ;
talloc_free ( ltx ) ;
}
static struct lmdb_trans * lmdb_private_trans_head ( struct lmdb_private * lmdb )
{
struct lmdb_trans * ltx ;
ltx = lmdb - > txlist ;
return ltx ;
}
2018-03-23 11:29:25 +13:00
2017-01-11 17:10:19 +13:00
static MDB_txn * get_current_txn ( struct lmdb_private * lmdb )
{
MDB_txn * txn = NULL ;
2018-03-23 11:29:25 +13:00
txn = lmdb_trans_get_tx ( lmdb_private_trans_head ( lmdb ) ) ;
if ( txn ! = NULL ) {
return txn ;
}
2017-01-11 17:10:19 +13:00
if ( lmdb - > read_txn ! = NULL ) {
return lmdb - > read_txn ;
}
2018-03-23 11:29:25 +13:00
lmdb - > error = MDB_BAD_TXN ;
ldb_set_errstring ( lmdb - > ldb , __location__ " :No active transaction \n " ) ;
return NULL ;
2017-01-11 17:10:19 +13:00
}
2018-07-20 11:53:21 +12:00
static int lmdb_store ( struct ldb_kv_private * ldb_kv ,
2017-01-11 17:10:19 +13:00
struct ldb_val key ,
2018-07-20 11:54:39 +12:00
struct ldb_val data ,
int flags )
2017-01-11 17:10:19 +13:00
{
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
MDB_val mdb_key ;
MDB_val mdb_data ;
int mdb_flags ;
MDB_txn * txn = NULL ;
MDB_dbi dbi = 0 ;
2018-07-20 11:53:21 +12:00
if ( ldb_kv - > read_only ) {
2018-03-07 12:05:34 +13:00
return LDB_ERR_UNWILLING_TO_PERFORM ;
}
2017-01-11 17:10:19 +13:00
txn = lmdb_trans_get_tx ( lmdb_private_trans_head ( lmdb ) ) ;
if ( txn = = NULL ) {
ldb_debug ( lmdb - > ldb , LDB_DEBUG_FATAL , " No transaction " ) ;
lmdb - > error = MDB_PANIC ;
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
lmdb - > error = mdb_dbi_open ( txn , NULL , 0 , & dbi ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
mdb_key . mv_size = key . length ;
mdb_key . mv_data = key . data ;
mdb_data . mv_size = data . length ;
mdb_data . mv_data = data . data ;
if ( flags = = TDB_INSERT ) {
mdb_flags = MDB_NOOVERWRITE ;
2020-05-01 10:42:03 +12:00
} else if ( flags = = TDB_MODIFY ) {
2017-01-11 17:10:19 +13:00
/*
* Modifying a record , ensure that it exists .
* This mimics the TDB semantics
*/
MDB_val value ;
lmdb - > error = mdb_get ( txn , dbi , & mdb_key , & value ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
mdb_flags = 0 ;
} else {
mdb_flags = 0 ;
}
lmdb - > error = mdb_put ( txn , dbi , & mdb_key , & mdb_data , mdb_flags ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
return ldb_mdb_err_map ( lmdb - > error ) ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_delete ( struct ldb_kv_private * ldb_kv , struct ldb_val key )
2017-01-11 17:10:19 +13:00
{
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
MDB_val mdb_key ;
MDB_txn * txn = NULL ;
MDB_dbi dbi = 0 ;
2018-07-20 11:53:21 +12:00
if ( ldb_kv - > read_only ) {
2018-03-07 12:05:34 +13:00
return LDB_ERR_UNWILLING_TO_PERFORM ;
}
2017-01-11 17:10:19 +13:00
txn = lmdb_trans_get_tx ( lmdb_private_trans_head ( lmdb ) ) ;
if ( txn = = NULL ) {
ldb_debug ( lmdb - > ldb , LDB_DEBUG_FATAL , " No transaction " ) ;
lmdb - > error = MDB_PANIC ;
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
lmdb - > error = mdb_dbi_open ( txn , NULL , 0 , & dbi ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
mdb_key . mv_size = key . length ;
mdb_key . mv_data = key . data ;
lmdb - > error = mdb_del ( txn , dbi , & mdb_key , NULL ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
return ldb_mdb_err_map ( lmdb - > error ) ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_traverse_fn ( struct ldb_kv_private * ldb_kv ,
2017-01-11 17:10:19 +13:00
ldb_kv_traverse_fn fn ,
void * ctx )
{
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
MDB_val mdb_key ;
MDB_val mdb_data ;
MDB_txn * txn = NULL ;
MDB_dbi dbi = 0 ;
MDB_cursor * cursor = NULL ;
int ret ;
txn = get_current_txn ( lmdb ) ;
if ( txn = = NULL ) {
ldb_debug ( lmdb - > ldb , LDB_DEBUG_FATAL , " No transaction " ) ;
lmdb - > error = MDB_PANIC ;
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
lmdb - > error = mdb_dbi_open ( txn , NULL , 0 , & dbi ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
lmdb - > error = mdb_cursor_open ( txn , dbi , & cursor ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
goto done ;
}
while ( ( lmdb - > error = mdb_cursor_get (
cursor , & mdb_key ,
& mdb_data , MDB_NEXT ) ) = = MDB_SUCCESS ) {
struct ldb_val key = {
. length = mdb_key . mv_size ,
. data = mdb_key . mv_data ,
} ;
struct ldb_val data = {
. length = mdb_data . mv_size ,
. data = mdb_data . mv_data ,
} ;
2018-07-20 11:53:21 +12:00
ret = fn ( ldb_kv , key , data , ctx ) ;
2017-01-11 17:10:19 +13:00
if ( ret ! = 0 ) {
2019-04-04 12:04:47 +13:00
/*
* NOTE : This DOES NOT set lmdb - > error !
*
* This means that the caller will get success .
* This matches TDB traverse behaviour , where callbacks
* may terminate the traverse , but do not change the
* return code from success .
*
* Callers SHOULD store their own error codes .
*/
2017-01-11 17:10:19 +13:00
goto done ;
}
}
if ( lmdb - > error = = MDB_NOTFOUND ) {
lmdb - > error = MDB_SUCCESS ;
}
done :
if ( cursor ! = NULL ) {
mdb_cursor_close ( cursor ) ;
}
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
return ldb_mdb_err_map ( lmdb - > error ) ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_update_in_iterate ( struct ldb_kv_private * ldb_kv ,
2017-01-11 17:10:19 +13:00
struct ldb_val key ,
struct ldb_val key2 ,
struct ldb_val data ,
void * state )
{
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
struct ldb_val copy ;
int ret = LDB_SUCCESS ;
/*
* Need to take a copy of the data as the delete operation alters the
* data , as it is in private lmdb memory .
*/
copy . length = data . length ;
2018-07-20 11:53:21 +12:00
copy . data = talloc_memdup ( ldb_kv , data . data , data . length ) ;
2017-01-11 17:10:19 +13:00
if ( copy . data = = NULL ) {
lmdb - > error = MDB_PANIC ;
return ldb_oom ( lmdb - > ldb ) ;
}
2018-07-20 11:53:21 +12:00
lmdb - > error = lmdb_delete ( ldb_kv , key ) ;
2017-01-11 17:10:19 +13:00
if ( lmdb - > error ! = MDB_SUCCESS ) {
ldb_debug (
lmdb - > ldb ,
LDB_DEBUG_ERROR ,
" Failed to delete %*.*s "
" for rekey as %*.*s: %s " ,
( int ) key . length , ( int ) key . length ,
( const char * ) key . data ,
( int ) key2 . length , ( int ) key2 . length ,
( const char * ) key . data ,
mdb_strerror ( lmdb - > error ) ) ;
ret = ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
goto done ;
}
2018-07-20 11:53:21 +12:00
lmdb - > error = lmdb_store ( ldb_kv , key2 , copy , 0 ) ;
2017-01-11 17:10:19 +13:00
if ( lmdb - > error ! = MDB_SUCCESS ) {
ldb_debug (
lmdb - > ldb ,
LDB_DEBUG_ERROR ,
" Failed to rekey %*.*s as %*.*s: %s " ,
( int ) key . length , ( int ) key . length ,
( const char * ) key . data ,
( int ) key2 . length , ( int ) key2 . length ,
( const char * ) key . data ,
mdb_strerror ( lmdb - > error ) ) ;
ret = ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
goto done ;
}
done :
if ( copy . data ! = NULL ) {
TALLOC_FREE ( copy . data ) ;
copy . length = 0 ;
}
/*
2023-04-06 16:01:33 +02:00
* Explicitly invalidate the data , as the delete has done this
2017-01-11 17:10:19 +13:00
*/
data . length = 0 ;
data . data = NULL ;
return ret ;
}
/* Handles only a single record */
2018-07-20 11:54:39 +12:00
static int lmdb_parse_record ( struct ldb_kv_private * ldb_kv ,
struct ldb_val key ,
int ( * parser ) ( struct ldb_val key ,
struct ldb_val data ,
2017-01-11 17:10:19 +13:00
void * private_data ) ,
void * ctx )
{
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
MDB_val mdb_key ;
MDB_val mdb_data ;
MDB_txn * txn = NULL ;
MDB_dbi dbi ;
struct ldb_val data ;
txn = get_current_txn ( lmdb ) ;
if ( txn = = NULL ) {
ldb_debug ( lmdb - > ldb , LDB_DEBUG_FATAL , " No transaction active " ) ;
lmdb - > error = MDB_PANIC ;
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
lmdb - > error = mdb_dbi_open ( txn , NULL , 0 , & dbi ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
mdb_key . mv_size = key . length ;
mdb_key . mv_data = key . data ;
lmdb - > error = mdb_get ( txn , dbi , & mdb_key , & mdb_data ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
/* TODO closing a handle should not even be necessary */
mdb_dbi_close ( lmdb - > env , dbi ) ;
if ( lmdb - > error = = MDB_NOTFOUND ) {
return LDB_ERR_NO_SUCH_OBJECT ;
}
2023-02-14 16:31:18 +01:00
if ( lmdb - > error = = MDB_CORRUPTED ) {
ldb_debug ( lmdb - > ldb , LDB_DEBUG_ERROR ,
__location__
" : MDB corrupted for key [%*.*s] \n " ,
( int ) key . length ,
( int ) key . length ,
key . data ) ;
}
2017-01-11 17:10:19 +13:00
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
data . data = mdb_data . mv_data ;
data . length = mdb_data . mv_size ;
/* TODO closing a handle should not even be necessary */
mdb_dbi_close ( lmdb - > env , dbi ) ;
return parser ( key , data , ctx ) ;
}
2019-03-04 12:50:24 +13:00
/*
 * Same as a full iterate, but restricted to the keys between start_key
 * and end_key.  Both ends are included in the results if present.
 *
 * If start_key sorts after end_key the call fails with MDB_PANIC.
 *
 * The walk stops early when fn returns non-zero.  That does NOT set
 * lmdb->error, so the caller gets success.  This matches TDB traverse
 * behaviour, where callbacks may terminate the traverse but do not
 * change the return code; callers SHOULD store their own error codes.
 */
static int lmdb_iterate_range(struct ldb_kv_private *ldb_kv,
			      struct ldb_val start_key,
			      struct ldb_val end_key,
			      ldb_kv_traverse_fn fn,
			      void *ctx)
{
	struct lmdb_private *lmdb = ldb_kv->lmdb_private;
	MDB_cursor *cursor = NULL;
	MDB_val mdb_s_key = {
		.mv_size = start_key.length,
		.mv_data = start_key.data,
	};
	MDB_val mdb_e_key = {
		.mv_size = end_key.length,
		.mv_data = end_key.data,
	};
	MDB_val mdb_key;
	MDB_val mdb_data;
	MDB_dbi dbi = 0;
	MDB_txn *txn = get_current_txn(lmdb);

	if (txn == NULL) {
		ldb_debug(lmdb->ldb, LDB_DEBUG_FATAL, " No transaction ");
		lmdb->error = MDB_PANIC;
		return ldb_mdb_error(lmdb->ldb, lmdb->error);
	}

	lmdb->error = mdb_dbi_open(txn, NULL, 0, &dbi);
	if (lmdb->error != MDB_SUCCESS) {
		return ldb_mdb_error(lmdb->ldb, lmdb->error);
	}

	/* Reject an inverted range */
	if (mdb_cmp(txn, dbi, &mdb_s_key, &mdb_e_key) > 0) {
		lmdb->error = MDB_PANIC;
		return ldb_mdb_error(lmdb->ldb, lmdb->error);
	}

	lmdb->error = mdb_cursor_open(txn, dbi, &cursor);
	if (lmdb->error != MDB_SUCCESS) {
		goto done;
	}

	/* Position the cursor with MDB_SET_RANGE using the start key */
	lmdb->error = mdb_cursor_get(cursor,
				     &mdb_s_key,
				     &mdb_data,
				     MDB_SET_RANGE);
	if (lmdb->error == MDB_NOTFOUND) {
		/* Nothing at or after the start key: an empty result */
		lmdb->error = MDB_SUCCESS;
		goto done;
	}
	if (lmdb->error != MDB_SUCCESS) {
		goto done;
	}

	mdb_key = mdb_s_key;
	do {
		struct ldb_val key = {
			.data = mdb_key.mv_data,
			.length = mdb_key.mv_size,
		};
		struct ldb_val data = {
			.data = mdb_data.mv_data,
			.length = mdb_data.mv_size,
		};

		/* Stop once the cursor has moved past the end key */
		if (mdb_cmp(txn, dbi, &mdb_key, &mdb_e_key) > 0) {
			goto done;
		}

		if (fn(ldb_kv, key, data, ctx) != 0) {
			/* Callback asked to stop; not an error, see above */
			goto done;
		}

		lmdb->error = mdb_cursor_get(cursor,
					     &mdb_key,
					     &mdb_data,
					     MDB_NEXT);
	} while (lmdb->error == MDB_SUCCESS);

	/* Running off the end of the database is the normal way to finish */
	if (lmdb->error == MDB_NOTFOUND) {
		lmdb->error = MDB_SUCCESS;
	}

done:
	if (cursor != NULL) {
		mdb_cursor_close(cursor);
	}

	if (lmdb->error != MDB_SUCCESS) {
		return ldb_mdb_error(lmdb->ldb, lmdb->error);
	}
	return LDB_SUCCESS;
}
2017-01-11 17:10:19 +13:00
static int lmdb_lock_read ( struct ldb_module * module )
{
void * data = ldb_module_get_private ( module ) ;
2018-07-20 11:54:39 +12:00
struct ldb_kv_private * ldb_kv =
talloc_get_type ( data , struct ldb_kv_private ) ;
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2018-03-13 15:08:10 +13:00
pid_t pid = getpid ( ) ;
if ( pid ! = lmdb - > pid ) {
ldb_asprintf_errstring (
lmdb - > ldb ,
__location__ " : Reusing ldb opened by pid %d in "
" process %d \n " ,
lmdb - > pid ,
pid ) ;
lmdb - > error = MDB_BAD_TXN ;
return LDB_ERR_PROTOCOL_ERROR ;
}
2017-01-11 17:10:19 +13:00
lmdb - > error = MDB_SUCCESS ;
2018-07-20 11:53:21 +12:00
if ( lmdb_transaction_active ( ldb_kv ) = = false & &
ldb_kv - > read_lock_count = = 0 ) {
2017-01-11 17:10:19 +13:00
lmdb - > error = mdb_txn_begin ( lmdb - > env ,
NULL ,
MDB_RDONLY ,
& lmdb - > read_txn ) ;
}
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
2018-07-20 11:53:21 +12:00
ldb_kv - > read_lock_count + + ;
2017-01-11 17:10:19 +13:00
return ldb_mdb_err_map ( lmdb - > error ) ;
}
static int lmdb_unlock_read ( struct ldb_module * module )
{
void * data = ldb_module_get_private ( module ) ;
2018-07-20 11:54:39 +12:00
struct ldb_kv_private * ldb_kv =
talloc_get_type ( data , struct ldb_kv_private ) ;
2017-01-11 17:10:19 +13:00
2018-07-20 11:54:39 +12:00
if ( lmdb_transaction_active ( ldb_kv ) = = false & &
ldb_kv - > read_lock_count = = 1 ) {
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
mdb_txn_commit ( lmdb - > read_txn ) ;
lmdb - > read_txn = NULL ;
2018-07-20 11:53:21 +12:00
ldb_kv - > read_lock_count - - ;
2017-01-11 17:10:19 +13:00
return LDB_SUCCESS ;
}
2018-07-20 11:53:21 +12:00
ldb_kv - > read_lock_count - - ;
2017-01-11 17:10:19 +13:00
return LDB_SUCCESS ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_transaction_start ( struct ldb_kv_private * ldb_kv )
2017-01-11 17:10:19 +13:00
{
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
struct lmdb_trans * ltx ;
struct lmdb_trans * ltx_head ;
MDB_txn * tx_parent ;
2018-03-13 15:08:10 +13:00
pid_t pid = getpid ( ) ;
2017-01-11 17:10:19 +13:00
2018-03-07 12:05:34 +13:00
/* Do not take out the transaction lock on a read-only DB */
2018-07-20 11:53:21 +12:00
if ( ldb_kv - > read_only ) {
2018-03-07 12:05:34 +13:00
return LDB_ERR_UNWILLING_TO_PERFORM ;
}
2017-01-11 17:10:19 +13:00
ltx = talloc_zero ( lmdb , struct lmdb_trans ) ;
if ( ltx = = NULL ) {
return ldb_oom ( lmdb - > ldb ) ;
}
2018-03-13 15:08:10 +13:00
if ( pid ! = lmdb - > pid ) {
ldb_asprintf_errstring (
lmdb - > ldb ,
__location__ " : Reusing ldb opened by pid %d in "
" process %d \n " ,
lmdb - > pid ,
pid ) ;
lmdb - > error = MDB_BAD_TXN ;
return LDB_ERR_PROTOCOL_ERROR ;
}
2020-03-30 12:08:30 +13:00
/*
* Clear out any stale readers
*/
{
2020-04-01 08:22:08 +13:00
int stale = 0 ;
2020-03-30 12:08:30 +13:00
mdb_reader_check ( lmdb - > env , & stale ) ;
if ( stale > 0 ) {
ldb_debug (
lmdb - > ldb ,
LDB_DEBUG_ERROR ,
" LMDB Stale readers, deleted (%d) " ,
stale ) ;
}
}
2017-01-11 17:10:19 +13:00
ltx_head = lmdb_private_trans_head ( lmdb ) ;
tx_parent = lmdb_trans_get_tx ( ltx_head ) ;
lmdb - > error = mdb_txn_begin ( lmdb - > env , tx_parent , 0 , & ltx - > tx ) ;
if ( lmdb - > error ! = MDB_SUCCESS ) {
return ldb_mdb_error ( lmdb - > ldb , lmdb - > error ) ;
}
trans_push ( lmdb , ltx ) ;
return ldb_mdb_err_map ( lmdb - > error ) ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_transaction_cancel ( struct ldb_kv_private * ldb_kv )
2017-01-11 17:10:19 +13:00
{
struct lmdb_trans * ltx ;
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
ltx = lmdb_private_trans_head ( lmdb ) ;
if ( ltx = = NULL ) {
return LDB_ERR_OPERATIONS_ERROR ;
}
mdb_txn_abort ( ltx - > tx ) ;
trans_finished ( lmdb , ltx ) ;
return LDB_SUCCESS ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_transaction_prepare_commit ( struct ldb_kv_private * ldb_kv )
2017-01-11 17:10:19 +13:00
{
/* No need to prepare a commit */
return LDB_SUCCESS ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_transaction_commit ( struct ldb_kv_private * ldb_kv )
2017-01-11 17:10:19 +13:00
{
struct lmdb_trans * ltx ;
2018-07-20 11:53:21 +12:00
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
2017-01-11 17:10:19 +13:00
ltx = lmdb_private_trans_head ( lmdb ) ;
if ( ltx = = NULL ) {
return LDB_ERR_OPERATIONS_ERROR ;
}
lmdb - > error = mdb_txn_commit ( ltx - > tx ) ;
trans_finished ( lmdb , ltx ) ;
return lmdb - > error ;
}
2018-07-20 11:53:21 +12:00
static int lmdb_error ( struct ldb_kv_private * ldb_kv )
2017-01-11 17:10:19 +13:00
{
2018-07-20 11:53:21 +12:00
return ldb_mdb_err_map ( ldb_kv - > lmdb_private - > error ) ;
2017-01-11 17:10:19 +13:00
}
2018-07-20 11:53:21 +12:00
static const char * lmdb_errorstr ( struct ldb_kv_private * ldb_kv )
2017-01-11 17:10:19 +13:00
{
2018-07-20 11:53:21 +12:00
return mdb_strerror ( ldb_kv - > lmdb_private - > error ) ;
2017-01-11 17:10:19 +13:00
}
2018-07-20 11:54:39 +12:00
/* Return the name of this key-value backend. */
static const char *lmdb_name(struct ldb_kv_private *ldb_kv)
{
	static const char backend_name[] = " lmdb ";

	(void)ldb_kv;

	return backend_name;
}
2018-07-20 11:53:21 +12:00
/*
 * Report whether the database has changed.
 *
 * lmdb does not provide a quick way to determine this, so the answer is
 * always "yes".  (tdb uses a sequence number that allows the equivalent
 * check to be implemented efficiently.)
 */
static bool lmdb_changed(struct ldb_kv_private *ldb_kv)
{
	(void)ldb_kv;

	return true;
}
2019-04-01 15:27:32 +13:00
/*
* Get the number of records in the database .
*
* The mdb_env_stat call returns an accurate count , so we return the actual
* number of records in the database rather than an estimate .
*/
2019-04-11 12:53:45 +12:00
static size_t lmdb_get_size ( struct ldb_kv_private * ldb_kv )
{
2019-04-01 15:27:32 +13:00
struct MDB_stat stats = { 0 } ;
struct lmdb_private * lmdb = ldb_kv - > lmdb_private ;
int ret = 0 ;
ret = mdb_env_stat ( lmdb - > env , & stats ) ;
if ( ret ! = 0 ) {
return 0 ;
}
return stats . ms_entries ;
}
2019-03-07 10:18:00 +13:00
/*
 * Start a sub transaction.
 *
 * lmdb supports nested transactions, so this simply starts another
 * transaction.
 */
static int lmdb_nested_transaction_start(struct ldb_kv_private *ldb_kv)
{
	return lmdb_transaction_start(ldb_kv);
}
/*
 * Commit a sub transaction.
 *
 * lmdb supports nested transactions, so this simply commits the
 * current (nested) transaction.
 */
static int lmdb_nested_transaction_commit(struct ldb_kv_private *ldb_kv)
{
	return lmdb_transaction_commit(ldb_kv);
}
/*
 * Cancel a sub transaction.
 *
 * lmdb supports nested transactions, so this simply cancels the
 * current (nested) transaction.
 */
static int lmdb_nested_transaction_cancel(struct ldb_kv_private *ldb_kv)
{
	return lmdb_transaction_cancel(ldb_kv);
}
2017-01-11 17:10:19 +13:00
static struct kv_db_ops lmdb_key_value_ops = {
2019-03-07 16:45:46 +13:00
. options = LDB_KV_OPTION_STABLE_READ_LOCK ,
2017-01-11 17:10:19 +13:00
. store = lmdb_store ,
. delete = lmdb_delete ,
. iterate = lmdb_traverse_fn ,
. update_in_iterate = lmdb_update_in_iterate ,
. fetch_and_parse = lmdb_parse_record ,
2019-03-04 12:50:24 +13:00
. iterate_range = lmdb_iterate_range ,
2017-01-11 17:10:19 +13:00
. lock_read = lmdb_lock_read ,
. unlock_read = lmdb_unlock_read ,
. begin_write = lmdb_transaction_start ,
. prepare_write = lmdb_transaction_prepare_commit ,
. finish_write = lmdb_transaction_commit ,
. abort_write = lmdb_transaction_cancel ,
. error = lmdb_error ,
. errorstr = lmdb_errorstr ,
. name = lmdb_name ,
. has_changed = lmdb_changed ,
. transaction_active = lmdb_transaction_active ,
2019-04-01 15:27:32 +13:00
. get_size = lmdb_get_size ,
2019-03-07 10:18:00 +13:00
. begin_nested_write = lmdb_nested_transaction_start ,
. finish_nested_write = lmdb_nested_transaction_commit ,
. abort_nested_write = lmdb_nested_transaction_cancel ,
2017-01-11 17:10:19 +13:00
} ;
/*
 * Extract the filesystem path from an ldb URL.
 *
 * A url without any ':' is taken to be a plain path and is returned
 * unchanged.  A url containing ':' must start with MDB_URL_PREFIX; the
 * text after the prefix is returned.  Any other url yields NULL.
 *
 * The result points into url; nothing is allocated.
 */
static const char *lmdb_get_path(const char *url)
{
	/* No scheme separator at all: the url already is the path */
	if (strchr(url, ':') == NULL) {
		return url;
	}

	if (strncmp(url, MDB_URL_PREFIX, MDB_URL_PREFIX_SIZE) != 0) {
		return NULL;
	}

	return url + MDB_URL_PREFIX_SIZE;
}
static int lmdb_pvt_destructor ( struct lmdb_private * lmdb )
{
struct lmdb_trans * ltx = NULL ;
2018-03-05 16:04:03 +13:00
/* Check if this is a forked child */
if ( getpid ( ) ! = lmdb - > pid ) {
int fd = 0 ;
/*
* We cannot call mdb_env_close or commit any transactions ,
* otherwise they might appear finished in the parent .
*
*/
if ( mdb_env_get_fd ( lmdb - > env , & fd ) = = 0 ) {
close ( fd ) ;
}
/* Remove the pointer, so that no access should occur */
lmdb - > env = NULL ;
return 0 ;
}
2017-01-11 17:10:19 +13:00
/*
* Close the read transaction if it ' s open
*/
if ( lmdb - > read_txn ! = NULL ) {
mdb_txn_abort ( lmdb - > read_txn ) ;
}
if ( lmdb - > env = = NULL ) {
return 0 ;
}
/*
* Abort any currently active transactions
*/
ltx = lmdb_private_trans_head ( lmdb ) ;
while ( ltx ! = NULL ) {
mdb_txn_abort ( ltx - > tx ) ;
trans_finished ( lmdb , ltx ) ;
ltx = lmdb_private_trans_head ( lmdb ) ;
}
lmdb - > env = NULL ;
return 0 ;
}
2018-03-07 12:05:34 +13:00
/*
 * Book-keeping entry for an MDB_env opened by lmdb_open_env().
 *
 * Entries are matched on device, inode and pid so that an already open
 * environment is reused instead of being opened a second time.
 */
struct mdb_env_wrap {
	/* Doubly linked list pointers (see mdb_list) */
	struct mdb_env_wrap *next;
	struct mdb_env_wrap *prev;
	/* Identity of the database file, taken from stat */
	dev_t device;
	ino_t inode;
	/* The wrapped environment */
	MDB_env *env;
	/* Process that opened the environment */
	pid_t pid;
};

/* List of all mdb_env_wrap entries created by lmdb_open_env() */
static struct mdb_env_wrap *mdb_list;
/* destroy the last connection to an mdb */
static int mdb_env_wrap_destructor ( struct mdb_env_wrap * w )
{
mdb_env_close ( w - > env ) ;
DLIST_REMOVE ( mdb_list , w ) ;
return 0 ;
}
/*
 * Open the LMDB environment for path, or reuse one that is already open.
 *
 * mem_ctx:      talloc parent for the environment wrapper (or for the
 *               reference taken on an existing wrapper)
 * env:          receives the environment handle
 * ldb:          context used for error strings
 * path:         database file
 * env_map_size: mmap size to request, 0 to leave LMDB's default
 * flags:        ldb flags; LDB_FLG_NOSYNC selects MDB_NOSYNC
 *
 * There must be only one MDB_env per database per process, so open
 * environments are tracked on mdb_list and matched on device, inode and
 * pid.
 *
 * Returns an ldb error code.  On every failure after the environment
 * was created, the environment is closed, the wrapper is freed and *env
 * is reset to NULL, so nothing is leaked and no stale handle is left
 * with the caller.
 */
static int lmdb_open_env(TALLOC_CTX *mem_ctx,
			 MDB_env **env,
			 struct ldb_context *ldb,
			 const char *path,
			 const size_t env_map_size,
			 unsigned int flags)
{
	int ret;
	int ldb_ret;
	int fd = 0;
	int fd_flags;
	/*
	 * MDB_NOSUBDIR implies there is a separate file called path and a
	 * separate lockfile called path-lock
	 */
	unsigned int mdb_flags = MDB_NOSUBDIR | MDB_NOTLS;
	struct mdb_env_wrap *w = NULL;
	struct stat st;
	MDB_envinfo info = {0};
	pid_t pid = getpid();

	if (stat(path, &st) == 0) {
		for (w = mdb_list; w != NULL; w = w->next) {
			if (st.st_dev == w->device &&
			    st.st_ino == w->inode &&
			    pid == w->pid) {
				/*
				 * We must have only one MDB_env per process
				 */
				if (!talloc_reference(mem_ctx, w)) {
					return ldb_oom(ldb);
				}
				*env = w->env;
				return LDB_SUCCESS;
			}
		}
	}

	w = talloc(mem_ctx, struct mdb_env_wrap);
	if (w == NULL) {
		return ldb_oom(ldb);
	}

	ret = mdb_env_create(env);
	if (ret != 0) {
		ldb_asprintf_errstring(
			ldb,
			" Could not create MDB environment %s: %s \n ",
			path,
			mdb_strerror(ret));
		/* No environment exists yet, only the wrapper to release */
		TALLOC_FREE(w);
		return ldb_mdb_err_map(ret);
	}

	if (env_map_size > 0) {
		ret = mdb_env_set_mapsize(*env, env_map_size);
		if (ret != 0) {
			ldb_asprintf_errstring(
				ldb,
				" Could not set MDB mmap() size to %llu "
				" on %s: %s \n ",
				(unsigned long long)(env_map_size),
				path,
				mdb_strerror(ret));
			ldb_ret = ldb_mdb_err_map(ret);
			goto failed;
		}
	}

	mdb_env_set_maxreaders(*env, 100000);

	/*
	 * As we ensure that there is only one MDB_env open per database per
	 * process. We can not use the MDB_RDONLY flag, as another ldb may be
	 * opened in read write mode
	 */
	if (flags & LDB_FLG_NOSYNC) {
		mdb_flags |= MDB_NOSYNC;
	}

	ret = mdb_env_open(*env, path, mdb_flags, 0644);
	if (ret != 0) {
		ldb_asprintf_errstring(ldb,
				       " Could not open DB %s: %s \n ",
				       path, mdb_strerror(ret));
		ldb_ret = ldb_mdb_err_map(ret);
		goto failed;
	}

	ret = mdb_env_info(*env, &info);
	if (ret != 0) {
		ldb_asprintf_errstring(
			ldb,
			" Could not get MDB environment stats %s: %s \n ",
			path,
			mdb_strerror(ret));
		ldb_ret = ldb_mdb_err_map(ret);
		goto failed;
	}

	ret = mdb_env_get_fd(*env, &fd);
	if (ret != 0) {
		ldb_asprintf_errstring(ldb,
				       " Could not obtain DB FD %s: %s \n ",
				       path, mdb_strerror(ret));
		ldb_ret = ldb_mdb_err_map(ret);
		goto failed;
	}

	/* Just as for TDB: on exec, don't inherit the fd */
	fd_flags = fcntl(fd, F_GETFD, 0);
	if (fd_flags == -1) {
		ldb_ret = LDB_ERR_OPERATIONS_ERROR;
		goto failed;
	}

	ret = fcntl(fd, F_SETFD, fd_flags | FD_CLOEXEC);
	if (ret == -1) {
		ldb_ret = LDB_ERR_OPERATIONS_ERROR;
		goto failed;
	}

	/* Identify the file by what was actually opened, not by path */
	if (fstat(fd, &st) != 0) {
		ldb_asprintf_errstring(
			ldb,
			" Could not stat %s: \n ",
			path);
		ldb_ret = LDB_ERR_OPERATIONS_ERROR;
		goto failed;
	}

	w->env = *env;
	w->device = st.st_dev;
	w->inode = st.st_ino;
	w->pid = pid;

	talloc_set_destructor(w, mdb_env_wrap_destructor);

	DLIST_ADD(mdb_list, w);

	return LDB_SUCCESS;

failed:
	/*
	 * The wrapper destructor is not installed yet, so the environment
	 * has to be discarded by hand.
	 */
	mdb_env_close(*env);
	*env = NULL;
	TALLOC_FREE(w);
	return ldb_ret;
}
static int lmdb_pvt_open ( struct lmdb_private * lmdb ,
struct ldb_context * ldb ,
const char * path ,
2019-06-25 16:22:15 +12:00
const size_t env_map_size ,
2018-03-07 12:05:34 +13:00
unsigned int flags )
{
int ret ;
int lmdb_max_key_length ;
if ( flags & LDB_FLG_DONT_CREATE_DB ) {
struct stat st ;
if ( stat ( path , & st ) ! = 0 ) {
return LDB_ERR_UNAVAILABLE ;
}
}
2019-06-25 16:22:15 +12:00
ret = lmdb_open_env ( lmdb , & lmdb - > env , ldb , path , env_map_size , flags ) ;
2018-03-07 12:05:34 +13:00
if ( ret ! = 0 ) {
return ret ;
}
/* Close when lmdb is released */
talloc_set_destructor ( lmdb , lmdb_pvt_destructor ) ;
2018-03-05 16:04:03 +13:00
/* Store the original pid during the LMDB open */
lmdb - > pid = getpid ( ) ;
2018-03-06 15:27:51 +13:00
lmdb_max_key_length = mdb_env_get_maxkeysize ( lmdb - > env ) ;
/* This will never happen, but if it does make sure to freak out */
if ( lmdb_max_key_length < LDB_MDB_MAX_KEY_LENGTH ) {
return ldb_operr ( ldb ) ;
}
2017-01-11 17:10:19 +13:00
return LDB_SUCCESS ;
}
int lmdb_connect ( struct ldb_context * ldb ,
const char * url ,
unsigned int flags ,
const char * options [ ] ,
struct ldb_module * * _module )
{
const char * path = NULL ;
struct lmdb_private * lmdb = NULL ;
2018-07-20 11:53:21 +12:00
struct ldb_kv_private * ldb_kv = NULL ;
2017-01-11 17:10:19 +13:00
int ret ;
2019-06-25 16:22:15 +12:00
size_t env_map_size = 0 ;
2017-01-11 17:10:19 +13:00
/*
* We hold locks , so we must use a private event context
* on each returned handle
*/
ldb_set_require_private_event_context ( ldb ) ;
path = lmdb_get_path ( url ) ;
if ( path = = NULL ) {
ldb_debug ( ldb , LDB_DEBUG_ERROR , " Invalid mdb URL '%s' " , url ) ;
return LDB_ERR_OPERATIONS_ERROR ;
}
2018-07-20 11:53:21 +12:00
ldb_kv = talloc_zero ( ldb , struct ldb_kv_private ) ;
if ( ! ldb_kv ) {
2017-01-11 17:10:19 +13:00
ldb_oom ( ldb ) ;
return LDB_ERR_OPERATIONS_ERROR ;
}
2018-07-20 11:53:21 +12:00
lmdb = talloc_zero ( ldb_kv , struct lmdb_private ) ;
2017-01-11 17:10:19 +13:00
if ( lmdb = = NULL ) {
2018-07-20 11:53:21 +12:00
TALLOC_FREE ( ldb_kv ) ;
2018-03-07 12:05:34 +13:00
return ldb_oom ( ldb ) ;
2017-01-11 17:10:19 +13:00
}
lmdb - > ldb = ldb ;
2018-07-20 11:53:21 +12:00
ldb_kv - > kv_ops = & lmdb_key_value_ops ;
2017-01-11 17:10:19 +13:00
2019-06-25 16:22:15 +12:00
{
const char * size = ldb_options_find (
ldb , ldb - > options , " lmdb_env_size " ) ;
if ( size ! = NULL ) {
env_map_size = strtoull ( size , NULL , 0 ) ;
}
}
ret = lmdb_pvt_open ( lmdb , ldb , path , env_map_size , flags ) ;
2017-01-11 17:10:19 +13:00
if ( ret ! = LDB_SUCCESS ) {
2018-07-20 11:53:21 +12:00
TALLOC_FREE ( ldb_kv ) ;
2017-01-11 17:10:19 +13:00
return ret ;
}
2018-07-20 11:53:21 +12:00
ldb_kv - > lmdb_private = lmdb ;
2017-01-11 17:10:19 +13:00
if ( flags & LDB_FLG_RDONLY ) {
2018-07-20 11:53:21 +12:00
ldb_kv - > read_only = true ;
2017-01-11 17:10:19 +13:00
}
2018-03-06 15:27:51 +13:00
/*
* This maximum length becomes encoded in the index values so
* must never change even if LMDB starts to allow longer keys .
* The override option is max_key_len_for_self_test , and is
* used for testing only .
*/
2018-07-20 11:53:21 +12:00
ldb_kv - > max_key_length = LDB_MDB_MAX_KEY_LENGTH ;
2018-03-06 15:27:51 +13:00
2018-07-20 07:23:10 +12:00
return ldb_kv_init_store (
2018-07-20 11:53:21 +12:00
ldb_kv , " ldb_mdb backend " , ldb , options , _module ) ;
2017-01-11 17:10:19 +13:00
}