2009-07-23 16:03:39 +10:00
/*
ctdb vacuuming events
Copyright ( C ) Ronnie Sahlberg 2009
2013-02-22 16:12:17 +01:00
Copyright ( C ) Michael Adam 2010 - 2013
2011-11-26 01:06:13 +01:00
Copyright ( C ) Stefan Metzmacher 2010 - 2011
2009-07-23 16:03:39 +10:00
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , see < http : //www.gnu.org/licenses/>.
*/
2015-10-26 16:50:46 +11:00
# include "replace.h"
2009-07-23 16:03:39 +10:00
# include "system/network.h"
# include "system/filesys.h"
2015-10-26 16:50:46 +11:00
# include "system/time.h"
# include <talloc.h>
# include <tevent.h>
2014-08-15 15:46:33 +10:00
# include "lib/tdb_wrap/tdb_wrap.h"
2009-07-23 16:03:39 +10:00
# include "lib/util/dlinklist.h"
2015-10-26 16:50:46 +11:00
# include "lib/util/debug.h"
# include "lib/util/samba_util.h"
2016-11-29 12:55:06 +11:00
# include "lib/util/sys_rw.h"
2015-09-23 16:10:59 -07:00
# include "lib/util/util_process.h"
2015-10-26 16:50:46 +11:00
# include "ctdb_private.h"
# include "ctdb_client.h"
2019-07-30 14:17:11 +10:00
# include "protocol/protocol_private.h"
2015-10-26 16:50:46 +11:00
# include "common/rb_tree.h"
2015-10-23 14:17:34 +11:00
# include "common/common.h"
2015-11-11 15:41:10 +11:00
# include "common/logging.h"
2009-07-23 16:03:39 +10:00
2018-02-16 15:30:13 +11:00
# include "protocol/protocol_api.h"
2009-09-29 13:20:18 +10:00
/*
 * Timeout value handed to controls issued while vacuuming
 * (e.g. ctdb_ctrl_getnodemap()): timeval_current_ofs(10, 0),
 * presumably "now + 10 seconds, 0 microseconds".
 */
# define TIMELIMIT() timeval_current_ofs(10, 0)
2009-07-23 16:03:39 +10:00
/*
 * State recorded in ctdb_vacuum_child_context.status.
 * NOTE(review): presumably the progress/outcome of a vacuuming child
 * process; the code that sets these values is not visible here.
 */
enum vacuum_child_status { VACUUM_RUNNING , VACUUM_OK , VACUUM_ERROR , VACUUM_TIMEOUT } ;
/*
 * Bookkeeping for one vacuuming child.
 * NOTE(review): the code creating and using this context is not visible
 * in this part of the file; field notes below are limited to what the
 * declarations themselves show.
 */
struct ctdb_vacuum_child_context {
/* the vacuum handle this child belongs to */
struct ctdb_vacuum_handle * vacuum_handle ;
2010-07-21 12:29:55 +09:30
/* fd child writes status to (a pair of descriptors, presumably a pipe) */
2009-07-23 16:03:39 +10:00
int fd [ 2 ] ;
/* process id of the child */
pid_t child_pid ;
/* see enum vacuum_child_status */
enum vacuum_child_status status ;
/* presumably the time the child was started - TODO confirm */
struct timeval start_time ;
2019-10-15 16:36:44 +11:00
/* NOTE(review): meaning not visible here; confirm against the scheduler code */
bool scheduled ;
2009-07-23 16:03:39 +10:00
} ;
/*
 * Per-database vacuuming handle.
 */
struct ctdb_vacuum_handle {
/* the database this handle vacuums */
struct ctdb_db_context * ctdb_db ;
2010-12-20 17:44:02 +01:00
/*
 * presumably counts fast-path runs so that the full database traverse
 * only happens every VacuumFastPathCount runs - TODO confirm
 */
uint32_t fast_path_count ;
2020-03-27 14:38:09 +11:00
/* NOTE(review): presumably the interval between vacuum runs; unit not visible here */
uint32_t vacuum_interval ;
2009-07-23 16:03:39 +10:00
} ;
2009-07-28 17:45:31 +03:00
2009-09-29 13:20:18 +10:00
/*
 * State of one vacuuming run on a database: the records that may be
 * deleted, the per-node lists of records to hand to their lmaster, and
 * the statistics gathered along the way.
 */
struct vacuum_data {
struct ctdb_context * ctdb ;
struct ctdb_db_context * ctdb_db ;
2009-07-28 17:45:31 +03:00
/* NOTE(review): not used in the visible part of the file; presumably the repack target */
struct tdb_context * dest_db ;
2011-12-16 10:53:26 +01:00
/* tree of struct delete_record_data, indexed by the 32-bit key hash */
trbt_tree_t * delete_list ;
2011-12-16 10:49:41 +01:00
/* array of marshall buffers, indexed by the lmaster node of the records */
struct ctdb_marshall_buffer * * vacuum_fetch_list ;
2009-09-29 13:20:18 +10:00
struct timeval start ;
/* set when adding to a vacuum fetch list fails; checked after the db traverse */
bool traverse_error ;
bool vacuum ;
2014-02-20 00:32:08 +01:00
/* statistics, grouped by processing stage */
struct {
/* fast path: traverse of ctdb_db->delete_queue (delete_queue_traverse) */
struct {
uint32_t added_to_vacuum_fetch_list ;
uint32_t added_to_delete_list ;
uint32_t deleted ;
uint32_t skipped ;
uint32_t error ;
uint32_t total ;
} delete_queue ;
/* full read-only traverse of the database (vacuum_traverse) */
struct {
uint32_t scheduled ;
uint32_t skipped ;
uint32_t error ;
uint32_t total ;
} db_traverse ;
/* processing of delete_list (delete_record_traverse) */
struct {
uint32_t total ;
uint32_t remote_error ;
uint32_t local_error ;
uint32_t deleted ;
uint32_t skipped ;
uint32_t left ;
} delete_list ;
/* NOTE(review): not updated in the visible part of the file */
struct {
uint32_t vacuumed ;
uint32_t copied ;
} repack ;
} count ;
2009-09-29 13:20:18 +10:00
} ;
/*
 * This structure contains the information for one record to be deleted.
 *
 * It is allocated with offsetof(keydata) + key.dsize bytes so that the
 * key bytes live inline in keydata[]; key.dptr points at keydata
 * (see insert_delete_record_data_into_tree()).
 */
struct delete_record_data {
struct ctdb_context * ctdb ;
struct ctdb_db_context * ctdb_db ;
/* header of the record at the time it was queued; its rsn is re-checked before deletion */
struct ctdb_ltdb_header hdr ;
2018-02-14 15:18:17 +11:00
/* non-zero: delete_record_traverse() counts the record as a remote error and drops it */
uint32_t remote_fail_count ;
2009-09-29 13:20:18 +10:00
TDB_DATA key ;
2012-11-22 15:27:51 +01:00
/* start of the inline key storage (over-allocated, see above) */
uint8_t keydata [ 1 ] ;
2009-07-28 17:45:31 +03:00
} ;
2009-09-29 13:20:18 +10:00
/*
 * Parameter block for delete_marshall_traverse(): the marshall buffer
 * that records are appended to, plus the vacuum run it belongs to.
 */
struct delete_records_list {
/* grows as records are added; replaced by the buffer ctdb_marshall_add() returns */
struct ctdb_marshall_buffer * records ;
2013-01-05 01:20:18 +01:00
struct vacuum_data * vdata ;
2009-09-29 13:20:18 +10:00
} ;
2018-02-16 15:30:13 +11:00
/*
 * One entry of the fetch queue (see fetch_queue_traverse()).
 * NOTE(review): keydata[1] suggests the same inline-key layout as
 * struct delete_record_data; the allocation site is not visible here.
 */
struct fetch_record_data {
TDB_DATA key ;
uint8_t keydata [ 1 ] ;
} ;
2014-02-14 18:38:31 +01:00
/*
 * Forward declaration: called by vacuum_traverse() below; the definition
 * is not part of the code preceding its first use.
 */
static int insert_record_into_delete_queue ( struct ctdb_db_context * ctdb_db ,
const struct ctdb_ltdb_header * hdr ,
TDB_DATA key ) ;
2010-12-21 14:19:00 +01:00
/**
 * Store key and header in a tree, indexed by the key hash.
 *
 * A struct delete_record_data is allocated as a talloc child of @tree,
 * with the key bytes copied inline behind the fixed fields, and inserted
 * into @tree under ctdb_hash(&key).
 *
 * @param ctdb     ctdb context stored in the new entry
 * @param ctdb_db  database context stored in the new entry
 * @param tree     tree to insert into; also the talloc parent of the entry
 * @param hdr      record header, copied into the entry
 * @param key      record key, copied into the entry
 *
 * @return 0 on success, -1 if the entry could not be allocated
 */
static int insert_delete_record_data_into_tree(struct ctdb_context *ctdb,
					       struct ctdb_db_context *ctdb_db,
					       trbt_tree_t *tree,
					       const struct ctdb_ltdb_header *hdr,
					       TDB_DATA key)
{
	struct delete_record_data *dd;
	uint32_t hash;
	size_t len;

	len = offsetof(struct delete_record_data, keydata) + key.dsize;
	if (len < key.dsize) {
		/* size computation wrapped around */
		DEBUG(DEBUG_ERR, (__location__ " Out of memory \n "));
		return -1;
	}

	dd = (struct delete_record_data *)talloc_size(tree, len);
	if (dd == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Out of memory \n "));
		return -1;
	}
	/*
	 * The traverse callbacks retrieve this object with
	 * talloc_get_type(..., struct delete_record_data), which compares
	 * the talloc name against the exact type name: the name set here
	 * must match it character for character.
	 */
	talloc_set_name_const(dd, "struct delete_record_data");

	dd->ctdb = ctdb;
	dd->ctdb_db = ctdb_db;
	dd->key.dsize = key.dsize;
	dd->key.dptr = dd->keydata;
	if (key.dsize > 0) {
		/* an empty key may come with a NULL dptr; do not hand that to memcpy */
		memcpy(dd->keydata, key.dptr, key.dsize);
	}

	dd->hdr = *hdr;
	dd->remote_fail_count = 0;

	hash = ctdb_hash(&key);

	trbt_insert32(tree, hash, dd);

	return 0;
}
2011-12-16 10:53:26 +01:00
/**
 * Put a record on the delete list of this vacuum run.
 *
 * If an entry with the same key hash is already on the list, the record
 * is silently skipped (only an INFO message is logged) and 0 is returned.
 *
 * @return 0 on success or skip, -1 if the entry could not be stored
 */
static int add_record_to_delete_list(struct vacuum_data *vdata, TDB_DATA key,
				     struct ctdb_ltdb_header *hdr)
{
	uint32_t key_hash = ctdb_hash(&key);
	int rc;

	if (trbt_lookup32(vdata->delete_list, key_hash)) {
		DEBUG(DEBUG_INFO, (__location__ " Hash collision when vacuuming, skipping this record. \n "));
		return 0;
	}

	rc = insert_delete_record_data_into_tree(vdata->ctdb,
						 vdata->ctdb_db,
						 vdata->delete_list,
						 hdr, key);
	if (rc != 0) {
		return -1;
	}

	/* only records that actually made it onto the list are counted */
	vdata->count.delete_list.total++;

	return 0;
}
2010-12-20 16:31:27 +01:00
/**
 * Add a record to the list of records to be sent
 * to their lmaster with VACUUM_FETCH.
 *
 * The list is selected by the lmaster of @key. vdata->vacuum_fetch_list
 * is indexed by node (ctdb_process_vacuum_fetch_lists() walks it for
 * 0 <= i < ctdb->num_nodes), so an lmaster outside that range is rejected
 * instead of being used as an index, mirroring the check vacuum_traverse()
 * performs.
 *
 * @return 0 on success; -1 on failure, in which case
 *         vdata->traverse_error is set and the existing list is left
 *         untouched
 */
static int add_record_to_vacuum_fetch_list(struct vacuum_data *vdata,
					   TDB_DATA key)
{
	struct ctdb_context *ctdb = vdata->ctdb;
	uint32_t lmaster;
	struct ctdb_marshall_buffer *vfl;

	lmaster = ctdb_lmaster(ctdb, &key);
	if (lmaster >= ctdb->num_nodes) {
		DEBUG(DEBUG_CRIT, (__location__
				   " lmaster[%u] >= ctdb->num_nodes[%u] for key"
				   " with hash[%u]!\n",
				   (unsigned)lmaster,
				   (unsigned)ctdb->num_nodes,
				   (unsigned)ctdb_hash(&key)));
		vdata->traverse_error = true;
		return -1;
	}

	vfl = vdata->vacuum_fetch_list[lmaster];
	if (vfl == NULL) {
		/* the db_id for the new entry is taken from the list itself */
		DEBUG(DEBUG_ERR, (__location__
				  " No vacuum fetch list for lmaster %u\n",
				  (unsigned)lmaster));
		vdata->traverse_error = true;
		return -1;
	}

	vfl = ctdb_marshall_add(ctdb, vfl, vfl->db_id, ctdb->pnn,
				key, NULL, tdb_null);
	if (vfl == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Out of memory \n "));
		vdata->traverse_error = true;
		return -1;
	}

	/* ctdb_marshall_add() may have moved the buffer: store the new pointer */
	vdata->vacuum_fetch_list[lmaster] = vfl;

	return 0;
}
2015-10-26 16:50:09 +11:00
/*
 * Forward declaration of the vacuuming timer handler (tevent timer
 * callback signature); the definition is not in this part of the file.
 */
static void ctdb_vacuum_event ( struct tevent_context * ev ,
struct tevent_timer * te ,
2011-11-25 13:35:05 +01:00
struct timeval t , void * private_data ) ;
2009-09-29 13:20:18 +10:00
2014-02-14 15:28:22 +01:00
/*
 * tdb_parse_record() callback: copy the ltdb header of a record into the
 * struct ctdb_ltdb_header that private_data points to.
 *
 * Only records that consist of exactly a header (i.e. carry no data) are
 * accepted; anything else yields -1.
 *
 * The header is copied with memcpy() rather than read through a cast of
 * data.dptr: the data of a tdb record directly follows its variable-length
 * key, so data.dptr is not guaranteed to be suitably aligned for
 * struct ctdb_ltdb_header. This matches fetch_record_parser().
 */
static int vacuum_record_parser(TDB_DATA key, TDB_DATA data, void *private_data)
{
	struct ctdb_ltdb_header *header =
		(struct ctdb_ltdb_header *)private_data;

	if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
		return -1;
	}

	memcpy(header, data.dptr, sizeof(*header));

	return 0;
}
2009-09-29 13:20:18 +10:00
2009-07-28 17:45:31 +03:00
/*
2009-09-29 13:20:18 +10:00
* traverse function for gathering the records that can be deleted
2009-07-28 17:45:31 +03:00
*/
2014-02-14 18:07:55 +01:00
static int vacuum_traverse ( struct tdb_context * tdb , TDB_DATA key , TDB_DATA data ,
void * private_data )
2009-07-28 17:45:31 +03:00
{
2014-02-14 18:07:55 +01:00
struct vacuum_data * vdata = talloc_get_type ( private_data ,
struct vacuum_data ) ;
2009-09-29 13:20:18 +10:00
struct ctdb_context * ctdb = vdata - > ctdb ;
2014-02-14 18:38:31 +01:00
struct ctdb_db_context * ctdb_db = vdata - > ctdb_db ;
2009-09-29 13:20:18 +10:00
uint32_t lmaster ;
struct ctdb_ltdb_header * hdr ;
2010-12-20 16:41:13 +01:00
int res = 0 ;
2010-12-20 16:31:27 +01:00
2014-02-20 00:32:08 +01:00
vdata - > count . db_traverse . total + + ;
2010-12-23 00:27:27 +01:00
2009-09-29 13:20:18 +10:00
lmaster = ctdb_lmaster ( ctdb , & key ) ;
2011-02-03 12:15:41 +01:00
if ( lmaster > = ctdb - > num_nodes ) {
2014-02-20 00:32:08 +01:00
vdata - > count . db_traverse . error + + ;
2011-02-03 12:15:41 +01:00
DEBUG ( DEBUG_CRIT , ( __location__
" lmaster[%u] >= ctdb->num_nodes[%u] for key "
" with hash[%u]! \n " ,
( unsigned ) lmaster ,
( unsigned ) ctdb - > num_nodes ,
( unsigned ) ctdb_hash ( & key ) ) ) ;
return - 1 ;
2009-09-29 13:20:18 +10:00
}
if ( data . dsize ! = sizeof ( struct ctdb_ltdb_header ) ) {
2011-11-25 13:34:15 +01:00
/* it is not a deleted record */
2014-02-20 00:32:08 +01:00
vdata - > count . db_traverse . skipped + + ;
2009-09-29 13:20:18 +10:00
return 0 ;
}
hdr = ( struct ctdb_ltdb_header * ) data . dptr ;
if ( hdr - > dmaster ! = ctdb - > pnn ) {
2014-02-20 00:32:08 +01:00
vdata - > count . db_traverse . skipped + + ;
2009-09-29 13:20:18 +10:00
return 0 ;
}
2014-02-14 18:38:31 +01:00
/*
* Add the record to this process ' s delete_queue for processing
* in the subsequent traverse in the fast vacuum run .
*/
res = insert_record_into_delete_queue ( ctdb_db , hdr , key ) ;
if ( res ! = 0 ) {
2014-02-20 00:32:08 +01:00
vdata - > count . db_traverse . error + + ;
2010-12-20 16:41:13 +01:00
} else {
2014-02-20 00:32:08 +01:00
vdata - > count . db_traverse . scheduled + + ;
2009-09-29 13:20:18 +10:00
}
2014-02-14 18:38:31 +01:00
return 0 ;
2009-07-28 17:45:31 +03:00
}
/*
2009-09-29 13:20:18 +10:00
* traverse the tree of records to delete and marshall them into
* a blob
2009-07-28 17:45:31 +03:00
*/
2011-12-13 15:59:38 +01:00
static int delete_marshall_traverse ( void * param , void * data )
2009-07-28 17:45:31 +03:00
{
2009-09-29 13:20:18 +10:00
struct delete_record_data * dd = talloc_get_type ( data , struct delete_record_data ) ;
struct delete_records_list * recs = talloc_get_type ( param , struct delete_records_list ) ;
2014-05-06 18:39:25 +10:00
struct ctdb_marshall_buffer * m ;
2009-09-29 13:20:18 +10:00
2014-05-06 18:39:25 +10:00
m = ctdb_marshall_add ( recs , recs - > records , recs - > records - > db_id ,
recs - > records - > db_id ,
dd - > key , & dd - > hdr , tdb_null ) ;
if ( m = = NULL ) {
2009-09-29 13:20:18 +10:00
DEBUG ( DEBUG_ERR , ( __location__ " failed to marshall record \n " ) ) ;
2014-07-10 18:38:13 +10:00
return - 1 ;
2009-09-29 13:20:18 +10:00
}
2014-05-06 18:39:25 +10:00
recs - > records = m ;
2011-11-02 13:33:28 +11:00
return 0 ;
2009-09-29 13:20:18 +10:00
}
2018-02-16 17:00:40 +11:00
/*
 * State shared by one pass over the fetch queue
 * (see ctdb_process_fetch_queue()).
 */
struct fetch_queue_state {
struct ctdb_db_context * ctdb_db ;
/* number of migration calls sent whose callback has not run yet */
int count ;
} ;
/*
 * Per-record state of an outstanding migration call; handed to
 * fetch_record_migrate_callback() and freed there.
 */
struct fetch_record_migrate_state {
/* the pass this migration belongs to */
struct fetch_queue_state * fetch_queue ;
/* private copy of the record key (dptr is a talloc child of this state) */
TDB_DATA key ;
} ;
static void fetch_record_migrate_callback ( struct ctdb_client_call_state * state )
{
struct fetch_record_migrate_state * fetch = talloc_get_type_abort (
state - > async . private_data , struct fetch_record_migrate_state ) ;
struct fetch_queue_state * fetch_queue = fetch - > fetch_queue ;
struct ctdb_ltdb_header hdr ;
struct ctdb_call call = { 0 } ;
int ret ;
ret = ctdb_call_recv ( state , & call ) ;
fetch_queue - > count - - ;
if ( ret ! = 0 ) {
D_ERR ( " Failed to migrate record for vacuuming \n " ) ;
goto done ;
}
ret = tdb_chainlock_nonblock ( fetch_queue - > ctdb_db - > ltdb - > tdb ,
fetch - > key ) ;
if ( ret ! = 0 ) {
goto done ;
}
ret = tdb_parse_record ( fetch_queue - > ctdb_db - > ltdb - > tdb ,
fetch - > key ,
vacuum_record_parser ,
& hdr ) ;
tdb_chainunlock ( fetch_queue - > ctdb_db - > ltdb - > tdb , fetch - > key ) ;
if ( ret ! = 0 ) {
goto done ;
}
D_INFO ( " Vacuum Fetch record, key=%.*s \n " ,
( int ) fetch - > key . dsize ,
fetch - > key . dptr ) ;
( void ) ctdb_local_schedule_for_deletion ( fetch_queue - > ctdb_db ,
& hdr ,
fetch - > key ) ;
done :
talloc_free ( fetch ) ;
}
/*
 * tdb_parse_record() callback: copy the leading ltdb header of a record
 * into the struct ctdb_ltdb_header that private_data points to.
 *
 * Unlike vacuum_record_parser() this also accepts records that carry
 * data after the header. Returns -1 if the record is too short to
 * contain a header, 0 otherwise.
 */
static int fetch_record_parser(TDB_DATA key, TDB_DATA data, void *private_data)
{
	struct ctdb_ltdb_header *out = private_data;

	if (data.dsize >= sizeof(struct ctdb_ltdb_header)) {
		memcpy(out, data.dptr, sizeof(struct ctdb_ltdb_header));
		return 0;
	}

	return -1;
}
/**
* traverse function for the traversal of the fetch_queue .
*
* Send a record migration request .
*/
static int fetch_queue_traverse ( void * param , void * data )
{
struct fetch_record_data * rd = talloc_get_type_abort (
data , struct fetch_record_data ) ;
struct fetch_queue_state * fetch_queue =
( struct fetch_queue_state * ) param ;
struct ctdb_db_context * ctdb_db = fetch_queue - > ctdb_db ;
struct ctdb_client_call_state * state ;
struct fetch_record_migrate_state * fetch ;
struct ctdb_call call = { 0 } ;
struct ctdb_ltdb_header header ;
int ret ;
ret = tdb_chainlock_nonblock ( ctdb_db - > ltdb - > tdb , rd - > key ) ;
if ( ret ! = 0 ) {
return 0 ;
}
ret = tdb_parse_record ( ctdb_db - > ltdb - > tdb ,
rd - > key ,
fetch_record_parser ,
& header ) ;
tdb_chainunlock ( ctdb_db - > ltdb - > tdb , rd - > key ) ;
if ( ret ! = 0 ) {
goto skipped ;
}
if ( header . dmaster = = ctdb_db - > ctdb - > pnn ) {
/* If the record is already migrated, skip */
goto skipped ;
}
fetch = talloc_zero ( ctdb_db , struct fetch_record_migrate_state ) ;
if ( fetch = = NULL ) {
D_ERR ( " Failed to setup fetch record migrate state \n " ) ;
return 0 ;
}
fetch - > fetch_queue = fetch_queue ;
fetch - > key . dsize = rd - > key . dsize ;
fetch - > key . dptr = talloc_memdup ( fetch , rd - > key . dptr , rd - > key . dsize ) ;
if ( fetch - > key . dptr = = NULL ) {
D_ERR ( " Memory error in fetch_queue_traverse \n " ) ;
talloc_free ( fetch ) ;
return 0 ;
}
call . call_id = CTDB_NULL_FUNC ;
call . flags = CTDB_IMMEDIATE_MIGRATION |
CTDB_CALL_FLAG_VACUUM_MIGRATION ;
call . key = fetch - > key ;
state = ctdb_call_send ( ctdb_db , & call ) ;
if ( state = = NULL ) {
DEBUG ( DEBUG_ERR , ( " Failed to setup vacuum fetch call \n " ) ) ;
talloc_free ( fetch ) ;
return 0 ;
}
state - > async . fn = fetch_record_migrate_callback ;
state - > async . private_data = fetch ;
fetch_queue - > count + + ;
return 0 ;
skipped :
D_INFO ( " Skipped Fetch record, key=%.*s \n " ,
( int ) rd - > key . dsize ,
rd - > key . dptr ) ;
return 0 ;
}
/**
* Traverse the fetch .
* Records are migrated to the local node and
* added to delete queue for further processing .
*/
static void ctdb_process_fetch_queue ( struct ctdb_db_context * ctdb_db )
{
struct fetch_queue_state state ;
int ret ;
state . ctdb_db = ctdb_db ;
state . count = 0 ;
ret = trbt_traversearray32 ( ctdb_db - > fetch_queue , 1 ,
fetch_queue_traverse , & state ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Error traversing "
" the fetch queue. \n " ) ) ;
}
/* Wait for all migrations to complete */
while ( state . count > 0 ) {
tevent_loop_once ( ctdb_db - > ctdb - > ev ) ;
}
}
2010-12-20 17:24:32 +01:00
/**
* traverse function for the traversal of the delete_queue ,
* the fast - path vacuuming list .
*
* - If the record has been migrated off the node
* or has been revived ( filled with data ) on the node ,
* then skip the record .
*
* - If the current node is the record ' s lmaster and it is
* a record that has never been migrated with data , then
* delete the record from the local tdb .
*
* - If the current node is the record ' s lmaster and it has
* been migrated with data , then schedule it for the normal
* vacuuming procedure ( i . e . add it to the delete_list ) .
*
* - If the current node is NOT the record ' s lmaster then
* add it to the list of records that are to be sent to
* the lmaster with the VACUUM_FETCH message .
*/
2011-11-02 13:33:28 +11:00
static int delete_queue_traverse ( void * param , void * data )
2010-12-20 17:24:32 +01:00
{
struct delete_record_data * dd =
talloc_get_type ( data , struct delete_record_data ) ;
struct vacuum_data * vdata = talloc_get_type ( param , struct vacuum_data ) ;
struct ctdb_db_context * ctdb_db = dd - > ctdb_db ;
struct ctdb_context * ctdb = ctdb_db - > ctdb ; /* or dd->ctdb ??? */
int res ;
2014-02-14 15:28:22 +01:00
struct ctdb_ltdb_header header ;
2010-12-20 17:24:32 +01:00
uint32_t lmaster ;
2011-12-23 17:08:28 +01:00
uint32_t hash = ctdb_hash ( & ( dd - > key ) ) ;
2010-12-20 17:24:32 +01:00
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . total + + ;
2010-12-23 00:27:27 +01:00
2014-04-14 13:18:41 +10:00
res = tdb_chainlock_nonblock ( ctdb_db - > ltdb - > tdb , dd - > key ) ;
2010-12-20 17:24:32 +01:00
if ( res ! = 0 ) {
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . error + + ;
2011-11-02 13:33:28 +11:00
return 0 ;
2010-12-20 17:24:32 +01:00
}
2014-02-14 15:28:22 +01:00
res = tdb_parse_record ( ctdb_db - > ltdb - > tdb , dd - > key ,
vacuum_record_parser , & header ) ;
if ( res ! = 0 ) {
2010-12-23 00:27:27 +01:00
goto skipped ;
2010-12-20 17:24:32 +01:00
}
2014-02-14 15:28:22 +01:00
if ( header . dmaster ! = ctdb - > pnn ) {
2010-12-20 17:24:32 +01:00
/* The record has been migrated off the node. Skip. */
2010-12-23 00:27:27 +01:00
goto skipped ;
2010-12-20 17:24:32 +01:00
}
2014-02-14 15:28:22 +01:00
if ( header . rsn ! = dd - > hdr . rsn ) {
2010-12-20 17:24:32 +01:00
/*
* The record has been migrated off the node and back again .
* But not requeued for deletion . Skip it .
*/
2010-12-23 00:27:27 +01:00
goto skipped ;
2010-12-20 17:24:32 +01:00
}
/*
* We are dmaster , and the record has no data , and it has
* not been migrated after it has been queued for deletion .
*
* At this stage , the record could still have been revived locally
* and last been written with empty data . This can only be
* fixed with the addition of an active or delete flag . ( TODO )
*/
lmaster = ctdb_lmaster ( ctdb_db - > ctdb , & dd - > key ) ;
if ( lmaster ! = ctdb - > pnn ) {
res = add_record_to_vacuum_fetch_list ( vdata , dd - > key ) ;
if ( res ! = 0 ) {
DEBUG ( DEBUG_ERR ,
( __location__ " Error adding record to list "
" of records to send to lmaster. \n " ) ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . error + + ;
2010-12-23 00:27:27 +01:00
} else {
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . added_to_vacuum_fetch_list + + ;
2010-12-20 17:24:32 +01:00
}
goto done ;
}
/* use header->flags or dd->hdr.flags ?? */
if ( dd - > hdr . flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA ) {
2011-12-16 10:53:26 +01:00
res = add_record_to_delete_list ( vdata , dd - > key , & dd - > hdr ) ;
2010-12-20 17:24:32 +01:00
if ( res ! = 0 ) {
DEBUG ( DEBUG_ERR ,
( __location__ " Error adding record to list "
" of records for deletion on lmaster. \n " ) ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . error + + ;
2010-12-23 00:27:27 +01:00
} else {
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . added_to_delete_list + + ;
2010-12-20 17:24:32 +01:00
}
} else {
res = tdb_delete ( ctdb_db - > ltdb - > tdb , dd - > key ) ;
if ( res ! = 0 ) {
DEBUG ( DEBUG_ERR ,
2011-12-23 17:08:28 +01:00
( __location__ " Error deleting record with key "
" hash [0x%08x] from local data base db[%s]. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . error + + ;
2014-02-14 15:30:08 +01:00
goto done ;
2010-12-20 17:24:32 +01:00
}
2014-02-14 15:30:08 +01:00
DEBUG ( DEBUG_DEBUG ,
( __location__ " Deleted record with key hash "
" [0x%08x] from local data base db[%s]. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . deleted + + ;
2010-12-20 17:24:32 +01:00
}
2010-12-23 00:27:27 +01:00
goto done ;
skipped :
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . skipped + + ;
2010-12-23 00:27:27 +01:00
2010-12-20 17:24:32 +01:00
done :
tdb_chainunlock ( ctdb_db - > ltdb - > tdb , dd - > key ) ;
2011-11-02 13:33:28 +11:00
return 0 ;
2010-12-20 17:24:32 +01:00
}
2011-12-13 17:32:45 +01:00
/**
* Delete the records that we are lmaster and dmaster for and
* that could be deleted on all other nodes via the TRY_DELETE_RECORDS
* control .
*/
static int delete_record_traverse ( void * param , void * data )
{
struct delete_record_data * dd =
talloc_get_type ( data , struct delete_record_data ) ;
struct vacuum_data * vdata = talloc_get_type ( param , struct vacuum_data ) ;
struct ctdb_db_context * ctdb_db = dd - > ctdb_db ;
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
int res ;
2014-02-14 21:50:59 +01:00
struct ctdb_ltdb_header header ;
2011-12-13 17:32:45 +01:00
uint32_t lmaster ;
2011-12-23 17:22:46 +01:00
uint32_t hash = ctdb_hash ( & ( dd - > key ) ) ;
2011-12-13 17:32:45 +01:00
2018-02-14 15:18:17 +11:00
if ( dd - > remote_fail_count > 0 ) {
vdata - > count . delete_list . remote_error + + ;
vdata - > count . delete_list . left - - ;
talloc_free ( dd ) ;
return 0 ;
}
2011-12-13 17:32:45 +01:00
res = tdb_chainlock ( ctdb_db - > ltdb - > tdb , dd - > key ) ;
if ( res ! = 0 ) {
2011-12-23 17:22:46 +01:00
DEBUG ( DEBUG_ERR ,
( __location__ " Error getting chainlock on record with "
" key hash [0x%08x] on database db[%s]. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_list . local_error + + ;
vdata - > count . delete_list . left - - ;
2014-02-14 01:55:39 +01:00
talloc_free ( dd ) ;
2011-12-13 17:32:45 +01:00
return 0 ;
}
/*
* Verify that the record is still empty , its RSN has not
* changed and that we are still its lmaster and dmaster .
*/
2014-02-14 21:50:59 +01:00
res = tdb_parse_record ( ctdb_db - > ltdb - > tdb , dd - > key ,
vacuum_record_parser , & header ) ;
if ( res ! = 0 ) {
2014-02-14 01:55:39 +01:00
goto skip ;
2011-12-13 17:32:45 +01:00
}
2014-02-14 21:50:59 +01:00
if ( header . flags & CTDB_REC_RO_FLAGS ) {
2013-04-03 14:12:27 +02:00
DEBUG ( DEBUG_INFO , ( __location__ " : record with hash [0x%08x] "
" on database db[%s] has read-only flags. "
" skipping. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-14 01:55:39 +01:00
goto skip ;
2012-02-29 16:09:24 +11:00
}
2014-02-14 21:50:59 +01:00
if ( header . dmaster ! = ctdb - > pnn ) {
2013-04-03 14:12:27 +02:00
DEBUG ( DEBUG_INFO , ( __location__ " : record with hash [0x%08x] "
" on database db[%s] has been migrated away. "
" skipping. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-14 01:55:39 +01:00
goto skip ;
2011-12-13 17:32:45 +01:00
}
2018-02-14 14:50:40 +11:00
if ( header . rsn ! = dd - > hdr . rsn ) {
2011-12-13 17:32:45 +01:00
/*
* The record has been migrated off the node and back again .
* But not requeued for deletion . Skip it .
*/
2013-04-03 14:12:27 +02:00
DEBUG ( DEBUG_INFO , ( __location__ " : record with hash [0x%08x] "
" on database db[%s] seems to have been "
" migrated away and back again (with empty "
" data). skipping. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-14 01:55:39 +01:00
goto skip ;
2011-12-13 17:32:45 +01:00
}
lmaster = ctdb_lmaster ( ctdb_db - > ctdb , & dd - > key ) ;
if ( lmaster ! = ctdb - > pnn ) {
2013-04-03 14:12:27 +02:00
DEBUG ( DEBUG_INFO , ( __location__ " : not lmaster for record in "
" delete list (key hash [0x%08x], db[%s]). "
" Strange! skipping. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-14 01:55:39 +01:00
goto skip ;
2011-12-13 17:32:45 +01:00
}
res = tdb_delete ( ctdb_db - > ltdb - > tdb , dd - > key ) ;
if ( res ! = 0 ) {
DEBUG ( DEBUG_ERR ,
2011-12-23 17:22:46 +01:00
( __location__ " Error deleting record with key hash "
" [0x%08x] from local data base db[%s]. \n " ,
hash , ctdb_db - > db_name ) ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_list . local_error + + ;
2011-12-13 17:32:45 +01:00
goto done ;
}
2011-12-23 17:22:46 +01:00
DEBUG ( DEBUG_DEBUG ,
( __location__ " Deleted record with key hash [0x%08x] from "
" local data base db[%s]. \n " , hash , ctdb_db - > db_name ) ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_list . deleted + + ;
2014-02-14 01:55:39 +01:00
goto done ;
skip :
2014-02-20 00:32:08 +01:00
vdata - > count . delete_list . skipped + + ;
2014-02-14 01:55:39 +01:00
2011-12-13 17:32:45 +01:00
done :
tdb_chainunlock ( ctdb_db - > ltdb - > tdb , dd - > key ) ;
2014-02-14 01:55:39 +01:00
talloc_free ( dd ) ;
2014-02-20 00:32:08 +01:00
vdata - > count . delete_list . left - - ;
2011-12-13 17:32:45 +01:00
return 0 ;
}
2011-12-16 10:59:26 +01:00
/**
* Traverse the delete_queue .
2014-02-14 18:48:02 +01:00
* Records are either deleted directly or filled
* into the delete list or the vacuum fetch lists
* for further processing .
2011-12-16 10:59:26 +01:00
*/
2014-02-14 18:47:25 +01:00
static void ctdb_process_delete_queue ( struct ctdb_db_context * ctdb_db ,
struct vacuum_data * vdata )
2011-12-16 10:59:26 +01:00
{
2014-02-14 17:58:01 +01:00
uint32_t sum ;
2014-02-15 17:59:22 +01:00
int ret ;
ret = trbt_traversearray32 ( ctdb_db - > delete_queue , 1 ,
delete_queue_traverse , vdata ) ;
2014-02-14 17:58:01 +01:00
2014-02-15 17:59:22 +01:00
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Error traversing "
" the delete queue. \n " ) ) ;
}
2011-12-16 10:59:26 +01:00
2014-02-20 00:32:08 +01:00
sum = vdata - > count . delete_queue . deleted
+ vdata - > count . delete_queue . skipped
+ vdata - > count . delete_queue . error
+ vdata - > count . delete_queue . added_to_delete_list
+ vdata - > count . delete_queue . added_to_vacuum_fetch_list ;
2014-02-14 17:58:01 +01:00
2014-02-20 00:32:08 +01:00
if ( vdata - > count . delete_queue . total ! = sum ) {
2014-02-14 17:58:01 +01:00
DEBUG ( DEBUG_ERR , ( __location__ " Inconsistency in fast vacuum "
" counts for db[%s]: total[%u] != sum[%u] \n " ,
2014-02-20 00:32:08 +01:00
ctdb_db - > db_name ,
( unsigned ) vdata - > count . delete_queue . total ,
2014-02-14 17:58:01 +01:00
( unsigned ) sum ) ) ;
}
2014-02-20 00:32:08 +01:00
if ( vdata - > count . delete_queue . total > 0 ) {
2011-12-16 10:59:26 +01:00
DEBUG ( DEBUG_INFO ,
( __location__
" fast vacuuming delete_queue traverse statistics: "
" db[%s] "
" total[%u] "
" del[%u] "
" skp[%u] "
" err[%u] "
" adl[%u] "
" avf[%u] \n " ,
ctdb_db - > db_name ,
2014-02-20 00:32:08 +01:00
( unsigned ) vdata - > count . delete_queue . total ,
( unsigned ) vdata - > count . delete_queue . deleted ,
( unsigned ) vdata - > count . delete_queue . skipped ,
( unsigned ) vdata - > count . delete_queue . error ,
( unsigned ) vdata - > count . delete_queue . added_to_delete_list ,
( unsigned ) vdata - > count . delete_queue . added_to_vacuum_fetch_list ) ) ;
2011-12-16 10:59:26 +01:00
}
return ;
}
2011-12-16 11:04:13 +01:00
/**
* read - only traverse of the database , looking for records that
* might be able to be vacuumed .
*
* This is not done each time but only every tunable
* VacuumFastPathCount times .
*/
2014-02-16 00:35:34 +01:00
static void ctdb_vacuum_traverse_db ( struct ctdb_db_context * ctdb_db ,
struct vacuum_data * vdata )
2011-12-16 11:04:13 +01:00
{
int ret ;
ret = tdb_traverse_read ( ctdb_db - > ltdb - > tdb , vacuum_traverse , vdata ) ;
if ( ret = = - 1 | | vdata - > traverse_error ) {
DEBUG ( DEBUG_ERR , ( __location__ " Traverse error in vacuuming "
" '%s' \n " , ctdb_db - > db_name ) ) ;
2014-04-22 22:09:35 +02:00
return ;
2011-12-16 11:04:13 +01:00
}
2014-02-20 00:32:08 +01:00
if ( vdata - > count . db_traverse . total > 0 ) {
2011-12-16 11:04:13 +01:00
DEBUG ( DEBUG_INFO ,
( __location__
" full vacuuming db traverse statistics: "
" db[%s] "
" total[%u] "
" skp[%u] "
" err[%u] "
2014-02-14 18:38:31 +01:00
" sched[%u] \n " ,
2011-12-16 11:04:13 +01:00
ctdb_db - > db_name ,
2014-02-20 00:32:08 +01:00
( unsigned ) vdata - > count . db_traverse . total ,
( unsigned ) vdata - > count . db_traverse . skipped ,
( unsigned ) vdata - > count . db_traverse . error ,
( unsigned ) vdata - > count . db_traverse . scheduled ) ) ;
2011-12-16 11:04:13 +01:00
}
2014-02-16 00:35:34 +01:00
return ;
2011-12-16 11:04:13 +01:00
}
2011-12-16 17:00:07 +01:00
/**
* Process the vacuum fetch lists :
* For records for which we are not the lmaster , tell the lmaster to
* fetch the record .
*/
2014-02-16 00:37:43 +01:00
static void ctdb_process_vacuum_fetch_lists ( struct ctdb_db_context * ctdb_db ,
struct vacuum_data * vdata )
2011-12-16 17:00:07 +01:00
{
2019-06-08 06:38:56 +10:00
unsigned int i ;
2011-12-16 17:00:07 +01:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2018-02-16 17:01:21 +11:00
int ret , res ;
2011-12-16 17:00:07 +01:00
for ( i = 0 ; i < ctdb - > num_nodes ; i + + ) {
TDB_DATA data ;
struct ctdb_marshall_buffer * vfl = vdata - > vacuum_fetch_list [ i ] ;
if ( ctdb - > nodes [ i ] - > pnn = = ctdb - > pnn ) {
continue ;
}
if ( vfl - > count = = 0 ) {
continue ;
}
DEBUG ( DEBUG_INFO , ( " Found %u records for lmaster %u in '%s' \n " ,
vfl - > count , ctdb - > nodes [ i ] - > pnn ,
ctdb_db - > db_name ) ) ;
2014-05-06 18:52:54 +10:00
data = ctdb_marshall_finish ( vfl ) ;
2018-02-16 17:01:21 +11:00
ret = ctdb_control ( ctdb , ctdb - > nodes [ i ] - > pnn , 0 ,
CTDB_CONTROL_VACUUM_FETCH , 0 ,
data , NULL , NULL , & res , NULL , NULL ) ;
if ( ret ! = 0 | | res ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to send vacuum "
" fetch control to node %u \n " ,
2011-12-16 17:00:07 +01:00
ctdb - > nodes [ i ] - > pnn ) ) ;
}
}
}
2011-11-25 13:55:20 +01:00
/**
2012-11-16 14:33:41 +01:00
* Process the delete list :
2012-12-29 17:23:27 +01:00
*
* This is the last step of vacuuming that consistently deletes
* those records that have been migrated with data and can hence
* not be deleted when leaving a node .
*
* In this step , the lmaster does the final deletion of those empty
2023-03-22 09:36:23 +01:00
* records that it is also dmaster for . It has usually received
2012-12-29 17:23:27 +01:00
* at least some of these records previously from the former dmasters
* with the vacuum fetch message .
*
2018-02-14 14:50:40 +11:00
* 1 ) Send the records to all active nodes with the TRY_DELETE_RECORDS
2012-12-29 17:23:27 +01:00
* control . The remote nodes delete their local copy .
2018-02-14 14:50:40 +11:00
* 2 ) The lmaster locally deletes its copies of all records that
2012-12-29 17:23:27 +01:00
* could successfully be deleted remotely in step # 1.
2009-09-29 13:20:18 +10:00
*/
2014-02-16 01:08:18 +01:00
static void ctdb_process_delete_list ( struct ctdb_db_context * ctdb_db ,
struct vacuum_data * vdata )
2009-09-29 13:20:18 +10:00
{
2011-12-20 15:50:51 +01:00
int ret , i ;
2009-09-29 13:20:18 +10:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2012-12-17 13:03:42 +01:00
struct delete_records_list * recs ;
2012-12-17 17:26:22 +01:00
TDB_DATA indata ;
2015-10-29 17:22:48 +11:00
struct ctdb_node_map_old * nodemap ;
2012-12-17 13:03:42 +01:00
uint32_t * active_nodes ;
int num_active_nodes ;
2012-12-29 17:16:33 +01:00
TALLOC_CTX * tmp_ctx ;
2014-02-14 22:02:41 +01:00
uint32_t sum ;
2012-12-17 13:03:42 +01:00
2014-02-20 00:32:08 +01:00
if ( vdata - > count . delete_list . total = = 0 ) {
2014-02-16 01:08:18 +01:00
return ;
2012-12-17 13:03:42 +01:00
}
2009-09-29 13:20:18 +10:00
2012-12-29 17:16:33 +01:00
tmp_ctx = talloc_new ( vdata ) ;
if ( tmp_ctx = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Out of memory \n " ) ) ;
2014-02-16 01:08:18 +01:00
return ;
2012-12-29 17:16:33 +01:00
}
2014-02-20 00:32:08 +01:00
vdata - > count . delete_list . left = vdata - > count . delete_list . total ;
2011-12-22 15:46:49 +01:00
2012-12-29 18:32:39 +01:00
/*
2013-01-05 01:20:18 +01:00
* get the list of currently active nodes
2012-12-29 18:32:39 +01:00
*/
ret = ctdb_ctrl_getnodemap ( ctdb , TIMELIMIT ( ) ,
CTDB_CURRENT_NODE ,
tmp_ctx ,
& nodemap ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " unable to get node map \n " ) ) ;
goto done ;
}
active_nodes = list_of_active_nodes ( ctdb , nodemap ,
nodemap , /* talloc context */
false /* include self */ ) ;
/* yuck! ;-) */
num_active_nodes = talloc_get_size ( active_nodes ) / sizeof ( * active_nodes ) ;
2013-01-05 01:20:18 +01:00
/*
2018-02-14 14:50:40 +11:00
* Now delete the records all active nodes in a two - phase process :
* 1 ) tell all active remote nodes to delete all their copy
* 2 ) if all remote nodes deleted their record copy , delete it locally
2013-01-05 01:20:18 +01:00
*/
2012-12-29 17:16:33 +01:00
recs = talloc_zero ( tmp_ctx , struct delete_records_list ) ;
2012-12-17 13:03:42 +01:00
if ( recs = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Out of memory \n " ) ) ;
2012-12-29 17:16:33 +01:00
goto done ;
2012-12-17 13:03:42 +01:00
}
2009-09-29 13:20:18 +10:00
2012-12-17 13:03:42 +01:00
/*
2018-02-14 14:50:40 +11:00
* Step 1 :
* Send all records to all active nodes for deletion .
2013-01-05 01:20:18 +01:00
*/
/*
* Create a marshall blob from the remaining list of records to delete .
2012-12-17 13:03:42 +01:00
*/
2013-01-05 01:20:18 +01:00
recs - > records = ( struct ctdb_marshall_buffer * )
talloc_zero_size ( recs ,
offsetof ( struct ctdb_marshall_buffer , data ) ) ;
if ( recs - > records = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Out of memory \n " ) ) ;
goto done ;
}
recs - > records - > db_id = ctdb_db - > db_id ;
2014-02-15 18:06:09 +01:00
ret = trbt_traversearray32 ( vdata - > delete_list , 1 ,
delete_marshall_traverse , recs ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Error traversing the "
" delete list for second marshalling. \n " ) ) ;
2014-07-10 18:38:13 +10:00
goto done ;
2014-02-15 18:06:09 +01:00
}
2011-02-03 12:26:45 +01:00
2014-05-06 18:52:54 +10:00
indata = ctdb_marshall_finish ( recs - > records ) ;
2011-12-16 23:16:27 +01:00
2012-12-17 13:03:42 +01:00
for ( i = 0 ; i < num_active_nodes ; i + + ) {
struct ctdb_marshall_buffer * records ;
2015-10-29 17:30:30 +11:00
struct ctdb_rec_data_old * rec ;
2012-12-17 13:07:21 +01:00
int32_t res ;
2012-12-17 17:26:22 +01:00
TDB_DATA outdata ;
2012-12-17 13:03:42 +01:00
ret = ctdb_control ( ctdb , active_nodes [ i ] , 0 ,
CTDB_CONTROL_TRY_DELETE_RECORDS , 0 ,
indata , recs , & outdata , & res ,
NULL , NULL ) ;
if ( ret ! = 0 | | res ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to delete records on "
" node %u: ret[%d] res[%d] \n " ,
active_nodes [ i ] , ret , res ) ) ;
2012-12-17 17:31:55 +01:00
goto done ;
2011-02-03 12:26:45 +01:00
}
2012-12-17 13:03:42 +01:00
/*
* outdata contains the list of records coming back
* from the node : These are the records that the
2013-01-05 01:20:18 +01:00
* remote node could not delete . We remove these from
* the list to delete locally .
2012-12-17 13:03:42 +01:00
*/
records = ( struct ctdb_marshall_buffer * ) outdata . dptr ;
2015-10-29 17:30:30 +11:00
rec = ( struct ctdb_rec_data_old * ) & records - > data [ 0 ] ;
2019-09-30 16:34:35 +10:00
while ( records - > count - - > 0 ) {
2012-12-17 13:03:42 +01:00
TDB_DATA reckey , recdata ;
struct ctdb_ltdb_header * rechdr ;
struct delete_record_data * dd ;
reckey . dptr = & rec - > data [ 0 ] ;
reckey . dsize = rec - > keylen ;
recdata . dptr = & rec - > data [ reckey . dsize ] ;
recdata . dsize = rec - > datalen ;
if ( recdata . dsize < sizeof ( struct ctdb_ltdb_header ) ) {
DEBUG ( DEBUG_CRIT , ( __location__ " bad ltdb record \n " ) ) ;
2012-12-17 17:31:55 +01:00
goto done ;
2011-12-13 17:30:39 +01:00
}
2012-12-17 13:03:42 +01:00
rechdr = ( struct ctdb_ltdb_header * ) recdata . dptr ;
recdata . dptr + = sizeof ( * rechdr ) ;
recdata . dsize - = sizeof ( * rechdr ) ;
dd = ( struct delete_record_data * ) trbt_lookup32 (
vdata - > delete_list ,
ctdb_hash ( & reckey ) ) ;
if ( dd ! = NULL ) {
/*
2018-02-14 15:18:17 +11:00
* The remote node could not delete the
* record . Since other remote nodes can
* also fail , we just mark the record .
2012-12-17 13:03:42 +01:00
*/
2018-02-14 15:18:17 +11:00
dd - > remote_fail_count + + ;
2014-02-20 00:58:17 +01:00
} else {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to "
" find record with hash 0x%08x coming "
" back from TRY_DELETE_RECORDS "
" control in delete list. \n " ,
ctdb_hash ( & reckey ) ) ) ;
2011-12-20 15:50:51 +01:00
}
2015-10-29 17:30:30 +11:00
rec = ( struct ctdb_rec_data_old * ) ( rec - > length + ( uint8_t * ) rec ) ;
2012-12-17 13:03:42 +01:00
}
2011-12-13 17:32:45 +01:00
}
2011-02-03 12:26:45 +01:00
2013-01-05 01:20:18 +01:00
/*
2018-02-14 14:50:40 +11:00
* Step 2 :
2013-01-05 01:20:18 +01:00
* Delete the remaining records locally .
*
* These records have successfully been deleted on all
* active remote nodes .
*/
2014-02-15 18:06:09 +01:00
ret = trbt_traversearray32 ( vdata - > delete_list , 1 ,
delete_record_traverse , vdata ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Error traversing the "
" delete list for deletion. \n " ) ) ;
}
2013-01-05 01:20:18 +01:00
2014-02-20 00:32:08 +01:00
if ( vdata - > count . delete_list . left ! = 0 ) {
2023-03-22 09:36:23 +01:00
DEBUG ( DEBUG_ERR , ( __location__ " Vacuum db[%s] error: "
2014-02-14 22:01:38 +01:00
" there are %u records left for deletion after "
" processing delete list \n " ,
ctdb_db - > db_name ,
2014-02-20 00:32:08 +01:00
( unsigned ) vdata - > count . delete_list . left ) ) ;
2014-02-14 22:01:38 +01:00
}
2014-02-20 00:32:08 +01:00
sum = vdata - > count . delete_list . deleted
+ vdata - > count . delete_list . skipped
+ vdata - > count . delete_list . remote_error
+ vdata - > count . delete_list . local_error
+ vdata - > count . delete_list . left ;
2014-02-14 22:02:41 +01:00
2014-02-20 00:32:08 +01:00
if ( vdata - > count . delete_list . total ! = sum ) {
2014-02-14 22:02:41 +01:00
DEBUG ( DEBUG_ERR , ( __location__ " Inconsistency in vacuum "
" delete list counts for db[%s]: total[%u] != sum[%u] \n " ,
2014-02-20 00:32:08 +01:00
ctdb_db - > db_name ,
( unsigned ) vdata - > count . delete_list . total ,
2014-02-14 22:02:41 +01:00
( unsigned ) sum ) ) ;
}
2014-02-20 00:32:08 +01:00
if ( vdata - > count . delete_list . total > 0 ) {
2011-12-22 15:46:49 +01:00
DEBUG ( DEBUG_INFO ,
( __location__
" vacuum delete list statistics: "
" db[%s] "
2014-02-14 00:53:23 +01:00
" total[%u] "
" del[%u] "
" skip[%u] "
2011-12-22 15:46:49 +01:00
" rem.err[%u] "
" loc.err[%u] "
" left[%u] \n " ,
ctdb_db - > db_name ,
2014-02-20 00:32:08 +01:00
( unsigned ) vdata - > count . delete_list . total ,
( unsigned ) vdata - > count . delete_list . deleted ,
( unsigned ) vdata - > count . delete_list . skipped ,
( unsigned ) vdata - > count . delete_list . remote_error ,
( unsigned ) vdata - > count . delete_list . local_error ,
( unsigned ) vdata - > count . delete_list . left ) ) ;
2011-12-22 15:46:49 +01:00
}
2012-12-17 17:31:55 +01:00
done :
2012-12-29 17:16:33 +01:00
talloc_free ( tmp_ctx ) ;
2012-12-17 17:31:55 +01:00
2014-02-16 01:08:18 +01:00
return ;
2011-12-20 15:50:51 +01:00
}
2011-12-16 23:16:27 +01:00
2011-12-20 15:50:51 +01:00
/**
* initialize the vacuum_data
*/
2014-04-19 03:34:05 +02:00
static struct vacuum_data * ctdb_vacuum_init_vacuum_data (
struct ctdb_db_context * ctdb_db ,
TALLOC_CTX * mem_ctx )
2011-12-20 15:50:51 +01:00
{
2019-06-08 06:38:56 +10:00
unsigned int i ;
2011-12-20 15:50:51 +01:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2014-04-19 03:34:05 +02:00
struct vacuum_data * vdata ;
vdata = talloc_zero ( mem_ctx , struct vacuum_data ) ;
if ( vdata = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Out of memory \n " ) ) ;
return NULL ;
}
vdata - > ctdb = ctdb_db - > ctdb ;
vdata - > ctdb_db = ctdb_db ;
vdata - > delete_list = trbt_create ( vdata , 0 ) ;
if ( vdata - > delete_list = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Out of memory \n " ) ) ;
goto fail ;
}
vdata - > start = timeval_current ( ) ;
2011-12-20 15:50:51 +01:00
2014-02-20 00:32:08 +01:00
vdata - > count . delete_queue . added_to_delete_list = 0 ;
vdata - > count . delete_queue . added_to_vacuum_fetch_list = 0 ;
vdata - > count . delete_queue . deleted = 0 ;
vdata - > count . delete_queue . skipped = 0 ;
vdata - > count . delete_queue . error = 0 ;
vdata - > count . delete_queue . total = 0 ;
vdata - > count . db_traverse . scheduled = 0 ;
vdata - > count . db_traverse . skipped = 0 ;
vdata - > count . db_traverse . error = 0 ;
vdata - > count . db_traverse . total = 0 ;
vdata - > count . delete_list . total = 0 ;
vdata - > count . delete_list . left = 0 ;
vdata - > count . delete_list . remote_error = 0 ;
vdata - > count . delete_list . local_error = 0 ;
vdata - > count . delete_list . skipped = 0 ;
vdata - > count . delete_list . deleted = 0 ;
2011-12-20 15:50:51 +01:00
/* the list needs to be of length num_nodes */
2011-12-23 00:14:18 +01:00
vdata - > vacuum_fetch_list = talloc_zero_array ( vdata ,
2011-12-20 15:50:51 +01:00
struct ctdb_marshall_buffer * ,
ctdb - > num_nodes ) ;
if ( vdata - > vacuum_fetch_list = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Out of memory \n " ) ) ;
2014-04-19 03:34:05 +02:00
goto fail ;
2011-12-20 15:50:51 +01:00
}
for ( i = 0 ; i < ctdb - > num_nodes ; i + + ) {
vdata - > vacuum_fetch_list [ i ] = ( struct ctdb_marshall_buffer * )
talloc_zero_size ( vdata - > vacuum_fetch_list ,
offsetof ( struct ctdb_marshall_buffer , data ) ) ;
if ( vdata - > vacuum_fetch_list [ i ] = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Out of memory \n " ) ) ;
2014-04-19 03:34:05 +02:00
talloc_free ( vdata ) ;
return NULL ;
2011-12-20 15:50:51 +01:00
}
vdata - > vacuum_fetch_list [ i ] - > db_id = ctdb_db - > db_id ;
}
2011-12-16 23:16:27 +01:00
2014-04-19 03:34:05 +02:00
return vdata ;
fail :
talloc_free ( vdata ) ;
return NULL ;
2011-12-16 23:15:51 +01:00
}
/**
* Vacuum a DB :
* - Always do the fast vacuuming run , which traverses
2018-02-16 17:00:40 +11:00
* - the in - memory fetch queue : these records have been
* scheduled for migration
* - the in - memory delete queue : these records have been
* scheduled for deletion .
2011-12-16 23:15:51 +01:00
* - Only if explicitly requested , the database is traversed
* in order to use the traditional heuristics on empty records
* to trigger deletion .
* This is done only every VacuumFastPathCount ' th vacuuming run .
*
* The traverse runs fill two lists :
*
* - The delete_list :
* This is the list of empty records the current
* node is lmaster and dmaster for . These records are later
* deleted first on other nodes and then locally .
*
* The fast vacuuming run has a short cut for those records
* that have never been migrated with data : these records
* are immediately deleted locally , since they have left
* no trace on other nodes .
*
* - The vacuum_fetch lists
* ( one for each other lmaster node ) :
* The records in this list are sent for deletion to
2018-02-16 17:01:21 +11:00
* their lmaster in a bulk VACUUM_FETCH control .
2011-12-16 23:15:51 +01:00
*
* The lmaster then migrates all these records to itelf
* so that they can be vacuumed there .
*
* This executes in the child context .
*/
static int ctdb_vacuum_db ( struct ctdb_db_context * ctdb_db ,
bool full_vacuum_run )
{
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2011-12-20 15:50:51 +01:00
int ret , pnn ;
2014-04-19 03:34:05 +02:00
struct vacuum_data * vdata ;
TALLOC_CTX * tmp_ctx ;
2011-12-16 23:15:51 +01:00
DEBUG ( DEBUG_INFO , ( __location__ " Entering %s vacuum run for db "
" %s db_id[0x%08x] \n " ,
full_vacuum_run ? " full " : " fast " ,
ctdb_db - > db_name , ctdb_db - > db_id ) ) ;
ret = ctdb_ctrl_getvnnmap ( ctdb , TIMELIMIT ( ) , CTDB_CURRENT_NODE , ctdb , & ctdb - > vnn_map ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Unable to get vnnmap from local node \n " ) ) ;
return ret ;
}
pnn = ctdb_ctrl_getpnn ( ctdb , TIMELIMIT ( ) , CTDB_CURRENT_NODE ) ;
if ( pnn = = - 1 ) {
DEBUG ( DEBUG_ERR , ( " Unable to get pnn from local node \n " ) ) ;
return - 1 ;
}
ctdb - > pnn = pnn ;
2014-04-19 03:34:05 +02:00
tmp_ctx = talloc_new ( ctdb_db ) ;
if ( tmp_ctx = = NULL ) {
DEBUG ( DEBUG_ERR , ( " Out of memory! \n " ) ) ;
return - 1 ;
}
vdata = ctdb_vacuum_init_vacuum_data ( ctdb_db , tmp_ctx ) ;
if ( vdata = = NULL ) {
talloc_free ( tmp_ctx ) ;
return - 1 ;
2011-12-16 23:15:51 +01:00
}
2014-02-14 18:03:02 +01:00
if ( full_vacuum_run ) {
2014-02-16 00:35:34 +01:00
ctdb_vacuum_traverse_db ( ctdb_db , vdata ) ;
2011-12-16 23:15:51 +01:00
}
2018-02-16 17:00:40 +11:00
ctdb_process_fetch_queue ( ctdb_db ) ;
2014-02-14 18:47:25 +01:00
ctdb_process_delete_queue ( ctdb_db , vdata ) ;
2014-02-14 18:27:14 +01:00
2014-02-16 00:37:43 +01:00
ctdb_process_vacuum_fetch_lists ( ctdb_db , vdata ) ;
2011-12-16 23:15:51 +01:00
2014-02-16 01:08:18 +01:00
ctdb_process_delete_list ( ctdb_db , vdata ) ;
2011-12-16 23:15:51 +01:00
2014-04-19 03:34:05 +02:00
talloc_free ( tmp_ctx ) ;
2009-09-29 13:20:18 +10:00
return 0 ;
}
/*
2023-03-22 09:36:23 +01:00
* repack and vacuum a db
2009-09-29 13:20:18 +10:00
* called from the child context
*/
2010-12-20 10:55:53 +01:00
static int ctdb_vacuum_and_repack_db ( struct ctdb_db_context * ctdb_db ,
2010-12-20 18:03:38 +01:00
bool full_vacuum_run )
2009-07-28 17:45:31 +03:00
{
2009-07-28 23:09:28 +03:00
uint32_t repack_limit = ctdb_db - > ctdb - > tunable . repack_limit ;
2009-07-28 17:45:31 +03:00
const char * name = ctdb_db - > db_name ;
2014-02-12 17:40:31 +01:00
int freelist_size = 0 ;
2014-04-19 02:59:51 +02:00
int ret ;
2009-07-28 17:45:31 +03:00
2014-04-19 03:34:05 +02:00
if ( ctdb_vacuum_db ( ctdb_db , full_vacuum_run ) ! = 0 ) {
2009-09-29 13:20:18 +10:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to vacuum '%s' \n " , name ) ) ;
}
2014-02-15 01:36:06 +01:00
freelist_size = tdb_freelist_size ( ctdb_db - > ltdb - > tdb ) ;
if ( freelist_size = = - 1 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to get freelist size for '%s' \n " , name ) ) ;
return - 1 ;
2014-02-10 02:44:56 +01:00
}
2009-09-29 13:20:18 +10:00
/*
* decide if a repack is necessary
*/
2014-02-14 22:05:21 +01:00
if ( ( repack_limit = = 0 | | ( uint32_t ) freelist_size < repack_limit ) )
2011-11-25 13:23:23 +01:00
{
2009-07-28 17:45:31 +03:00
return 0 ;
}
2019-10-02 17:51:12 +10:00
D_NOTICE ( " Repacking %s with %u freelist entries \n " ,
name ,
freelist_size ) ;
2009-07-28 17:45:31 +03:00
2014-04-19 02:59:51 +02:00
ret = tdb_repack ( ctdb_db - > ltdb - > tdb ) ;
if ( ret ! = 0 ) {
2009-07-28 17:45:31 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to repack '%s' \n " , name ) ) ;
return - 1 ;
}
return 0 ;
}
2011-12-13 14:21:04 +01:00
static uint32_t get_vacuum_interval ( struct ctdb_db_context * ctdb_db )
2009-09-29 13:20:18 +10:00
{
2011-12-13 14:21:04 +01:00
uint32_t interval = ctdb_db - > ctdb - > tunable . vacuum_interval ;
2009-09-29 13:20:18 +10:00
return interval ;
}
2009-07-23 16:03:39 +10:00
static int vacuum_child_destructor ( struct ctdb_vacuum_child_context * child_ctx )
{
double l = timeval_elapsed ( & child_ctx - > start_time ) ;
2020-04-02 14:18:33 +11:00
struct ctdb_vacuum_handle * vacuum_handle = child_ctx - > vacuum_handle ;
struct ctdb_db_context * ctdb_db = vacuum_handle - > ctdb_db ;
2009-07-23 16:03:39 +10:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2014-02-11 14:23:28 +11:00
CTDB_UPDATE_DB_LATENCY ( ctdb_db , " vacuum " , vacuum . latency , l ) ;
2009-11-24 09:27:22 +11:00
DEBUG ( DEBUG_INFO , ( " Vacuuming took %.3f seconds for database %s \n " , l , ctdb_db - > db_name ) ) ;
2009-07-23 16:03:39 +10:00
if ( child_ctx - > child_pid ! = - 1 ) {
2012-05-03 11:42:41 +10:00
ctdb_kill ( ctdb , child_ctx - > child_pid , SIGKILL ) ;
2010-12-20 17:49:29 +01:00
} else {
/* Bump the number of successful fast-path runs. */
2020-04-02 14:18:33 +11:00
vacuum_handle - > fast_path_count + + ;
2009-07-23 16:03:39 +10:00
}
2019-10-04 12:06:21 +10:00
ctdb - > vacuumer = NULL ;
2010-07-21 12:29:55 +09:30
2019-10-15 16:36:44 +11:00
if ( child_ctx - > scheduled ) {
2020-03-27 14:38:09 +11:00
vacuum_handle - > vacuum_interval = get_vacuum_interval ( ctdb_db ) ;
2019-10-15 16:36:44 +11:00
tevent_add_timer (
ctdb - > ev ,
2020-04-02 14:18:33 +11:00
vacuum_handle ,
2020-03-27 14:38:09 +11:00
timeval_current_ofs ( vacuum_handle - > vacuum_interval , 0 ) ,
2019-10-15 16:36:44 +11:00
ctdb_vacuum_event ,
2020-04-02 14:18:33 +11:00
vacuum_handle ) ;
2019-10-15 16:36:44 +11:00
}
2009-07-23 16:03:39 +10:00
return 0 ;
}
/*
* this event is generated when a vacuum child process times out
*/
2015-10-26 16:50:09 +11:00
static void vacuum_child_timeout ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval t , void * private_data )
2009-07-23 16:03:39 +10:00
{
struct ctdb_vacuum_child_context * child_ctx = talloc_get_type ( private_data , struct ctdb_vacuum_child_context ) ;
DEBUG ( DEBUG_ERR , ( " Vacuuming child process timed out for db %s \n " , child_ctx - > vacuum_handle - > ctdb_db - > db_name ) ) ;
child_ctx - > status = VACUUM_TIMEOUT ;
talloc_free ( child_ctx ) ;
}
/*
* this event is generated when a vacuum child process has completed
*/
2015-10-26 16:50:09 +11:00
static void vacuum_child_handler ( struct tevent_context * ev ,
struct tevent_fd * fde ,
uint16_t flags , void * private_data )
2009-07-23 16:03:39 +10:00
{
struct ctdb_vacuum_child_context * child_ctx = talloc_get_type ( private_data , struct ctdb_vacuum_child_context ) ;
char c = 0 ;
int ret ;
2009-11-24 09:27:22 +11:00
DEBUG ( DEBUG_INFO , ( " Vacuuming child process %d finished for db %s \n " , child_ctx - > child_pid , child_ctx - > vacuum_handle - > ctdb_db - > db_name ) ) ;
2009-07-23 16:03:39 +10:00
child_ctx - > child_pid = - 1 ;
2014-07-30 21:03:53 +10:00
ret = sys_read ( child_ctx - > fd [ 0 ] , & c , 1 ) ;
2009-07-23 16:03:39 +10:00
if ( ret ! = 1 | | c ! = 0 ) {
child_ctx - > status = VACUUM_ERROR ;
DEBUG ( DEBUG_ERR , ( " A vacuum child process failed with an error for database %s. ret=%d c=%d \n " , child_ctx - > vacuum_handle - > ctdb_db - > db_name , ret , c ) ) ;
} else {
child_ctx - > status = VACUUM_OK ;
}
talloc_free ( child_ctx ) ;
}
/*
* this event is called every time we need to start a new vacuum process
*/
2019-07-30 14:16:13 +10:00
static int vacuum_db_child ( TALLOC_CTX * mem_ctx ,
struct ctdb_db_context * ctdb_db ,
2019-10-15 16:36:44 +11:00
bool scheduled ,
2019-07-30 14:16:13 +10:00
bool full_vacuum_run ,
struct ctdb_vacuum_child_context * * out )
2009-07-23 16:03:39 +10:00
{
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
struct ctdb_vacuum_child_context * child_ctx ;
2010-08-18 09:16:31 +09:30
struct tevent_fd * fde ;
2009-07-23 16:03:39 +10:00
int ret ;
2015-07-26 23:02:57 +02:00
/* we don't vacuum if we are in recovery mode, or db frozen */
2010-07-21 12:28:04 +09:30
if ( ctdb - > recovery_mode = = CTDB_RECOVERY_ACTIVE | |
2016-07-19 16:30:26 +10:00
ctdb_db_frozen ( ctdb_db ) ) {
2019-07-30 14:16:13 +10:00
D_INFO ( " Not vacuuming %s (%s) \n " , ctdb_db - > db_name ,
ctdb - > recovery_mode = = CTDB_RECOVERY_ACTIVE ?
" in recovery " : " frozen " ) ;
return EAGAIN ;
2009-07-23 16:03:39 +10:00
}
2014-02-21 14:58:00 +11:00
/* Do not allow multiple vacuuming child processes to be active at the
* same time . If there is vacuuming child process active , delay
* new vacuuming event to stagger vacuuming events .
*/
2019-10-04 12:06:21 +10:00
if ( ctdb - > vacuumer ! = NULL ) {
2019-07-30 14:16:13 +10:00
return EBUSY ;
2014-02-21 14:58:00 +11:00
}
2019-07-30 14:16:13 +10:00
child_ctx = talloc_zero ( mem_ctx , struct ctdb_vacuum_child_context ) ;
2009-07-23 16:03:39 +10:00
if ( child_ctx = = NULL ) {
2019-07-30 14:16:13 +10:00
DBG_ERR ( " Failed to allocate child context for vacuuming of %s \n " ,
ctdb_db - > db_name ) ;
return ENOMEM ;
2009-07-23 16:03:39 +10:00
}
ret = pipe ( child_ctx - > fd ) ;
if ( ret ! = 0 ) {
talloc_free ( child_ctx ) ;
2019-07-30 14:16:13 +10:00
D_ERR ( " Failed to create pipe for vacuum child process. \n " ) ;
return EAGAIN ;
2010-12-20 17:54:04 +01:00
}
2011-01-10 13:57:49 +11:00
child_ctx - > child_pid = ctdb_fork ( ctdb ) ;
2009-07-23 16:03:39 +10:00
if ( child_ctx - > child_pid = = ( pid_t ) - 1 ) {
close ( child_ctx - > fd [ 0 ] ) ;
close ( child_ctx - > fd [ 1 ] ) ;
talloc_free ( child_ctx ) ;
2019-07-30 14:16:13 +10:00
D_ERR ( " Failed to fork vacuum child process. \n " ) ;
return EAGAIN ;
2009-07-23 16:03:39 +10:00
}
if ( child_ctx - > child_pid = = 0 ) {
char cc = 0 ;
close ( child_ctx - > fd [ 0 ] ) ;
2019-07-30 14:16:13 +10:00
D_INFO ( " Vacuuming child process %d for db %s started \n " ,
getpid ( ) ,
ctdb_db - > db_name ) ;
2015-09-23 16:10:59 -07:00
prctl_set_comment ( " ctdb_vacuum " ) ;
2019-07-30 14:16:13 +10:00
ret = switch_from_server_to_client ( ctdb ) ;
if ( ret ! = 0 ) {
DBG_ERR ( " ERROR: failed to switch vacuum daemon "
" into client mode. \n " ) ;
return EIO ;
2009-09-29 13:20:18 +10:00
}
2014-04-19 03:36:49 +02:00
cc = ctdb_vacuum_and_repack_db ( ctdb_db , full_vacuum_run ) ;
2009-07-23 16:03:39 +10:00
2014-07-30 21:03:53 +10:00
sys_write ( child_ctx - > fd [ 1 ] , & cc , 1 ) ;
2009-07-23 16:03:39 +10:00
_exit ( 0 ) ;
}
set_close_on_exec ( child_ctx - > fd [ 0 ] ) ;
close ( child_ctx - > fd [ 1 ] ) ;
child_ctx - > status = VACUUM_RUNNING ;
2019-10-15 16:36:44 +11:00
child_ctx - > scheduled = scheduled ;
2009-07-23 16:03:39 +10:00
child_ctx - > start_time = timeval_current ( ) ;
2019-10-04 12:06:21 +10:00
ctdb - > vacuumer = child_ctx ;
2009-07-23 16:03:39 +10:00
talloc_set_destructor ( child_ctx , vacuum_child_destructor ) ;
2010-12-17 01:53:25 +01:00
/*
* Clear the fastpath vacuuming list in the parent .
*/
talloc_free ( ctdb_db - > delete_queue ) ;
ctdb_db - > delete_queue = trbt_create ( ctdb_db , 0 ) ;
if ( ctdb_db - > delete_queue = = NULL ) {
2019-07-30 14:16:13 +10:00
DBG_ERR ( " Out of memory when re-creating vacuum tree \n " ) ;
return ENOMEM ;
2010-12-17 01:53:25 +01:00
}
2018-02-16 17:00:40 +11:00
talloc_free ( ctdb_db - > fetch_queue ) ;
ctdb_db - > fetch_queue = trbt_create ( ctdb_db , 0 ) ;
if ( ctdb_db - > fetch_queue = = NULL ) {
ctdb_fatal ( ctdb , " Out of memory when re-create fetch queue "
" in parent context. Shutting down \n " ) ;
}
2015-10-26 16:50:09 +11:00
tevent_add_timer ( ctdb - > ev , child_ctx ,
2019-07-30 14:16:13 +10:00
timeval_current_ofs ( ctdb - > tunable . vacuum_max_run_time ,
0 ) ,
2015-10-26 16:50:09 +11:00
vacuum_child_timeout , child_ctx ) ;
2009-07-23 16:03:39 +10:00
2019-07-30 14:16:13 +10:00
DBG_DEBUG ( " Created PIPE FD:%d to child vacuum process \n " ,
child_ctx - > fd [ 0 ] ) ;
2009-10-15 11:24:54 +11:00
2015-10-26 16:50:09 +11:00
fde = tevent_add_fd ( ctdb - > ev , child_ctx , child_ctx - > fd [ 0 ] ,
TEVENT_FD_READ , vacuum_child_handler , child_ctx ) ;
2010-08-18 09:16:31 +09:30
tevent_fd_set_auto_close ( fde ) ;
2009-07-23 16:03:39 +10:00
2019-07-30 14:16:13 +10:00
child_ctx - > vacuum_handle = ctdb_db - > vacuum_handle ;
* out = child_ctx ;
return 0 ;
}
static void ctdb_vacuum_event ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval t , void * private_data )
{
struct ctdb_vacuum_handle * vacuum_handle = talloc_get_type (
private_data , struct ctdb_vacuum_handle ) ;
struct ctdb_db_context * ctdb_db = vacuum_handle - > ctdb_db ;
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
struct ctdb_vacuum_child_context * child_ctx = NULL ;
uint32_t fast_path_max = ctdb - > tunable . vacuum_fast_path_count ;
2020-03-27 14:38:09 +11:00
uint32_t vacuum_interval = get_vacuum_interval ( ctdb_db ) ;
2019-07-30 14:16:13 +10:00
bool full_vacuum_run = false ;
int ret ;
2020-04-02 14:42:21 +11:00
if ( vacuum_interval > vacuum_handle - > vacuum_interval ) {
uint32_t d = vacuum_interval - vacuum_handle - > vacuum_interval ;
DBG_INFO ( " Vacuum interval increased from "
" % " PRIu32 " to % " PRIu32 " , rescheduling \n " ,
vacuum_handle - > vacuum_interval ,
vacuum_interval ) ;
vacuum_handle - > vacuum_interval = vacuum_interval ;
tevent_add_timer ( ctdb - > ev ,
vacuum_handle ,
timeval_current_ofs ( d , 0 ) ,
ctdb_vacuum_event ,
vacuum_handle ) ;
return ;
}
2020-03-27 14:38:09 +11:00
vacuum_handle - > vacuum_interval = vacuum_interval ;
2019-07-30 14:16:13 +10:00
if ( vacuum_handle - > fast_path_count > = fast_path_max ) {
if ( fast_path_max > 0 ) {
full_vacuum_run = true ;
}
vacuum_handle - > fast_path_count = 0 ;
}
ret = vacuum_db_child ( vacuum_handle ,
ctdb_db ,
2019-10-15 16:36:44 +11:00
true ,
2019-07-30 14:16:13 +10:00
full_vacuum_run ,
& child_ctx ) ;
if ( ret = = 0 ) {
return ;
}
switch ( ret ) {
case EBUSY :
/* Stagger */
tevent_add_timer ( ctdb - > ev ,
vacuum_handle ,
timeval_current_ofs ( 0 , 500 * 1000 ) ,
ctdb_vacuum_event ,
vacuum_handle ) ;
break ;
default :
/* Temporary failure, schedule next attempt */
tevent_add_timer ( ctdb - > ev ,
vacuum_handle ,
timeval_current_ofs (
2020-03-27 14:38:09 +11:00
vacuum_handle - > vacuum_interval , 0 ) ,
2019-07-30 14:16:13 +10:00
ctdb_vacuum_event ,
vacuum_handle ) ;
}
2009-07-23 16:03:39 +10:00
}
2019-07-30 14:17:11 +10:00
/*
 * State kept for a DB_VACUUM control while its vacuuming child runs.
 * ctdb_control_db_vacuum() fills it in and installs
 * vacuum_control_state_destructor(), which sends the reply.
 */
struct vacuum_control_state {
/* vacuuming child whose final status determines the reply */
struct ctdb_vacuum_child_context * child_ctx ;
/* the control request that is answered from the destructor */
struct ctdb_req_control_old * c ;
/* ctdb context passed to ctdb_request_control_reply() */
struct ctdb_context * ctdb ;
} ;
static int vacuum_control_state_destructor ( struct vacuum_control_state * state )
{
struct ctdb_vacuum_child_context * child_ctx = state - > child_ctx ;
int32_t status ;
status = ( child_ctx - > status = = VACUUM_OK ? 0 : - 1 ) ;
ctdb_request_control_reply ( state - > ctdb , state - > c , NULL , status , NULL ) ;
return 0 ;
}
/*
 * Handle a control that requests an immediate vacuuming run of one
 * database.
 *
 * indata is unmarshalled into a struct ctdb_db_vacuum naming the
 * database and whether a full run is wanted.  If a vacuuming child
 * could be started, *async_reply is set to true and 0 is returned; the
 * actual reply is sent by vacuum_control_state_destructor() when the
 * child context is freed.
 *
 * Returns -1 (with *async_reply untouched) if the request is invalid,
 * the database is unknown or non-volatile, memory allocation fails, or
 * the child could not be started.
 */
int32_t ctdb_control_db_vacuum(struct ctdb_context *ctdb,
			       struct ctdb_req_control_old *c,
			       TDB_DATA indata,
			       bool *async_reply)
{
	struct ctdb_db_context *ctdb_db;
	struct ctdb_vacuum_child_context *child_ctx = NULL;
	struct ctdb_db_vacuum *db_vacuum;
	struct vacuum_control_state *state;
	size_t np;
	int ret;

	ret = ctdb_db_vacuum_pull(indata.dptr,
				  indata.dsize,
				  ctdb,
				  &db_vacuum,
				  &np);
	if (ret != 0) {
		DBG_ERR(" Invalid data \n ");
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, db_vacuum->db_id);
	if (ctdb_db == NULL) {
		DBG_ERR(" Unknown db id 0x%08x \n ", db_vacuum->db_id);
		talloc_free(db_vacuum);
		return -1;
	}

	/*
	 * ctdb_vacuum_init() sets up no vacuum handle for non-volatile
	 * databases, but the child context callbacks dereference
	 * ctdb_db->vacuum_handle.  Refuse such requests up front.
	 */
	if (!ctdb_db_volatile(ctdb_db)) {
		DBG_ERR("Refusing to vacuum non-volatile database %s\n",
			ctdb_db->db_name);
		talloc_free(db_vacuum);
		return -1;
	}

	state = talloc(ctdb, struct vacuum_control_state);
	if (state == NULL) {
		DBG_ERR(" Memory allocation error \n ");
		/* db_vacuum was allocated on ctdb: do not leak it. */
		talloc_free(db_vacuum);
		return -1;
	}

	ret = vacuum_db_child(ctdb_db,
			      ctdb_db,
			      false,
			      db_vacuum->full_vacuum_run,
			      &child_ctx);

	talloc_free(db_vacuum);

	if (ret == 0) {
		/*
		 * Tie the state to the child context, so that the reply
		 * goes out from the state destructor when the child
		 * context is freed.
		 */
		(void)talloc_steal(child_ctx, state);

		state->child_ctx = child_ctx;
		state->c = talloc_steal(state, c);
		state->ctdb = ctdb;

		talloc_set_destructor(state, vacuum_control_state_destructor);

		*async_reply = true;
		return 0;
	}

	talloc_free(state);

	switch (ret) {
	case EBUSY:
		DBG_WARNING(" Vacuuming collision \n ");
		break;

	default:
		DBG_ERR(" Temporary vacuuming failure, ret=%d \n ", ret);
	}

	return -1;
}
2010-07-21 12:29:55 +09:30
void ctdb_stop_vacuuming ( struct ctdb_context * ctdb )
{
2019-10-04 12:06:21 +10:00
if ( ctdb - > vacuumer ! = NULL ) {
D_INFO ( " Aborting vacuuming for %s (%i) \n " ,
ctdb - > vacuumer - > vacuum_handle - > ctdb_db - > db_name ,
( int ) ctdb - > vacuumer - > child_pid ) ;
2010-07-21 12:29:55 +09:30
/* vacuum_child_destructor kills it, removes from list */
2019-10-04 12:06:21 +10:00
talloc_free ( ctdb - > vacuumer ) ;
2010-07-21 12:29:55 +09:30
}
}
2009-07-23 16:03:39 +10:00
/* this function initializes the vacuuming context for a database
* starts the vacuuming events
*/
int ctdb_vacuum_init ( struct ctdb_db_context * ctdb_db )
{
2020-04-02 14:18:33 +11:00
struct ctdb_vacuum_handle * vacuum_handle ;
2017-03-02 15:39:29 +11:00
if ( ! ctdb_db_volatile ( ctdb_db ) ) {
DEBUG ( DEBUG_ERR ,
( " Vacuuming is disabled for non-volatile database %s \n " ,
ctdb_db - > db_name ) ) ;
2009-11-03 10:48:27 +11:00
return 0 ;
}
2020-04-02 14:18:33 +11:00
vacuum_handle = talloc ( ctdb_db , struct ctdb_vacuum_handle ) ;
if ( vacuum_handle = = NULL ) {
DBG_ERR ( " Memory allocation error \n " ) ;
return - 1 ;
}
vacuum_handle - > ctdb_db = ctdb_db ;
vacuum_handle - > fast_path_count = 0 ;
2020-03-27 14:38:09 +11:00
vacuum_handle - > vacuum_interval = get_vacuum_interval ( ctdb_db ) ;
2009-07-23 16:03:39 +10:00
2020-04-02 14:18:33 +11:00
ctdb_db - > vacuum_handle = vacuum_handle ;
2009-07-23 16:03:39 +10:00
2020-04-02 14:18:33 +11:00
tevent_add_timer ( ctdb_db - > ctdb - > ev ,
vacuum_handle ,
2020-03-27 14:38:09 +11:00
timeval_current_ofs ( vacuum_handle - > vacuum_interval , 0 ) ,
2020-04-02 14:18:33 +11:00
ctdb_vacuum_event ,
vacuum_handle ) ;
2009-07-23 16:03:39 +10:00
return 0 ;
}
2010-12-21 14:25:48 +01:00
2011-04-07 12:17:16 +02:00
static void remove_record_from_delete_queue ( struct ctdb_db_context * ctdb_db ,
const struct ctdb_ltdb_header * hdr ,
const TDB_DATA key )
{
struct delete_record_data * kd ;
uint32_t hash ;
hash = ( uint32_t ) ctdb_hash ( & key ) ;
DEBUG ( DEBUG_DEBUG , ( __location__
2011-12-23 17:23:07 +01:00
" remove_record_from_delete_queue: "
" db[%s] "
2011-04-07 12:17:16 +02:00
" db_id[0x%08x] "
" key_hash[0x%08x] "
" lmaster[%u] "
" migrated_with_data[%s] \n " ,
ctdb_db - > db_name , ctdb_db - > db_id ,
hash ,
ctdb_lmaster ( ctdb_db - > ctdb , & key ) ,
hdr - > flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA ? " yes " : " no " ) ) ;
kd = ( struct delete_record_data * ) trbt_lookup32 ( ctdb_db - > delete_queue , hash ) ;
if ( kd = = NULL ) {
2011-12-23 17:23:07 +01:00
DEBUG ( DEBUG_DEBUG , ( __location__
" remove_record_from_delete_queue: "
" record not in queue (hash[0x%08x]) \n . " ,
hash ) ) ;
2011-04-07 12:17:16 +02:00
return ;
}
2011-12-23 17:23:07 +01:00
if ( ( kd - > key . dsize ! = key . dsize ) | |
( memcmp ( kd - > key . dptr , key . dptr , key . dsize ) ! = 0 ) )
{
DEBUG ( DEBUG_DEBUG , ( __location__
" remove_record_from_delete_queue: "
" hash collision for key with hash[0x%08x] "
" in db[%s] - skipping \n " ,
hash , ctdb_db - > db_name ) ) ;
2011-04-07 12:17:16 +02:00
return ;
}
2011-12-23 17:23:07 +01:00
DEBUG ( DEBUG_DEBUG , ( __location__
" remove_record_from_delete_queue: "
" removing key with hash[0x%08x] \n " ,
hash ) ) ;
2011-04-07 12:17:16 +02:00
talloc_free ( kd ) ;
return ;
}
2010-12-21 14:25:48 +01:00
/**
2011-03-11 15:55:52 +01:00
* Insert a record into the ctdb_db context ' s delete queue ,
* handling hash collisions .
2010-12-21 14:25:48 +01:00
*/
2011-03-11 15:55:52 +01:00
static int insert_record_into_delete_queue ( struct ctdb_db_context * ctdb_db ,
const struct ctdb_ltdb_header * hdr ,
TDB_DATA key )
2010-12-21 14:25:48 +01:00
{
struct delete_record_data * kd ;
2011-03-11 15:55:52 +01:00
uint32_t hash ;
int ret ;
2010-12-21 14:25:48 +01:00
hash = ( uint32_t ) ctdb_hash ( & key ) ;
2015-12-17 12:27:58 +11:00
DEBUG ( DEBUG_DEBUG , ( __location__ " schedule for deletion: db[%s] "
" db_id[0x%08x] "
" key_hash[0x%08x] "
" lmaster[%u] "
" migrated_with_data[%s] \n " ,
2011-03-11 15:55:52 +01:00
ctdb_db - > db_name , ctdb_db - > db_id ,
2010-12-21 14:25:48 +01:00
hash ,
ctdb_lmaster ( ctdb_db - > ctdb , & key ) ,
2011-03-11 15:55:52 +01:00
hdr - > flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA ? " yes " : " no " ) ) ;
2010-12-21 14:25:48 +01:00
kd = ( struct delete_record_data * ) trbt_lookup32 ( ctdb_db - > delete_queue , hash ) ;
if ( kd ! = NULL ) {
if ( ( kd - > key . dsize ! = key . dsize ) | |
( memcmp ( kd - > key . dptr , key . dptr , key . dsize ) ! = 0 ) )
{
DEBUG ( DEBUG_INFO ,
2011-12-23 10:37:25 +01:00
( __location__ " schedule for deletion: "
2011-12-23 17:10:20 +01:00
" hash collision for key hash [0x%08x]. "
" Skipping the record. \n " , hash ) ) ;
2010-12-21 14:25:48 +01:00
return 0 ;
} else {
2011-03-11 14:57:15 +01:00
DEBUG ( DEBUG_DEBUG ,
2011-12-23 10:37:25 +01:00
( __location__ " schedule for deletion: "
2011-12-23 17:10:20 +01:00
" updating entry for key with hash [0x%08x]. \n " ,
2011-12-23 10:37:25 +01:00
hash ) ) ;
2010-12-21 14:25:48 +01:00
}
}
2011-03-11 15:55:52 +01:00
ret = insert_delete_record_data_into_tree ( ctdb_db - > ctdb , ctdb_db ,
2010-12-21 14:25:48 +01:00
ctdb_db - > delete_queue ,
2011-03-11 15:55:52 +01:00
hdr , key ) ;
2010-12-21 14:25:48 +01:00
if ( ret ! = 0 ) {
2011-12-23 15:14:06 +01:00
DEBUG ( DEBUG_INFO ,
( __location__ " schedule for deletion: error "
2011-12-23 17:10:20 +01:00
" inserting key with hash [0x%08x] into delete queue \n " ,
2011-12-23 15:14:06 +01:00
hash ) ) ;
2010-12-21 14:25:48 +01:00
return - 1 ;
}
return 0 ;
}
2010-12-28 13:13:34 +01:00
2011-03-11 15:55:52 +01:00
/**
 * Schedule a record for deletion.
 * Called from the parent context.
 *
 * indata carries a struct ctdb_control_schedule_for_deletion whose key
 * bytes follow the fixed-size part of the structure.
 *
 * Returns 0 on success (including the case where the record was skipped
 * because of a hash collision in the delete queue), -1 if the payload is
 * malformed, the database is unknown or the record could not be queued.
 */
int32_t ctdb_control_schedule_for_deletion(struct ctdb_context *ctdb,
					   TDB_DATA indata)
{
	const size_t fixed_len =
		offsetof(struct ctdb_control_schedule_for_deletion, key);
	struct ctdb_control_schedule_for_deletion *dd;
	struct ctdb_db_context *ctdb_db;
	TDB_DATA key;

	/*
	 * Defensive: do not rely on the caller having validated the payload.
	 * The fixed-size part must be present before any field is read.
	 */
	if (indata.dptr == NULL || indata.dsize < fixed_len) {
		DEBUG(DEBUG_ERR, (__location__
				  " Invalid SCHEDULE_FOR_DELETION data size %zu\n",
				  indata.dsize));
		return -1;
	}

	dd = (struct ctdb_control_schedule_for_deletion *)indata.dptr;

	/* The advertised key length must fit into the received buffer. */
	if (dd->keylen > indata.dsize - fixed_len) {
		DEBUG(DEBUG_ERR, (__location__
				  " Invalid SCHEDULE_FOR_DELETION key length %zu"
				  " for data size %zu\n",
				  (size_t)dd->keylen, indata.dsize));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, dd->db_id);
	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Unknown db id 0x%08x \n ",
				  dd->db_id));
		return -1;
	}

	key.dsize = dd->keylen;
	key.dptr = dd->key;

	return insert_record_into_delete_queue(ctdb_db, &dd->hdr, key);
}
2010-12-28 13:13:34 +01:00
/*
 * Schedule a record for deletion from whichever process we are running in.
 *
 * In the main daemon (our pid equals ctdb->ctdbd_pid) the record is put
 * into the delete queue directly.  In any other process a
 * SCHEDULE_FOR_DELETION control carrying the header and key is sent to the
 * current node instead.
 *
 * Returns 0 on success.  Returns -1 if controls cannot be sent from this
 * process, on allocation failure or if a non-zero control status is
 * reported; a failure of ctdb_control() itself is returned unchanged.
 */
int32_t ctdb_local_schedule_for_deletion(struct ctdb_db_context *ctdb_db,
					 const struct ctdb_ltdb_header *hdr,
					 TDB_DATA key)
{
	int ret;
	struct ctdb_control_schedule_for_deletion *dd;
	TDB_DATA indata;
	/*
	 * Must start out as 0: it is inspected below even when
	 * ctdb_control() returned an error, and nothing visible here
	 * guarantees that the call stores a status in that case.
	 */
	int32_t status = 0;

	if (ctdb_db->ctdb->ctdbd_pid == getpid()) {
		/* main daemon - directly queue */
		ret = insert_record_into_delete_queue(ctdb_db, hdr, key);
		return ret;
	}

	/*
	 * Without a connection to the daemon no control can be sent.
	 * This can happen, for example, in the update_record control
	 * child process.
	 */
	if (!ctdb_db->ctdb->can_send_controls) {
		return -1;
	}

	/* child process: send the main daemon a control */
	indata.dsize = offsetof(struct ctdb_control_schedule_for_deletion, key)
		       + key.dsize;
	indata.dptr = talloc_zero_array(ctdb_db, uint8_t, indata.dsize);
	if (indata.dptr == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " out of memory \n "));
		return -1;
	}

	/* Marshal db id, record header and key into the control payload. */
	dd = (struct ctdb_control_schedule_for_deletion *)(void *)indata.dptr;
	dd->db_id = ctdb_db->db_id;
	dd->hdr = *hdr;
	dd->keylen = key.dsize;
	memcpy(dd->key, key.dptr, key.dsize);

	ret = ctdb_control(ctdb_db->ctdb,
			   CTDB_CURRENT_NODE,
			   ctdb_db->db_id,
			   CTDB_CONTROL_SCHEDULE_FOR_DELETION,
			   CTDB_CTRL_FLAG_NOREPLY, /* flags */
			   indata,
			   NULL, /* mem_ctx */
			   NULL, /* outdata */
			   &status,
			   NULL, /* timeout : NULL == wait forever */
			   NULL); /* error message */

	talloc_free(indata.dptr);

	if (ret != 0 || status != 0) {
		DEBUG(DEBUG_ERR, (__location__ " Error sending "
				  " SCHEDULE_FOR_DELETION "
				  " control. \n "));
		if (status != 0) {
			ret = -1;
		}
	}

	return ret;
}
2011-04-07 12:17:16 +02:00
void ctdb_local_remove_from_delete_queue ( struct ctdb_db_context * ctdb_db ,
const struct ctdb_ltdb_header * hdr ,
const TDB_DATA key )
{
if ( ctdb_db - > ctdb - > ctdbd_pid ! = getpid ( ) ) {
/*
* Only remove the record from the delete queue if called
* in the main daemon .
*/
return ;
}
remove_record_from_delete_queue ( ctdb_db , hdr , key ) ;
return ;
}
2018-02-16 15:30:13 +11:00
/*
 * Per-record callback passed to ctdb_rec_buffer_traverse() by
 * ctdb_control_vacuum_fetch().
 *
 * Copies the record's key into a newly allocated fetch_record_data that is
 * a talloc child of ctdb_db->fetch_queue and inserts it into that queue
 * under the hash of the key.  reqid, header and data are not used.
 *
 * private_data must be a struct ctdb_db_context (enforced with
 * talloc_get_type_abort()).
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int vacuum_fetch_parser(uint32_t reqid,
			       struct ctdb_ltdb_header *header,
			       TDB_DATA key, TDB_DATA data,
			       void *private_data)
{
	struct ctdb_db_context *ctdb_db = talloc_get_type_abort(
		private_data, struct ctdb_db_context);
	size_t rec_size = offsetof(struct fetch_record_data, keydata)
			  + key.dsize;
	struct fetch_record_data *rec;

	/* One allocation holds the struct and the trailing key bytes. */
	rec = (struct fetch_record_data *)talloc_size(ctdb_db->fetch_queue,
						      rec_size);
	if (rec == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Memory error \n "));
		return -1;
	}
	talloc_set_name_const(rec, " struct fetch_record_data ");

	/* The stored key points at the copy kept inside the record. */
	memcpy(rec->keydata, key.dptr, key.dsize);
	rec->key.dptr = rec->keydata;
	rec->key.dsize = key.dsize;

	trbt_insert32(ctdb_db->fetch_queue, ctdb_hash(&key), rec);

	return 0;
}
/*
 * Handle a vacuum fetch control.
 *
 * Unpacks the record buffer contained in indata, looks up the database it
 * refers to and runs vacuum_fetch_parser() on every record in the buffer.
 *
 * Returns -1 if the buffer cannot be unpacked or the database is unknown;
 * otherwise returns the result of ctdb_rec_buffer_traverse().
 */
int32_t ctdb_control_vacuum_fetch(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_rec_buffer *recbuf;
	struct ctdb_db_context *ctdb_db;
	size_t npull;
	int ret;

	ret = ctdb_rec_buffer_pull(indata.dptr, indata.dsize, ctdb, &recbuf,
				   &npull);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, (" Invalid data in vacuum_fetch \n "));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, recbuf->db_id);
	if (ctdb_db == NULL) {
		/*
		 * Log before releasing recbuf: the message reads
		 * recbuf->db_id, so freeing first would be a use-after-free.
		 */
		DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%08x \n ",
				  recbuf->db_id));
		talloc_free(recbuf);
		return -1;
	}

	ret = ctdb_rec_buffer_traverse(recbuf, vacuum_fetch_parser, ctdb_db);
	talloc_free(recbuf);

	return ret;
}