mirror of
https://github.com/samba-team/samba.git
synced 2025-03-08 04:58:40 +03:00
Redo the vacuuming process to make it scalable.
Vacuuming used to delete one record at a time on all nodes; that was m*n behaviour, would require a huge storm of ctdb->ctdb controls, and just wouldn't scale at all. The new vacuuming process collects all records to be deleted locally and then sends only one control to the other nodes. This control contains a list of all records to be deleted. (This used to be ctdb commit 9e625ece19a91f362c9539fa73b6b2108f0d9c53)
This commit is contained in:
parent
e2930588b3
commit
74d57f8d51
@ -490,12 +490,13 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
|
||||
CTDB_CONTROL_TRANSACTION_START = 65,
|
||||
CTDB_CONTROL_TRANSACTION_COMMIT = 66,
|
||||
CTDB_CONTROL_WIPE_DATABASE = 67,
|
||||
CTDB_CONTROL_DELETE_RECORD = 68,
|
||||
/* #68 removed */
|
||||
CTDB_CONTROL_UPTIME = 69,
|
||||
CTDB_CONTROL_START_RECOVERY = 70,
|
||||
CTDB_CONTROL_END_RECOVERY = 71,
|
||||
CTDB_CONTROL_RELOAD_NODES_FILE = 72,
|
||||
CTDB_CONTROL_GET_RECLOCK_FILE = 73,
|
||||
CTDB_CONTROL_TRY_DELETE_RECORDS = 74,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1161,6 +1162,7 @@ int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
|
||||
int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata);
|
||||
int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata);
|
||||
int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata);
|
||||
int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
|
||||
|
||||
void ctdb_tunables_set_defaults(struct ctdb_context *ctdb);
|
||||
|
||||
@ -1229,8 +1231,6 @@ int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata);
|
||||
int ctdb_vacuum(struct ctdb_context *ctdb, int argc, const char **argv);
|
||||
int ctdb_repack(struct ctdb_context *ctdb, int argc, const char **argv);
|
||||
|
||||
int32_t ctdb_control_delete_record(struct ctdb_context *ctdb, TDB_DATA indata);
|
||||
|
||||
void ctdb_block_signal(int signum);
|
||||
void ctdb_unblock_signal(int signum);
|
||||
int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb);
|
||||
|
@ -353,9 +353,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_wipe_database));
|
||||
return ctdb_control_wipe_database(ctdb, indata);
|
||||
|
||||
case CTDB_CONTROL_DELETE_RECORD:
|
||||
return ctdb_control_delete_record(ctdb, indata);
|
||||
|
||||
case CTDB_CONTROL_UPTIME:
|
||||
return ctdb_control_uptime(ctdb, outdata);
|
||||
|
||||
@ -369,6 +366,9 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
CHECK_CONTROL_DATA_SIZE(0);
|
||||
return ctdb_control_get_reclock_file(ctdb, outdata);
|
||||
|
||||
case CTDB_CONTROL_TRY_DELETE_RECORDS:
|
||||
return ctdb_control_try_delete_records(ctdb, indata, outdata);
|
||||
|
||||
default:
|
||||
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
|
||||
return -1;
|
||||
|
@ -641,16 +641,17 @@ bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
delete a record as part of the vacuum process
|
||||
only delete if we are not lmaster or dmaster, and our rsn is <= the provided rsn
|
||||
use non-blocking locks
|
||||
|
||||
return 0 if the record was successfully deleted (i.e. it does not exist
|
||||
when the function returns)
|
||||
or !0 is the record still exists in the tdb after returning.
|
||||
*/
|
||||
int32_t ctdb_control_delete_record(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
static int delete_tdb_record(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, struct ctdb_rec_data *rec)
|
||||
{
|
||||
struct ctdb_rec_data *rec = (struct ctdb_rec_data *)indata.dptr;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
TDB_DATA key, data;
|
||||
struct ctdb_ltdb_header *hdr, *hdr2;
|
||||
|
||||
@ -659,16 +660,6 @@ int32_t ctdb_control_delete_record(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
|
||||
int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
|
||||
|
||||
if (indata.dsize < sizeof(uint32_t) || indata.dsize != rec->length) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Bad record size in ctdb_control_delete_record\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_db = find_ctdb_db(ctdb, rec->reqid);
|
||||
if (!ctdb_db) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", rec->reqid));
|
||||
return -1;
|
||||
}
|
||||
|
||||
key.dsize = rec->keylen;
|
||||
key.dptr = &rec->data[0];
|
||||
@ -747,6 +738,7 @@ int32_t ctdb_control_delete_record(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct recovery_callback_state {
|
||||
struct ctdb_req_control *c;
|
||||
};
|
||||
@ -879,3 +871,89 @@ int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outda
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
try to delete all these records as part of the vacuuming process
|
||||
and return the records we failed to delete
|
||||
*/
|
||||
int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
|
||||
{
|
||||
struct ctdb_control_pulldb_reply *reply = (struct ctdb_control_pulldb_reply *)indata.dptr;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
int i;
|
||||
struct ctdb_rec_data *rec;
|
||||
struct ctdb_control_pulldb_reply *records;
|
||||
|
||||
if (indata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " invalid data in try_delete_records\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_db = find_ctdb_db(ctdb, reply->db_id);
|
||||
if (!ctdb_db) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", reply->db_id));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
DEBUG(DEBUG_DEBUG,("starting try_delete_records of %u records for dbid 0x%x\n",
|
||||
reply->count, reply->db_id));
|
||||
|
||||
|
||||
/* create a blob to send back the records we couldnt delete */
|
||||
records = (struct ctdb_control_pulldb_reply *)
|
||||
talloc_zero_size(outdata,
|
||||
offsetof(struct ctdb_control_pulldb_reply, data));
|
||||
if (records == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
|
||||
return -1;
|
||||
}
|
||||
records->db_id = ctdb_db->db_id;
|
||||
|
||||
|
||||
rec = (struct ctdb_rec_data *)&reply->data[0];
|
||||
for (i=0;i<reply->count;i++) {
|
||||
TDB_DATA key, data;
|
||||
|
||||
key.dptr = &rec->data[0];
|
||||
key.dsize = rec->keylen;
|
||||
data.dptr = &rec->data[key.dsize];
|
||||
data.dsize = rec->datalen;
|
||||
|
||||
if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
|
||||
DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record in indata\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* If we cant delete the record we must add it to the reply
|
||||
so the lmaster knows it may not purge this record
|
||||
*/
|
||||
if (delete_tdb_record(ctdb, ctdb_db, rec) != 0) {
|
||||
size_t old_size;
|
||||
struct ctdb_ltdb_header *hdr;
|
||||
|
||||
hdr = (struct ctdb_ltdb_header *)data.dptr;
|
||||
data.dptr += sizeof(*hdr);
|
||||
data.dsize -= sizeof(*hdr);
|
||||
|
||||
DEBUG(DEBUG_INFO, (__location__ " Failed to vacuum delete record with hash 0x%08x\n", ctdb_hash(&key)));
|
||||
|
||||
old_size = talloc_get_size(records);
|
||||
records = talloc_realloc_size(outdata, records, old_size + rec->length);
|
||||
if (records == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to expand\n"));
|
||||
return -1;
|
||||
}
|
||||
records->count++;
|
||||
memcpy(old_size+(uint8_t *)records, rec, rec->length);
|
||||
}
|
||||
|
||||
rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec);
|
||||
}
|
||||
|
||||
|
||||
outdata->dptr = (uint8_t *)records;
|
||||
outdata->dsize = talloc_get_size(records);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -23,132 +23,12 @@
|
||||
#include "system/network.h"
|
||||
#include "../include/ctdb.h"
|
||||
#include "../include/ctdb_private.h"
|
||||
#include "../common/rb_tree.h"
|
||||
#include "db_wrap.h"
|
||||
|
||||
/* should be tunable */
|
||||
#define TIMELIMIT() timeval_current_ofs(10, 0)
|
||||
|
||||
/*
|
||||
vacuum one record
|
||||
*/
|
||||
static int ctdb_vacuum_one(struct ctdb_context *ctdb, TDB_DATA key,
|
||||
struct ctdb_db_context *ctdb_db, uint32_t *count)
|
||||
{
|
||||
TDB_DATA data;
|
||||
struct ctdb_ltdb_header *hdr;
|
||||
struct ctdb_rec_data *rec;
|
||||
uint64_t rsn;
|
||||
|
||||
if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, key) != 0) {
|
||||
/* the chain is busy - come back later */
|
||||
return 0;
|
||||
}
|
||||
|
||||
data = tdb_fetch(ctdb_db->ltdb->tdb, key);
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, key);
|
||||
if (data.dptr == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
|
||||
free(data.dptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
hdr = (struct ctdb_ltdb_header *)data.dptr;
|
||||
rsn = hdr->rsn;
|
||||
|
||||
/* if we are not the lmaster and the dmaster then skip the record */
|
||||
if (hdr->dmaster != ctdb->pnn ||
|
||||
ctdb_lmaster(ctdb, &key) != ctdb->pnn) {
|
||||
free(data.dptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
rec = ctdb_marshall_record(ctdb, ctdb_db->db_id, key, hdr, tdb_null);
|
||||
free(data.dptr);
|
||||
if (rec == NULL) {
|
||||
/* try it again later */
|
||||
return 0;
|
||||
}
|
||||
|
||||
data.dptr = (void *)rec;
|
||||
data.dsize = rec->length;
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_DELETE_RECORD,
|
||||
list_of_vnnmap_nodes(ctdb, ctdb->vnn_map, rec, false),
|
||||
TIMELIMIT(), true, data) != 0) {
|
||||
/* one or more nodes failed to delete a record - no problem! */
|
||||
talloc_free(rec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
talloc_free(rec);
|
||||
|
||||
/* its deleted on all other nodes - refetch, check and delete */
|
||||
if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, key) != 0) {
|
||||
/* the chain is busy - come back later */
|
||||
return 0;
|
||||
}
|
||||
|
||||
data = tdb_fetch(ctdb_db->ltdb->tdb, key);
|
||||
if (data.dptr == NULL) {
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, key);
|
||||
return 0;
|
||||
}
|
||||
if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
|
||||
free(data.dptr);
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
hdr = (struct ctdb_ltdb_header *)data.dptr;
|
||||
|
||||
/* if we are not the lmaster and the dmaster then skip the record */
|
||||
if (hdr->dmaster != ctdb->pnn ||
|
||||
ctdb_lmaster(ctdb, &key) != ctdb->pnn ||
|
||||
rsn != hdr->rsn) {
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, key);
|
||||
free(data.dptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ctdb_block_signal(SIGALRM);
|
||||
tdb_delete(ctdb_db->ltdb->tdb, key);
|
||||
ctdb_unblock_signal(SIGALRM);
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, key);
|
||||
free(data.dptr);
|
||||
|
||||
(*count)++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
vacuum records for which we are the lmaster
|
||||
*/
|
||||
static int ctdb_vacuum_local(struct ctdb_context *ctdb, struct ctdb_control_pulldb_reply *list,
|
||||
struct ctdb_db_context *ctdb_db, uint32_t *count)
|
||||
{
|
||||
struct ctdb_rec_data *r;
|
||||
int i;
|
||||
|
||||
r = (struct ctdb_rec_data *)&list->data[0];
|
||||
|
||||
for (i=0;
|
||||
i<list->count;
|
||||
r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r), i++) {
|
||||
TDB_DATA key;
|
||||
key.dptr = &r->data[0];
|
||||
key.dsize = r->keylen;
|
||||
if (ctdb_vacuum_one(ctdb, key, ctdb_db, count) != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
a list of records to possibly delete
|
||||
@ -156,24 +36,37 @@ static int ctdb_vacuum_local(struct ctdb_context *ctdb, struct ctdb_control_pull
|
||||
struct vacuum_data {
|
||||
uint32_t vacuum_limit;
|
||||
struct ctdb_context *ctdb;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
trbt_tree_t *delete_tree;
|
||||
uint32_t delete_count;
|
||||
struct ctdb_control_pulldb_reply **list;
|
||||
bool traverse_error;
|
||||
uint32_t total;
|
||||
};
|
||||
|
||||
/* this structure contains the information for one record to be deleted */
|
||||
struct delete_record_data {
|
||||
struct ctdb_context *ctdb;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
struct ctdb_ltdb_header hdr;
|
||||
TDB_DATA key;
|
||||
};
|
||||
|
||||
/*
|
||||
traverse function for vacuuming
|
||||
*/
|
||||
static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private)
|
||||
{
|
||||
struct vacuum_data *vdata = talloc_get_type(private, struct vacuum_data);
|
||||
struct ctdb_context *ctdb = vdata->ctdb;
|
||||
struct ctdb_db_context *ctdb_db = vdata->ctdb_db;
|
||||
uint32_t lmaster;
|
||||
struct ctdb_ltdb_header *hdr;
|
||||
struct ctdb_rec_data *rec;
|
||||
size_t old_size;
|
||||
|
||||
lmaster = ctdb_lmaster(vdata->ctdb, &key);
|
||||
if (lmaster >= vdata->ctdb->vnn_map->size) {
|
||||
lmaster = ctdb_lmaster(ctdb, &key);
|
||||
if (lmaster >= ctdb->vnn_map->size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -184,13 +77,53 @@ static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
|
||||
|
||||
hdr = (struct ctdb_ltdb_header *)data.dptr;
|
||||
|
||||
if (hdr->dmaster != vdata->ctdb->pnn) {
|
||||
if (hdr->dmaster != ctdb->pnn) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* is this a records we could possibly delete? I.e.
|
||||
if the record is empty and also we are both lmaster
|
||||
and dmaster for the record we should be able to delete it
|
||||
*/
|
||||
if ( (lmaster == ctdb->pnn)
|
||||
&&( (vdata->delete_count < vdata->vacuum_limit)
|
||||
||(vdata->vacuum_limit == 0) ) ){
|
||||
uint32_t hash;
|
||||
|
||||
hash = ctdb_hash(&key);
|
||||
if (trbt_lookup32(vdata->delete_tree, hash)) {
|
||||
DEBUG(DEBUG_INFO, (__location__ " Hash collission when vacuuming, skipping this record.\n"));
|
||||
} else {
|
||||
struct delete_record_data *dd;
|
||||
|
||||
/* store key and header indexed by the key hash */
|
||||
dd = talloc_zero(vdata->delete_tree, struct delete_record_data);
|
||||
if (dd == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
|
||||
return -1;
|
||||
}
|
||||
dd->ctdb = ctdb;
|
||||
dd->ctdb_db = ctdb_db;
|
||||
dd->key.dsize = key.dsize;
|
||||
dd->key.dptr = talloc_memdup(dd, key.dptr, key.dsize);
|
||||
if (dd->key.dptr == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
dd->hdr = *hdr;
|
||||
|
||||
|
||||
trbt_insert32(vdata->delete_tree, hash, dd);
|
||||
|
||||
vdata->delete_count++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* add the record to the blob ready to send to the nodes */
|
||||
rec = ctdb_marshall_record(vdata->list[lmaster], vdata->ctdb->pnn, key, NULL, tdb_null);
|
||||
rec = ctdb_marshall_record(vdata->list[lmaster], ctdb->pnn, key, NULL, tdb_null);
|
||||
if (rec == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
|
||||
vdata->traverse_error = true;
|
||||
@ -219,6 +152,84 @@ static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* wrapper around the growing blob of marshalled records to delete; the
   blob is reallocated as records are appended, so it is reached through
   this holder */
struct delete_records_list {
	struct ctdb_control_pulldb_reply *records;
};
|
||||
|
||||
/*
|
||||
traverse the tree of records to delete and marshall them into
|
||||
a blob
|
||||
*/
|
||||
static void
|
||||
delete_traverse(void *param, void *data)
|
||||
{
|
||||
struct delete_record_data *dd = talloc_get_type(data, struct delete_record_data);
|
||||
struct delete_records_list *recs = talloc_get_type(param, struct delete_records_list);
|
||||
struct ctdb_rec_data *rec;
|
||||
size_t old_size;
|
||||
|
||||
rec = ctdb_marshall_record(dd, recs->records->db_id, dd->key, &dd->hdr, tdb_null);
|
||||
if (rec == NULL) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " failed to marshall record\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
old_size = talloc_get_size(recs->records);
|
||||
recs->records = talloc_realloc_size(NULL, recs->records, old_size + rec->length);
|
||||
if (recs->records == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to expand\n"));
|
||||
return;
|
||||
}
|
||||
recs->records->count++;
|
||||
memcpy(old_size+(uint8_t *)(recs->records), rec, rec->length);
|
||||
}
|
||||
|
||||
|
||||
static void delete_record(void *param, void *d)
|
||||
{
|
||||
struct delete_record_data *dd = talloc_get_type(d, struct delete_record_data);
|
||||
struct ctdb_context *ctdb = dd->ctdb;
|
||||
struct ctdb_db_context *ctdb_db = dd->ctdb_db;
|
||||
uint32_t *count = (uint32_t *)param;
|
||||
struct ctdb_ltdb_header *hdr;
|
||||
TDB_DATA data;
|
||||
|
||||
/* its deleted on all other nodes - refetch, check and delete */
|
||||
if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, dd->key) != 0) {
|
||||
/* the chain is busy - come back later */
|
||||
return;
|
||||
}
|
||||
|
||||
data = tdb_fetch(ctdb_db->ltdb->tdb, dd->key);
|
||||
if (data.dptr == NULL) {
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key);
|
||||
return;
|
||||
}
|
||||
if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
|
||||
free(data.dptr);
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key);
|
||||
return;
|
||||
}
|
||||
|
||||
hdr = (struct ctdb_ltdb_header *)data.dptr;
|
||||
|
||||
/* if we are not the lmaster and the dmaster then skip the record */
|
||||
if (hdr->dmaster != ctdb->pnn ||
|
||||
ctdb_lmaster(ctdb, &(dd->key)) != ctdb->pnn ||
|
||||
dd->hdr.rsn != hdr->rsn) {
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key);
|
||||
free(data.dptr);
|
||||
return;
|
||||
}
|
||||
|
||||
ctdb_block_signal(SIGALRM);
|
||||
tdb_delete(ctdb_db->ltdb->tdb, dd->key);
|
||||
ctdb_unblock_signal(SIGALRM);
|
||||
tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key);
|
||||
free(data.dptr);
|
||||
|
||||
(*count)++;
|
||||
}
|
||||
|
||||
/* vacuum one database */
|
||||
static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb_node_map *map,
|
||||
@ -237,6 +248,11 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb
|
||||
|
||||
vdata->ctdb = ctdb;
|
||||
vdata->vacuum_limit = vacuum_limit;
|
||||
vdata->delete_tree = trbt_create(vdata, 0);
|
||||
if (vdata->delete_tree == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ctdb_ctrl_getdbname(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, db_id, vdata, &name) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to get name of db 0x%x\n", db_id));
|
||||
@ -250,6 +266,7 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb
|
||||
talloc_free(vdata);
|
||||
return -1;
|
||||
}
|
||||
vdata->ctdb_db = ctdb_db;
|
||||
|
||||
/* the list needs to be of length num_nodes */
|
||||
vdata->list = talloc_array(vdata, struct ctdb_control_pulldb_reply *, ctdb->vnn_map->size);
|
||||
@ -301,23 +318,104 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb
|
||||
}
|
||||
}
|
||||
|
||||
for (i=0;i<ctdb->vnn_map->size;i++) {
|
||||
uint32_t count = 0;
|
||||
|
||||
if (vdata->list[i]->count == 0) {
|
||||
continue;
|
||||
/* Process all records we can delete (if any) */
|
||||
if (vdata->delete_count > 0) {
|
||||
struct delete_records_list *recs;
|
||||
TDB_DATA indata, outdata;
|
||||
int ret;
|
||||
int32_t res;
|
||||
uint32_t count;
|
||||
|
||||
recs = talloc_zero(vdata, struct delete_records_list);
|
||||
if (recs == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
|
||||
return -1;
|
||||
}
|
||||
recs->records = (struct ctdb_control_pulldb_reply *)
|
||||
talloc_zero_size(vdata,
|
||||
offsetof(struct ctdb_control_pulldb_reply, data));
|
||||
if (recs->records == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
|
||||
return -1;
|
||||
}
|
||||
recs->records->db_id = db_id;
|
||||
|
||||
/* traverse the tree of all records we want to delete and
|
||||
create a blob we can send to the other nodes.
|
||||
*/
|
||||
trbt_traversearray32(vdata->delete_tree, 1, delete_traverse, recs);
|
||||
|
||||
indata.dsize = talloc_get_size(recs->records);
|
||||
indata.dptr = (void *)recs->records;
|
||||
|
||||
/* now tell all the other nodes to delete all these records
|
||||
(if possible)
|
||||
*/
|
||||
for (i=0;i<ctdb->vnn_map->size;i++) {
|
||||
struct ctdb_control_pulldb_reply *records;
|
||||
struct ctdb_rec_data *rec;
|
||||
|
||||
if (ctdb->vnn_map->map[i] == ctdb->pnn) {
|
||||
/* we dont delete the records on the local node
|
||||
just yet
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ctdb_control(ctdb, ctdb->vnn_map->map[i], 0,
|
||||
CTDB_CONTROL_TRY_DELETE_RECORDS, 0,
|
||||
indata, recs, &outdata, &res,
|
||||
NULL, NULL);
|
||||
if (ret != 0 || res != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to delete records on node %u\n", ctdb->vnn_map->map[i]));
|
||||
exit(10);
|
||||
}
|
||||
|
||||
/* outdata countains the list of records coming back
|
||||
from the node which the node could not delete
|
||||
*/
|
||||
records = (struct ctdb_control_pulldb_reply *)outdata.dptr;
|
||||
rec = (struct ctdb_rec_data *)&records->data[0];
|
||||
while (records->count-- > 1) {
|
||||
TDB_DATA reckey, recdata;
|
||||
struct ctdb_ltdb_header *rechdr;
|
||||
|
||||
reckey.dptr = &rec->data[0];
|
||||
reckey.dsize = rec->keylen;
|
||||
recdata.dptr = &rec->data[reckey.dsize];
|
||||
recdata.dsize = rec->datalen;
|
||||
|
||||
if (recdata.dsize < sizeof(struct ctdb_ltdb_header)) {
|
||||
DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n"));
|
||||
exit(10);
|
||||
}
|
||||
rechdr = (struct ctdb_ltdb_header *)recdata.dptr;
|
||||
recdata.dptr += sizeof(*rechdr);
|
||||
recdata.dsize -= sizeof(*rechdr);
|
||||
|
||||
/* that other node couldnt delete the record
|
||||
so we shouldnt delete it either.
|
||||
remove it from the tree.
|
||||
*/
|
||||
talloc_free(trbt_lookup32(vdata->delete_tree, ctdb_hash(&reckey)));
|
||||
|
||||
rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec);
|
||||
}
|
||||
}
|
||||
|
||||
/* for records where we are the lmaster, we can try to delete them */
|
||||
if (ctdb_vacuum_local(ctdb, vdata->list[i], ctdb_db, &count) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Deletion error in vacuuming '%s'\n", name));
|
||||
talloc_free(vdata);
|
||||
return -1;
|
||||
|
||||
/* the only records remaining in the tree would be those
|
||||
records where all other nodes could successfully
|
||||
delete them, so we can now safely delete them on the
|
||||
lmaster as well.
|
||||
*/
|
||||
count = 0;
|
||||
trbt_traversearray32(vdata->delete_tree, 1, delete_record, &count);
|
||||
if (vdata->delete_count != 0) {
|
||||
printf("Deleted %u records out of %u on this node from '%s'\n", count, vdata->delete_count, name);
|
||||
}
|
||||
if (count != 0) {
|
||||
printf("Deleted %u records on this node from '%s'\n", count, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* this ensures we run our event queue */
|
||||
ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
|
||||
|
Loading…
x
Reference in New Issue
Block a user