mirror of
https://github.com/samba-team/samba.git
synced 2025-01-10 01:18:15 +03:00
ctdb-vacuum: Fix the incorrect counting of remote errors
If a node fails to delete a record in TRY_DELETE_RECORDS control during vacuuming, then it's possible that other nodes also may fail to delete a record. So instead of deleting the record from RB tree on first failure, keep track of the remote failures. Update delete_list.remote_error and delete_list.left statistics only once per record during the delete_record_traverse. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs <amitay@gmail.com> Reviewed-by: Martin Schwenke <martin@meltin.net>
This commit is contained in:
parent
202b9027ba
commit
ef05239717
@ -107,6 +107,7 @@ struct delete_record_data {
|
||||
struct ctdb_context *ctdb;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
struct ctdb_ltdb_header hdr;
|
||||
uint32_t remote_fail_count;
|
||||
TDB_DATA key;
|
||||
uint8_t keydata[1];
|
||||
};
|
||||
@ -149,6 +150,7 @@ static int insert_delete_record_data_into_tree(struct ctdb_context *ctdb,
|
||||
memcpy(dd->keydata, key.dptr, key.dsize);
|
||||
|
||||
dd->hdr = *hdr;
|
||||
dd->remote_fail_count = 0;
|
||||
|
||||
hash = ctdb_hash(&key);
|
||||
|
||||
@ -451,6 +453,13 @@ static int delete_record_traverse(void *param, void *data)
|
||||
uint32_t lmaster;
|
||||
uint32_t hash = ctdb_hash(&(dd->key));
|
||||
|
||||
if (dd->remote_fail_count > 0) {
|
||||
vdata->count.delete_list.remote_error++;
|
||||
vdata->count.delete_list.left--;
|
||||
talloc_free(dd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
res = tdb_chainlock(ctdb_db->ltdb->tdb, dd->key);
|
||||
if (res != 0) {
|
||||
DEBUG(DEBUG_ERR,
|
||||
@ -828,22 +837,17 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db,
|
||||
ctdb_hash(&reckey));
|
||||
if (dd != NULL) {
|
||||
/*
|
||||
* The other node could not delete the
|
||||
* record and it is the first node that
|
||||
* failed. So we should remove it from
|
||||
* the tree and update statistics.
|
||||
* The remote node could not delete the
|
||||
* record. Since other remote nodes can
|
||||
* also fail, we just mark the record.
|
||||
*/
|
||||
talloc_free(dd);
|
||||
vdata->count.delete_list.remote_error++;
|
||||
vdata->count.delete_list.left--;
|
||||
dd->remote_fail_count++;
|
||||
} else {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Failed to "
|
||||
"find record with hash 0x%08x coming "
|
||||
"back from TRY_DELETE_RECORDS "
|
||||
"control in delete list.\n",
|
||||
ctdb_hash(&reckey)));
|
||||
vdata->count.delete_list.local_error++;
|
||||
vdata->count.delete_list.left--;
|
||||
}
|
||||
|
||||
rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec);
|
||||
|
Loading…
Reference in New Issue
Block a user