s3:g_lock: avoid a lot of unused overhead using the new dbwrap_watch features

The key points are:

1. We keep our position in the watcher queue until we have gotten what
   we were waiting for. This means the order is now fair and stable.

2. We only wake up other waiters during g_lock_unlock(), and only if
   we detect that a pending exclusive lock is able to make progress.
   (Note: read lock holders are never waiters on their own.)

This drastically reduces the contention on locking.tdb records,
as waiters are no longer woken 3 times (where the first 2 wake-ups were completely useless).
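
As a rough, self-contained sketch of the two points above (this is NOT
the actual g_lock/dbwrap_watch code; waiter_queue, waiter_keep_position,
waiter_remove and unlock_and_maybe_wake are made-up stand-ins used for
illustration only), a fair FIFO waiter queue could look like this:

  /*
   * Standalone model of the fair watcher queue described above.
   * This is not the Samba implementation; all names are hypothetical.
   */
  #include <inttypes.h>
  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  #define MAX_WAITERS 8

  struct waiter_queue {
      uint64_t ids[MAX_WAITERS];  /* FIFO: slot 0 has waited longest */
      size_t num;
  };

  /* Point 1: a waiter is queued once and keeps its slot across retries. */
  static void waiter_keep_position(struct waiter_queue *q, uint64_t id)
  {
      size_t i;

      for (i = 0; i < q->num; i++) {
          if (q->ids[i] == id) {
              return;               /* already queued, keep the old slot */
          }
      }
      if (q->num < MAX_WAITERS) {
          q->ids[q->num++] = id;
      }
  }

  /* Drop the slot only once the waiter got what it was waiting for. */
  static void waiter_remove(struct waiter_queue *q, uint64_t id)
  {
      size_t i;

      for (i = 0; i < q->num; i++) {
          if (q->ids[i] != id) {
              continue;
          }
          memmove(&q->ids[i], &q->ids[i + 1],
                  (q->num - i - 1) * sizeof(q->ids[0]));
          q->num--;
          return;
      }
  }

  /*
   * Point 2: an unlock alerts only the longest waiter, and only if the
   * pending exclusive lock can make progress (no shared holders left).
   */
  static void unlock_and_maybe_wake(struct waiter_queue *q,
                                    size_t remaining_shared_holders)
  {
      if (q->num == 0 || remaining_shared_holders != 0) {
          return;                   /* skip alerting, nobody can proceed */
      }
      printf("wake waiter %" PRIu64 "\n", q->ids[0]);
  }

  int main(void)
  {
      struct waiter_queue q = { .num = 0 };

      waiter_keep_position(&q, 100);  /* first waiter */
      waiter_keep_position(&q, 200);  /* second waiter */
      waiter_keep_position(&q, 100);  /* retry keeps slot 0, order stays fair */

      unlock_and_maybe_wake(&q, 1);   /* readers remain: nobody is woken */
      unlock_and_maybe_wake(&q, 0);   /* last reader gone: waiter 100 is woken */

      waiter_remove(&q, 100);         /* 100 got its lock, drop its slot */
      return 0;
  }

In the actual commit the per-waiter slot lives inside the locking.tdb
record and is managed via dbwrap_watched_watch_add_instance() and
dbwrap_watched_watch_remove_instance(), while
dbwrap_watched_watch_skip_alerting() suppresses the useless wake-ups,
as the diff below shows.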

The following test with 256 connections, all looping with open/close
on the same inode (the share root), improves drastically:

  smbtorture //127.0.0.1/m -Uroot%test smb2.create.bench-path-contention-shared \
     --option='torture:bench_path=' \
     --option="torture:timelimit=60" \
     --option="torture:nprocs=256"

From something like this:

   open[num/s=50,avslat=6.455775,minlat=0.000157,maxlat=55.683846]
   close[num/s=50,avslat=4.563605,minlat=0.000128,maxlat=53.585839]

to:

   open[num/s=80,avslat=2.793862,minlat=0.004097,maxlat=46.597053]
   close[num/s=80,avslat=2.387326,minlat=0.023875,maxlat=50.878165]

Note that the real effect of this commit will be revealed together
with a following commit that only wakes one waiter at a time.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125

Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
Stefan Metzmacher 2022-06-27 13:40:55 +00:00 committed by Ralph Boehme
parent 5272051699
commit 2e92267920

@@ -338,6 +338,7 @@ struct g_lock_lock_fn_state {
struct server_id *dead_blocker;
struct tevent_req *watch_req;
uint64_t watch_instance;
NTSTATUS status;
};
@@ -361,6 +362,7 @@ static NTSTATUS g_lock_trylock(
ok = g_lock_parse(data.dptr, data.dsize, &lck);
if (!ok) {
dbwrap_watched_watch_remove_instance(rec, state->watch_instance);
DBG_DEBUG("g_lock_parse failed\n");
return NT_STATUS_INTERNAL_DB_CORRUPTION;
}
@@ -383,6 +385,10 @@ static NTSTATUS g_lock_trylock(
if (type == G_LOCK_DOWNGRADE) {
struct server_id_buf tmp2;
dbwrap_watched_watch_remove_instance(rec,
state->watch_instance);
DBG_DEBUG("%s: Trying to downgrade %s\n",
server_id_str_buf(self, &tmp),
server_id_str_buf(
@@ -392,6 +398,10 @@ static NTSTATUS g_lock_trylock(
if (type == G_LOCK_UPGRADE) {
ssize_t shared_idx;
dbwrap_watched_watch_remove_instance(rec,
state->watch_instance);
shared_idx = g_lock_find_shared(&lck, &self);
if (shared_idx == -1) {
@@ -423,6 +433,18 @@ static NTSTATUS g_lock_trylock(
DBG_DEBUG("Waiting for lck.exclusive=%s\n",
server_id_str_buf(lck.exclusive, &tmp));
/*
* We will return NT_STATUS_LOCK_NOT_GRANTED
* and need to monitor the record.
*
* If we don't have a watcher instance yet,
* we should add one.
*/
if (state->watch_instance == 0) {
state->watch_instance =
dbwrap_watched_watch_add_instance(rec);
}
*blocker = lck.exclusive;
return NT_STATUS_LOCK_NOT_GRANTED;
}
@@ -436,6 +458,9 @@ static NTSTATUS g_lock_trylock(
}
if (!retry) {
dbwrap_watched_watch_remove_instance(rec,
state->watch_instance);
DBG_DEBUG("%s already locked by self\n",
server_id_str_buf(self, &tmp));
return NT_STATUS_WAS_LOCKED;
@@ -449,9 +474,27 @@ static NTSTATUS g_lock_trylock(
DBG_DEBUG("Continue waiting for shared lock %s\n",
server_id_str_buf(*blocker, &tmp));
/*
* We will return NT_STATUS_LOCK_NOT_GRANTED
* and need to monitor the record.
*
* If we don't have a watcher instance yet,
* we should add one.
*/
if (state->watch_instance == 0) {
state->watch_instance =
dbwrap_watched_watch_add_instance(rec);
}
return NT_STATUS_LOCK_NOT_GRANTED;
}
/*
* All pending readers are gone and we no longer need
* to monitor the record.
*/
dbwrap_watched_watch_remove_instance(rec, state->watch_instance);
if (orig_num_shared != lck.num_shared) {
status = g_lock_store(rec, &lck, NULL, NULL, 0);
if (!NT_STATUS_IS_OK(status)) {
@@ -475,6 +518,9 @@ noexclusive:
ssize_t shared_idx = g_lock_find_shared(&lck, &self);
if (shared_idx == -1) {
dbwrap_watched_watch_remove_instance(rec,
state->watch_instance);
DBG_DEBUG("Trying to upgrade %s without "
"existing shared lock\n",
server_id_str_buf(self, &tmp));
@@ -489,6 +535,8 @@ noexclusive:
ssize_t shared_idx = g_lock_find_shared(&lck, &self);
if (shared_idx != -1) {
dbwrap_watched_watch_remove_instance(rec,
state->watch_instance);
DBG_DEBUG("Trying to writelock existing shared %s\n",
server_id_str_buf(self, &tmp));
return NT_STATUS_WAS_LOCKED;
@@ -498,6 +546,30 @@ noexclusive:
g_lock_cleanup_shared(&lck);
if (lck.num_shared == 0) {
/*
* If we store ourselves as exclusive writer,
* without any pending readers, we don't
* need to monitor the record anymore...
*/
dbwrap_watched_watch_remove_instance(rec, state->watch_instance);
} else if (state->watch_instance == 0) {
/*
* Here we have lck.num_shared != 0.
*
* We will return NT_STATUS_LOCK_NOT_GRANTED
* below.
*
* And don't have a watcher instance yet!
*
* We add it here before g_lock_store()
* in order to trigger just one
* low level dbwrap_do_locked() call.
*/
state->watch_instance =
dbwrap_watched_watch_add_instance(rec);
}
status = g_lock_store(rec, &lck, NULL, NULL, 0);
if (!NT_STATUS_IS_OK(status)) {
DBG_DEBUG("g_lock_store() failed: %s\n",
@@ -526,6 +598,15 @@ noexclusive:
do_shared:
/*
* We are going to store us as a reader,
* so we got what we were waiting for.
*
* So we no longer need to monitor the
* record.
*/
dbwrap_watched_watch_remove_instance(rec, state->watch_instance);
if (lck.num_shared == 0) {
status = g_lock_store(rec, &lck, &self, NULL, 0);
if (!NT_STATUS_IS_OK(status)) {
@@ -556,6 +637,15 @@ static void g_lock_lock_fn(
struct g_lock_lock_fn_state *state = private_data;
struct server_id blocker = {0};
/*
* We're trying to get a lock and if we are
* successful in doing that, we should not
* wakeup any other waiters, all they would
* find is that we're holding a lock they
* are conflicting with.
*/
dbwrap_watched_watch_skip_alerting(rec);
state->status = g_lock_trylock(rec, state, value, &blocker);
if (!NT_STATUS_IS_OK(state->status)) {
DBG_DEBUG("g_lock_trylock returned %s\n",
@@ -566,7 +656,7 @@ static void g_lock_lock_fn(
}
state->watch_req = dbwrap_watched_watch_send(
state->req_state, state->req_state->ev, rec, 0, blocker);
state->req_state, state->req_state->ev, rec, state->watch_instance, blocker);
if (state->watch_req == NULL) {
state->status = NT_STATUS_NO_MEMORY;
}
@@ -651,8 +741,9 @@ static void g_lock_lock_retry(struct tevent_req *subreq)
struct server_id blocker = { .pid = 0 };
bool blockerdead = false;
NTSTATUS status;
uint64_t instance = 0;
status = dbwrap_watched_watch_recv(subreq, NULL, &blockerdead, &blocker);
status = dbwrap_watched_watch_recv(subreq, &instance, &blockerdead, &blocker);
DBG_DEBUG("watch_recv returned %s\n", nt_errstr(status));
TALLOC_FREE(subreq);
@@ -667,6 +758,7 @@ static void g_lock_lock_retry(struct tevent_req *subreq)
fn_state = (struct g_lock_lock_fn_state) {
.req_state = state,
.dead_blocker = blockerdead ? &blocker : NULL,
.watch_instance = instance,
};
status = dbwrap_do_locked(state->ctx->db, state->key,
@@ -735,6 +827,15 @@ static void g_lock_lock_simple_fn(
struct g_lock lck = { .exclusive.pid = 0 };
bool ok;
/*
* We're trying to get a lock and if we are
* successful in doing that, we should not
* wakeup any other waiters, all they would
* find is that we're holding a lock they
* are conflicting with.
*/
dbwrap_watched_watch_skip_alerting(rec);
ok = g_lock_parse(value.dptr, value.dsize, &lck);
if (!ok) {
DBG_DEBUG("g_lock_parse failed\n");
@@ -908,6 +1009,20 @@ static void g_lock_unlock_fn(
return;
}
if (!exclusive && lck.exclusive.pid != 0) {
/*
* We only had a read lock and there's
* someone waiting for an exclusive lock.
*
* Don't alert the exclusive lock waiter
* if there are still other read lock holders.
*/
g_lock_cleanup_shared(&lck);
if (lck.num_shared != 0) {
dbwrap_watched_watch_skip_alerting(rec);
}
}
state->status = g_lock_store(rec, &lck, NULL, NULL, 0);
}
@@ -956,6 +1071,17 @@ static void g_lock_writev_data_fn(
bool exclusive;
bool ok;
/*
* We're holding an exclusive write lock.
*
* Now we're updating the content of the record.
*
* We should not wakeup any other waiters, all they
* would find is that we're still holding a lock they
* are conflicting with.
*/
dbwrap_watched_watch_skip_alerting(rec);
ok = g_lock_parse(value.dptr, value.dsize, &lck);
if (!ok) {
DBG_DEBUG("g_lock_parse for %s failed\n",